{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999205655731194, "global_step": 12588, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.968253968253968e-06, "loss": 0.0675, "theoretical_loss": 3.515377441090986, "tokens_seen": 1650065408 }, { "epoch": 0.0, "learning_rate": 7.936507936507936e-06, "loss": 0.0685, "theoretical_loss": 3.515353465512317, "tokens_seen": 1650196480 }, { "epoch": 0.0, "learning_rate": 1.1904761904761905e-05, "loss": 0.0645, "theoretical_loss": 3.5153294923710736, "tokens_seen": 1650327552 }, { "epoch": 0.0, "learning_rate": 1.5873015873015872e-05, "loss": 0.0683, "theoretical_loss": 3.5153055216668134, "tokens_seen": 1650458624 }, { "epoch": 0.0, "learning_rate": 1.984126984126984e-05, "loss": 0.0628, "theoretical_loss": 3.515281553399096, "tokens_seen": 1650589696 }, { "epoch": 0.0, "learning_rate": 2.380952380952381e-05, "loss": 0.0682, "theoretical_loss": 3.51525758756748, "tokens_seen": 1650720768 }, { "epoch": 0.0, "learning_rate": 2.7777777777777776e-05, "loss": 0.0659, "theoretical_loss": 3.515233624171524, "tokens_seen": 1650851840 }, { "epoch": 0.0, "learning_rate": 3.1746031746031745e-05, "loss": 0.067, "theoretical_loss": 3.5152096632107876, "tokens_seen": 1650982912 }, { "epoch": 0.0, "learning_rate": 3.571428571428571e-05, "loss": 0.0672, "theoretical_loss": 3.5151857046848294, "tokens_seen": 1651113984 }, { "epoch": 0.0, "learning_rate": 3.968253968253968e-05, "loss": 0.0636, "theoretical_loss": 3.5151617485932096, "tokens_seen": 1651245056 }, { "epoch": 0.0, "learning_rate": 4.365079365079365e-05, "loss": 0.0696, "theoretical_loss": 3.515137794935487, "tokens_seen": 1651376128 }, { "epoch": 0.0, "learning_rate": 4.761904761904762e-05, "loss": 0.0656, "theoretical_loss": 3.5151138437112213, "tokens_seen": 1651507200 }, { "epoch": 0.0, "learning_rate": 5.1587301587301586e-05, "loss": 0.0639, "theoretical_loss": 3.5150898949199725, "tokens_seen": 1651638272 }, { "epoch": 0.0, "learning_rate": 5.555555555555555e-05, "loss": 0.066, "theoretical_loss": 3.5150659485613005, "tokens_seen": 1651769344 }, { "epoch": 0.0, "learning_rate": 5.9523809523809524e-05, "loss": 0.0696, "theoretical_loss": 3.515042004634764, "tokens_seen": 1651900416 }, { "epoch": 0.0, "learning_rate": 6.349206349206349e-05, "loss": 0.0663, "theoretical_loss": 3.5150180631399244, "tokens_seen": 1652031488 }, { "epoch": 0.0, "learning_rate": 6.746031746031745e-05, "loss": 0.0687, "theoretical_loss": 3.514994124076341, "tokens_seen": 1652162560 }, { "epoch": 0.0, "learning_rate": 7.142857142857142e-05, "loss": 0.0698, "theoretical_loss": 3.5149701874435753, "tokens_seen": 1652293632 }, { "epoch": 0.0, "learning_rate": 7.53968253968254e-05, "loss": 0.0695, "theoretical_loss": 3.514946253241186, "tokens_seen": 1652424704 }, { "epoch": 0.0, "learning_rate": 7.936507936507937e-05, "loss": 0.0693, "theoretical_loss": 3.514922321468734, "tokens_seen": 1652555776 }, { "epoch": 0.0, "learning_rate": 8.333333333333333e-05, "loss": 0.0656, "theoretical_loss": 3.5148983921257813, "tokens_seen": 1652686848 }, { "epoch": 0.0, "learning_rate": 8.73015873015873e-05, "loss": 0.0703, "theoretical_loss": 3.5148744652118866, "tokens_seen": 1652817920 }, { "epoch": 0.0, "learning_rate": 9.126984126984126e-05, "loss": 0.0642, "theoretical_loss": 3.514850540726612, "tokens_seen": 1652948992 }, { "epoch": 0.0, "learning_rate": 9.523809523809524e-05, "loss": 0.066, "theoretical_loss": 3.5148266186695185, "tokens_seen": 1653080064 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.001204964006319642, "objective/train/docs_used": 603740, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3499644994735718, "objective/train/original_loss": 1.3499644994735718, "objective/train/theoretical_loss": 3.5148026990401666, "objective/train/tokens_used": 23735776, "objective/train/value_avg": -0.005832672119140625, "objective/train/value_loss": 0.00015637517208233476, "objective/train/value_max": -5.918741226196289e-05, "objective/train/value_min": -0.251953125, "objective/train/value_reward_corr": 0.6294844612814327, "objective/train/value_std": 0.00983428955078125, "objective/train/weight_avg": 1.001273512840271, "objective/train/weighted_lm_loss": 1.3519909381866455, "objective/train/weights_max": 1.2235772609710693, "objective/train/weights_min": 0.3864767551422119, "theoretical_loss": 3.5148026990401666, "tokens_seen": 1653211136 }, { "epoch": 0.0, "learning_rate": 9.92063492063492e-05, "loss": 0.0697, "theoretical_loss": 3.5148026990401666, "tokens_seen": 1653211136 }, { "epoch": 0.0, "learning_rate": 0.00010317460317460317, "loss": 0.0689, "theoretical_loss": 3.5147787818381175, "tokens_seen": 1653342208 }, { "epoch": 0.0, "learning_rate": 0.00010714285714285714, "loss": 0.0701, "theoretical_loss": 3.5147548670629325, "tokens_seen": 1653473280 }, { "epoch": 0.0, "learning_rate": 0.0001111111111111111, "loss": 0.0717, "theoretical_loss": 3.514730954714173, "tokens_seen": 1653604352 }, { "epoch": 0.0, "learning_rate": 0.00011507936507936508, "loss": 0.0689, "theoretical_loss": 3.5147070447914013, "tokens_seen": 1653735424 }, { "epoch": 0.0, "learning_rate": 0.00011904761904761905, "loss": 0.0658, "theoretical_loss": 3.5146831372941776, "tokens_seen": 1653866496 }, { "epoch": 0.0, "learning_rate": 0.000123015873015873, "loss": 0.0697, "theoretical_loss": 3.514659232222065, "tokens_seen": 1653997568 }, { "epoch": 0.0, "learning_rate": 0.00012698412698412698, "loss": 0.0688, "theoretical_loss": 3.5146353295746247, "tokens_seen": 1654128640 }, { "epoch": 0.0, "learning_rate": 0.00013095238095238096, "loss": 0.0695, "theoretical_loss": 3.514611429351419, "tokens_seen": 1654259712 }, { "epoch": 0.0, "learning_rate": 0.0001349206349206349, "loss": 0.0676, "theoretical_loss": 3.514587531552009, "tokens_seen": 1654390784 }, { "epoch": 0.0, "learning_rate": 0.0001388888888888889, "loss": 0.0655, "theoretical_loss": 3.5145636361759576, "tokens_seen": 1654521856 }, { "epoch": 0.0, "learning_rate": 0.00014285714285714284, "loss": 0.0703, "theoretical_loss": 3.5145397432228274, "tokens_seen": 1654652928 }, { "epoch": 0.0, "learning_rate": 0.00014682539682539685, "loss": 0.066, "theoretical_loss": 3.5145158526921807, "tokens_seen": 1654784000 }, { "epoch": 0.0, "learning_rate": 0.0001507936507936508, "loss": 0.0654, "theoretical_loss": 3.5144919645835797, "tokens_seen": 1654915072 }, { "epoch": 0.0, "learning_rate": 0.00015476190476190478, "loss": 0.065, "theoretical_loss": 3.5144680788965874, "tokens_seen": 1655046144 }, { "epoch": 0.0, "learning_rate": 0.00015873015873015873, "loss": 0.0658, "theoretical_loss": 3.514444195630766, "tokens_seen": 1655177216 }, { "epoch": 0.0, "learning_rate": 0.0001626984126984127, "loss": 0.0669, "theoretical_loss": 3.514420314785679, "tokens_seen": 1655308288 }, { "epoch": 0.0, "learning_rate": 0.00016666666666666666, "loss": 0.0684, "theoretical_loss": 3.5143964363608893, "tokens_seen": 1655439360 }, { "epoch": 0.0, "learning_rate": 0.00017063492063492064, "loss": 0.0664, "theoretical_loss": 3.5143725603559597, "tokens_seen": 1655570432 }, { "epoch": 0.0, "learning_rate": 0.0001746031746031746, "loss": 0.0681, "theoretical_loss": 3.514348686770454, "tokens_seen": 1655701504 }, { "epoch": 0.0, "learning_rate": 0.00017857142857142857, "loss": 0.0677, "theoretical_loss": 3.5143248156039344, "tokens_seen": 1655832576 }, { "epoch": 0.0, "learning_rate": 0.00018253968253968252, "loss": 0.0671, "theoretical_loss": 3.5143009468559656, "tokens_seen": 1655963648 }, { "epoch": 0.0, "learning_rate": 0.0001865079365079365, "loss": 0.0624, "theoretical_loss": 3.51427708052611, "tokens_seen": 1656094720 }, { "epoch": 0.0, "learning_rate": 0.00019047619047619048, "loss": 0.0645, "theoretical_loss": 3.514253216613932, "tokens_seen": 1656225792 }, { "epoch": 0.0, "learning_rate": 0.00019444444444444446, "loss": 0.0701, "theoretical_loss": 3.514229355118996, "tokens_seen": 1656356864 }, { "epoch": 0.0, "objective/train/advantage_avg": 0.0008027777075767517, "objective/train/docs_used": 604805, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3501604795455933, "objective/train/original_loss": 1.3501605987548828, "objective/train/theoretical_loss": 3.514205496040865, "objective/train/tokens_used": 27012576, "objective/train/value_avg": -0.00780487060546875, "objective/train/value_loss": 0.00035824047517962754, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.255859375, "objective/train/value_reward_corr": 0.5337083664878491, "objective/train/value_std": 0.01177215576171875, "objective/train/weight_avg": 1.0009499788284302, "objective/train/weighted_lm_loss": 1.3511680364608765, "objective/train/weights_max": 1.160445213317871, "objective/train/weights_min": 0.3684496581554413, "theoretical_loss": 3.514205496040865, "tokens_seen": 1656487936 }, { "epoch": 0.0, "learning_rate": 0.0001984126984126984, "loss": 0.0696, "theoretical_loss": 3.514205496040865, "tokens_seen": 1656487936 }, { "epoch": 0.0, "learning_rate": 0.0002023809523809524, "loss": 0.0637, "theoretical_loss": 3.5141816393791023, "tokens_seen": 1656619008 }, { "epoch": 0.0, "learning_rate": 0.00020634920634920634, "loss": 0.0685, "theoretical_loss": 3.5141577851332735, "tokens_seen": 1656750080 }, { "epoch": 0.0, "learning_rate": 0.00021031746031746032, "loss": 0.0643, "theoretical_loss": 3.514133933302942, "tokens_seen": 1656881152 }, { "epoch": 0.0, "learning_rate": 0.00021428571428571427, "loss": 0.0639, "theoretical_loss": 3.5141100838876724, "tokens_seen": 1657012224 }, { "epoch": 0.0, "learning_rate": 0.00021825396825396825, "loss": 0.0687, "theoretical_loss": 3.51408623688703, "tokens_seen": 1657143296 }, { "epoch": 0.0, "learning_rate": 0.0002222222222222222, "loss": 0.0685, "theoretical_loss": 3.5140623923005774, "tokens_seen": 1657274368 }, { "epoch": 0.0, "learning_rate": 0.00022619047619047618, "loss": 0.0711, "theoretical_loss": 3.514038550127881, "tokens_seen": 1657405440 }, { "epoch": 0.0, "learning_rate": 0.00023015873015873016, "loss": 0.0674, "theoretical_loss": 3.514014710368505, "tokens_seen": 1657536512 }, { "epoch": 0.0, "learning_rate": 0.00023412698412698414, "loss": 0.0664, "theoretical_loss": 3.5139908730220135, "tokens_seen": 1657667584 }, { "epoch": 0.0, "learning_rate": 0.0002380952380952381, "loss": 0.067, "theoretical_loss": 3.513967038087973, "tokens_seen": 1657798656 }, { "epoch": 0.0, "learning_rate": 0.00024206349206349207, "loss": 0.067, "theoretical_loss": 3.513943205565948, "tokens_seen": 1657929728 }, { "epoch": 0.0, "learning_rate": 0.000246031746031746, "loss": 0.0714, "theoretical_loss": 3.5139193754555036, "tokens_seen": 1658060800 }, { "epoch": 0.01, "learning_rate": 0.00025, "loss": 0.0678, "theoretical_loss": 3.5138955477562055, "tokens_seen": 1658191872 }, { "epoch": 0.01, "learning_rate": 0.00025396825396825396, "loss": 0.0702, "theoretical_loss": 3.513871722467619, "tokens_seen": 1658322944 }, { "epoch": 0.01, "learning_rate": 0.00025793650793650796, "loss": 0.0639, "theoretical_loss": 3.51384789958931, "tokens_seen": 1658454016 }, { "epoch": 0.01, "learning_rate": 0.0002619047619047619, "loss": 0.0693, "theoretical_loss": 3.513824079120843, "tokens_seen": 1658585088 }, { "epoch": 0.01, "learning_rate": 0.00026587301587301587, "loss": 0.064, "theoretical_loss": 3.5138002610617853, "tokens_seen": 1658716160 }, { "epoch": 0.01, "learning_rate": 0.0002698412698412698, "loss": 0.0645, "theoretical_loss": 3.513776445411702, "tokens_seen": 1658847232 }, { "epoch": 0.01, "learning_rate": 0.0002738095238095238, "loss": 0.0655, "theoretical_loss": 3.513752632170159, "tokens_seen": 1658978304 }, { "epoch": 0.01, "learning_rate": 0.0002777777777777778, "loss": 0.0691, "theoretical_loss": 3.5137288213367235, "tokens_seen": 1659109376 }, { "epoch": 0.01, "learning_rate": 0.00028174603174603173, "loss": 0.0691, "theoretical_loss": 3.513705012910961, "tokens_seen": 1659240448 }, { "epoch": 0.01, "learning_rate": 0.0002857142857142857, "loss": 0.0651, "theoretical_loss": 3.5136812068924375, "tokens_seen": 1659371520 }, { "epoch": 0.01, "learning_rate": 0.0002896825396825397, "loss": 0.0685, "theoretical_loss": 3.5136574032807206, "tokens_seen": 1659502592 }, { "epoch": 0.01, "learning_rate": 0.0002936507936507937, "loss": 0.0688, "theoretical_loss": 3.513633602075376, "tokens_seen": 1659633664 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.00154752261005342, "objective/train/docs_used": 606029, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4579224586486816, "objective/train/original_loss": 1.457922339439392, "objective/train/theoretical_loss": 3.5136098032759704, "objective/train/tokens_used": 30289376, "objective/train/value_avg": -0.005924224853515625, "objective/train/value_loss": 0.0002064805303234607, "objective/train/value_max": -3.975629806518555e-05, "objective/train/value_min": -0.99560546875, "objective/train/value_reward_corr": 0.6807965590203882, "objective/train/value_std": 0.0114898681640625, "objective/train/weight_avg": 1.0016330480575562, "objective/train/weighted_lm_loss": 1.4596384763717651, "objective/train/weights_max": 1.1746975183486938, "objective/train/weights_min": 0.25541436672210693, "theoretical_loss": 3.5136098032759704, "tokens_seen": 1659764736 }, { "epoch": 0.01, "learning_rate": 0.00029761904761904765, "loss": 0.0681, "theoretical_loss": 3.5136098032759704, "tokens_seen": 1659764736 }, { "epoch": 0.01, "learning_rate": 0.0003015873015873016, "loss": 0.0662, "theoretical_loss": 3.513586006882071, "tokens_seen": 1659895808 }, { "epoch": 0.01, "learning_rate": 0.0003055555555555556, "loss": 0.0689, "theoretical_loss": 3.5135622128932447, "tokens_seen": 1660026880 }, { "epoch": 0.01, "learning_rate": 0.00030952380952380956, "loss": 0.064, "theoretical_loss": 3.513538421309059, "tokens_seen": 1660157952 }, { "epoch": 0.01, "learning_rate": 0.0003134920634920635, "loss": 0.0644, "theoretical_loss": 3.5135146321290796, "tokens_seen": 1660289024 }, { "epoch": 0.01, "learning_rate": 0.00031746031746031746, "loss": 0.0696, "theoretical_loss": 3.5134908453528757, "tokens_seen": 1660420096 }, { "epoch": 0.01, "learning_rate": 0.00032142857142857147, "loss": 0.0687, "theoretical_loss": 3.513467060980013, "tokens_seen": 1660551168 }, { "epoch": 0.01, "learning_rate": 0.0003253968253968254, "loss": 0.0662, "theoretical_loss": 3.51344327901006, "tokens_seen": 1660682240 }, { "epoch": 0.01, "learning_rate": 0.00032936507936507937, "loss": 0.0703, "theoretical_loss": 3.513419499442584, "tokens_seen": 1660813312 }, { "epoch": 0.01, "learning_rate": 0.0003333333333333333, "loss": 0.0732, "theoretical_loss": 3.513395722277153, "tokens_seen": 1660944384 }, { "epoch": 0.01, "learning_rate": 0.00033730158730158733, "loss": 0.0646, "theoretical_loss": 3.513371947513334, "tokens_seen": 1661075456 }, { "epoch": 0.01, "learning_rate": 0.0003412698412698413, "loss": 0.0692, "theoretical_loss": 3.513348175150696, "tokens_seen": 1661206528 }, { "epoch": 0.01, "learning_rate": 0.00034523809523809523, "loss": 0.0675, "theoretical_loss": 3.513324405188806, "tokens_seen": 1661337600 }, { "epoch": 0.01, "learning_rate": 0.0003492063492063492, "loss": 0.0722, "theoretical_loss": 3.5133006376272338, "tokens_seen": 1661468672 }, { "epoch": 0.01, "learning_rate": 0.0003531746031746032, "loss": 0.0659, "theoretical_loss": 3.513276872465546, "tokens_seen": 1661599744 }, { "epoch": 0.01, "learning_rate": 0.00035714285714285714, "loss": 0.071, "theoretical_loss": 3.5132531097033115, "tokens_seen": 1661730816 }, { "epoch": 0.01, "learning_rate": 0.0003611111111111111, "loss": 0.0698, "theoretical_loss": 3.513229349340099, "tokens_seen": 1661861888 }, { "epoch": 0.01, "learning_rate": 0.00036507936507936505, "loss": 0.067, "theoretical_loss": 3.5132055913754776, "tokens_seen": 1661992960 }, { "epoch": 0.01, "learning_rate": 0.00036904761904761905, "loss": 0.0693, "theoretical_loss": 3.5131818358090148, "tokens_seen": 1662124032 }, { "epoch": 0.01, "learning_rate": 0.000373015873015873, "loss": 0.0675, "theoretical_loss": 3.5131580826402806, "tokens_seen": 1662255104 }, { "epoch": 0.01, "learning_rate": 0.000376984126984127, "loss": 0.0674, "theoretical_loss": 3.5131343318688435, "tokens_seen": 1662386176 }, { "epoch": 0.01, "learning_rate": 0.00038095238095238096, "loss": 0.0696, "theoretical_loss": 3.5131105834942726, "tokens_seen": 1662517248 }, { "epoch": 0.01, "learning_rate": 0.00038492063492063497, "loss": 0.0694, "theoretical_loss": 3.513086837516137, "tokens_seen": 1662648320 }, { "epoch": 0.01, "learning_rate": 0.0003888888888888889, "loss": 0.067, "theoretical_loss": 3.5130630939340053, "tokens_seen": 1662779392 }, { "epoch": 0.01, "learning_rate": 0.0003928571428571429, "loss": 0.0697, "theoretical_loss": 3.513039352747448, "tokens_seen": 1662910464 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.0005926720914430916, "objective/train/docs_used": 607267, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3112833499908447, "objective/train/original_loss": 1.3112833499908447, "objective/train/theoretical_loss": 3.5130156139560347, "objective/train/tokens_used": 33566176, "objective/train/value_avg": -0.00823211669921875, "objective/train/value_loss": 0.00015823828289285302, "objective/train/value_max": -8.028745651245117e-05, "objective/train/value_min": -0.26904296875, "objective/train/value_reward_corr": 0.7162060709232081, "objective/train/value_std": 0.01338958740234375, "objective/train/weight_avg": 1.0006674528121948, "objective/train/weighted_lm_loss": 1.312343955039978, "objective/train/weights_max": 1.2374004125595093, "objective/train/weights_min": 0.39345037937164307, "theoretical_loss": 3.5130156139560347, "tokens_seen": 1663041536 }, { "epoch": 0.01, "learning_rate": 0.0003968253968253968, "loss": 0.0644, "theoretical_loss": 3.5130156139560347, "tokens_seen": 1663041536 }, { "epoch": 0.01, "learning_rate": 0.00040079365079365083, "loss": 0.07, "theoretical_loss": 3.5129918775593345, "tokens_seen": 1663172608 }, { "epoch": 0.01, "learning_rate": 0.0004047619047619048, "loss": 0.0686, "theoretical_loss": 3.512968143556917, "tokens_seen": 1663303680 }, { "epoch": 0.01, "learning_rate": 0.00040873015873015874, "loss": 0.0669, "theoretical_loss": 3.512944411948352, "tokens_seen": 1663434752 }, { "epoch": 0.01, "learning_rate": 0.0004126984126984127, "loss": 0.0675, "theoretical_loss": 3.51292068273321, "tokens_seen": 1663565824 }, { "epoch": 0.01, "learning_rate": 0.0004166666666666667, "loss": 0.0658, "theoretical_loss": 3.5128969559110605, "tokens_seen": 1663696896 }, { "epoch": 0.01, "learning_rate": 0.00042063492063492065, "loss": 0.0686, "theoretical_loss": 3.512873231481474, "tokens_seen": 1663827968 }, { "epoch": 0.01, "learning_rate": 0.0004246031746031746, "loss": 0.0658, "theoretical_loss": 3.512849509444021, "tokens_seen": 1663959040 }, { "epoch": 0.01, "learning_rate": 0.00042857142857142855, "loss": 0.0659, "theoretical_loss": 3.512825789798271, "tokens_seen": 1664090112 }, { "epoch": 0.01, "learning_rate": 0.00043253968253968256, "loss": 0.068, "theoretical_loss": 3.5128020725437956, "tokens_seen": 1664221184 }, { "epoch": 0.01, "learning_rate": 0.0004365079365079365, "loss": 0.0709, "theoretical_loss": 3.5127783576801646, "tokens_seen": 1664352256 }, { "epoch": 0.01, "learning_rate": 0.00044047619047619046, "loss": 0.0686, "theoretical_loss": 3.5127546452069494, "tokens_seen": 1664483328 }, { "epoch": 0.01, "learning_rate": 0.0004444444444444444, "loss": 0.0726, "theoretical_loss": 3.51273093512372, "tokens_seen": 1664614400 }, { "epoch": 0.01, "learning_rate": 0.0004484126984126984, "loss": 0.0695, "theoretical_loss": 3.512707227430049, "tokens_seen": 1664745472 }, { "epoch": 0.01, "learning_rate": 0.00045238095238095237, "loss": 0.0705, "theoretical_loss": 3.512683522125505, "tokens_seen": 1664876544 }, { "epoch": 0.01, "learning_rate": 0.0004563492063492063, "loss": 0.0635, "theoretical_loss": 3.512659819209661, "tokens_seen": 1665007616 }, { "epoch": 0.01, "learning_rate": 0.00046031746031746033, "loss": 0.0683, "theoretical_loss": 3.512636118682088, "tokens_seen": 1665138688 }, { "epoch": 0.01, "learning_rate": 0.00046428571428571433, "loss": 0.0724, "theoretical_loss": 3.5126124205423572, "tokens_seen": 1665269760 }, { "epoch": 0.01, "learning_rate": 0.0004682539682539683, "loss": 0.0673, "theoretical_loss": 3.5125887247900396, "tokens_seen": 1665400832 }, { "epoch": 0.01, "learning_rate": 0.00047222222222222224, "loss": 0.0711, "theoretical_loss": 3.5125650314247077, "tokens_seen": 1665531904 }, { "epoch": 0.01, "learning_rate": 0.0004761904761904762, "loss": 0.0666, "theoretical_loss": 3.512541340445933, "tokens_seen": 1665662976 }, { "epoch": 0.01, "learning_rate": 0.0004801587301587302, "loss": 0.071, "theoretical_loss": 3.512517651853287, "tokens_seen": 1665794048 }, { "epoch": 0.01, "learning_rate": 0.00048412698412698415, "loss": 0.071, "theoretical_loss": 3.5124939656463416, "tokens_seen": 1665925120 }, { "epoch": 0.01, "learning_rate": 0.0004880952380952381, "loss": 0.0697, "theoretical_loss": 3.5124702818246694, "tokens_seen": 1666056192 }, { "epoch": 0.01, "learning_rate": 0.000492063492063492, "loss": 0.0696, "theoretical_loss": 3.5124466003878423, "tokens_seen": 1666187264 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.0005098440451547503, "objective/train/docs_used": 608508, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2983801364898682, "objective/train/original_loss": 1.2983801364898682, "objective/train/theoretical_loss": 3.512422921335432, "objective/train/tokens_used": 36842976, "objective/train/value_avg": -0.007289886474609375, "objective/train/value_loss": 0.00015202356735244393, "objective/train/value_max": -6.556510925292969e-05, "objective/train/value_min": -0.6982421875, "objective/train/value_reward_corr": 0.7275330051087645, "objective/train/value_std": 0.0121002197265625, "objective/train/weight_avg": 1.0005797147750854, "objective/train/weighted_lm_loss": 1.299207091331482, "objective/train/weights_max": 1.119400143623352, "objective/train/weights_min": 0.4100736975669861, "theoretical_loss": 3.512422921335432, "tokens_seen": 1666318336 }, { "epoch": 0.01, "learning_rate": 0.000496031746031746, "loss": 0.0665, "theoretical_loss": 3.512422921335432, "tokens_seen": 1666318336 }, { "epoch": 0.01, "learning_rate": 0.0005, "loss": 0.0709, "theoretical_loss": 3.512399244667012, "tokens_seen": 1666449408 }, { "epoch": 0.01, "learning_rate": 0.0004999598812484955, "loss": 0.0704, "theoretical_loss": 3.5123755703821535, "tokens_seen": 1666580480 }, { "epoch": 0.01, "learning_rate": 0.0004999197624969911, "loss": 0.0704, "theoretical_loss": 3.5123518984804303, "tokens_seen": 1666711552 }, { "epoch": 0.01, "learning_rate": 0.0004998796437454867, "loss": 0.0702, "theoretical_loss": 3.5123282289614153, "tokens_seen": 1666842624 }, { "epoch": 0.01, "learning_rate": 0.0004998395249939822, "loss": 0.0699, "theoretical_loss": 3.5123045618246795, "tokens_seen": 1666973696 }, { "epoch": 0.01, "learning_rate": 0.0004997994062424778, "loss": 0.07, "theoretical_loss": 3.5122808970697976, "tokens_seen": 1667104768 }, { "epoch": 0.01, "learning_rate": 0.0004997592874909733, "loss": 0.069, "theoretical_loss": 3.5122572346963423, "tokens_seen": 1667235840 }, { "epoch": 0.01, "learning_rate": 0.0004997191687394688, "loss": 0.0685, "theoretical_loss": 3.5122335747038864, "tokens_seen": 1667366912 }, { "epoch": 0.01, "learning_rate": 0.0004996790499879643, "loss": 0.0729, "theoretical_loss": 3.512209917092003, "tokens_seen": 1667497984 }, { "epoch": 0.01, "learning_rate": 0.00049963893123646, "loss": 0.0678, "theoretical_loss": 3.512186261860266, "tokens_seen": 1667629056 }, { "epoch": 0.01, "learning_rate": 0.0004995988124849555, "loss": 0.0752, "theoretical_loss": 3.5121626090082487, "tokens_seen": 1667760128 }, { "epoch": 0.01, "learning_rate": 0.000499558693733451, "loss": 0.0721, "theoretical_loss": 3.512138958535525, "tokens_seen": 1667891200 }, { "epoch": 0.01, "learning_rate": 0.0004995185749819466, "loss": 0.0722, "theoretical_loss": 3.5121153104416676, "tokens_seen": 1668022272 }, { "epoch": 0.01, "learning_rate": 0.0004994784562304421, "loss": 0.073, "theoretical_loss": 3.5120916647262517, "tokens_seen": 1668153344 }, { "epoch": 0.01, "learning_rate": 0.0004994383374789376, "loss": 0.0718, "theoretical_loss": 3.5120680213888504, "tokens_seen": 1668284416 }, { "epoch": 0.01, "learning_rate": 0.0004993982187274333, "loss": 0.0714, "theoretical_loss": 3.512044380429038, "tokens_seen": 1668415488 }, { "epoch": 0.01, "learning_rate": 0.0004993580999759288, "loss": 0.065, "theoretical_loss": 3.512020741846388, "tokens_seen": 1668546560 }, { "epoch": 0.01, "learning_rate": 0.0004993179812244243, "loss": 0.0662, "theoretical_loss": 3.511997105640476, "tokens_seen": 1668677632 }, { "epoch": 0.01, "learning_rate": 0.0004992778624729198, "loss": 0.0703, "theoretical_loss": 3.511973471810875, "tokens_seen": 1668808704 }, { "epoch": 0.01, "learning_rate": 0.0004992377437214154, "loss": 0.0705, "theoretical_loss": 3.5119498403571603, "tokens_seen": 1668939776 }, { "epoch": 0.01, "learning_rate": 0.0004991976249699109, "loss": 0.0736, "theoretical_loss": 3.5119262112789063, "tokens_seen": 1669070848 }, { "epoch": 0.01, "learning_rate": 0.0004991575062184065, "loss": 0.0659, "theoretical_loss": 3.511902584575687, "tokens_seen": 1669201920 }, { "epoch": 0.01, "learning_rate": 0.0004991173874669021, "loss": 0.0757, "theoretical_loss": 3.5118789602470786, "tokens_seen": 1669332992 }, { "epoch": 0.01, "learning_rate": 0.0004990772687153976, "loss": 0.0653, "theoretical_loss": 3.511855338292655, "tokens_seen": 1669464064 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.0005414688494056463, "objective/train/docs_used": 609762, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2836406230926514, "objective/train/original_loss": 1.2836405038833618, "objective/train/theoretical_loss": 3.5118317187119916, "objective/train/tokens_used": 40119776, "objective/train/value_avg": -0.0061187744140625, "objective/train/value_loss": 0.0002275774022564292, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.97314453125, "objective/train/value_reward_corr": 0.7139527544997281, "objective/train/value_std": 0.0141754150390625, "objective/train/weight_avg": 1.0006449222564697, "objective/train/weighted_lm_loss": 1.2846599817276, "objective/train/weights_max": 1.730138897895813, "objective/train/weights_min": 0.3922994136810303, "theoretical_loss": 3.5118317187119916, "tokens_seen": 1669595136 }, { "epoch": 0.01, "learning_rate": 0.0004990371499638931, "loss": 0.0691, "theoretical_loss": 3.5118317187119916, "tokens_seen": 1669595136 }, { "epoch": 0.01, "learning_rate": 0.0004989970312123887, "loss": 0.0671, "theoretical_loss": 3.5118081015046636, "tokens_seen": 1669726208 }, { "epoch": 0.01, "learning_rate": 0.0004989569124608842, "loss": 0.0746, "theoretical_loss": 3.511784486670246, "tokens_seen": 1669857280 }, { "epoch": 0.01, "learning_rate": 0.0004989167937093798, "loss": 0.0723, "theoretical_loss": 3.5117608742083135, "tokens_seen": 1669988352 }, { "epoch": 0.01, "learning_rate": 0.0004988766749578753, "loss": 0.0729, "theoretical_loss": 3.5117372641184432, "tokens_seen": 1670119424 }, { "epoch": 0.01, "learning_rate": 0.0004988365562063709, "loss": 0.0709, "theoretical_loss": 3.5117136564002096, "tokens_seen": 1670250496 }, { "epoch": 0.01, "learning_rate": 0.0004987964374548664, "loss": 0.0733, "theoretical_loss": 3.5116900510531885, "tokens_seen": 1670381568 }, { "epoch": 0.01, "learning_rate": 0.0004987563187033619, "loss": 0.0709, "theoretical_loss": 3.511666448076956, "tokens_seen": 1670512640 }, { "epoch": 0.01, "learning_rate": 0.0004987161999518575, "loss": 0.0687, "theoretical_loss": 3.5116428474710872, "tokens_seen": 1670643712 }, { "epoch": 0.01, "learning_rate": 0.0004986760812003531, "loss": 0.0703, "theoretical_loss": 3.5116192492351592, "tokens_seen": 1670774784 }, { "epoch": 0.01, "learning_rate": 0.0004986359624488486, "loss": 0.0705, "theoretical_loss": 3.5115956533687473, "tokens_seen": 1670905856 }, { "epoch": 0.01, "learning_rate": 0.0004985958436973442, "loss": 0.0652, "theoretical_loss": 3.5115720598714284, "tokens_seen": 1671036928 }, { "epoch": 0.01, "learning_rate": 0.0004985557249458397, "loss": 0.0706, "theoretical_loss": 3.5115484687427783, "tokens_seen": 1671168000 }, { "epoch": 0.01, "learning_rate": 0.0004985156061943352, "loss": 0.068, "theoretical_loss": 3.5115248799823733, "tokens_seen": 1671299072 }, { "epoch": 0.01, "learning_rate": 0.0004984754874428307, "loss": 0.0664, "theoretical_loss": 3.5115012935897907, "tokens_seen": 1671430144 }, { "epoch": 0.01, "learning_rate": 0.0004984353686913264, "loss": 0.0677, "theoretical_loss": 3.5114777095646064, "tokens_seen": 1671561216 }, { "epoch": 0.01, "learning_rate": 0.0004983952499398219, "loss": 0.0701, "theoretical_loss": 3.5114541279063975, "tokens_seen": 1671692288 }, { "epoch": 0.01, "learning_rate": 0.0004983551311883174, "loss": 0.0663, "theoretical_loss": 3.511430548614741, "tokens_seen": 1671823360 }, { "epoch": 0.01, "learning_rate": 0.000498315012436813, "loss": 0.0657, "theoretical_loss": 3.511406971689214, "tokens_seen": 1671954432 }, { "epoch": 0.01, "learning_rate": 0.0004982748936853085, "loss": 0.073, "theoretical_loss": 3.511383397129393, "tokens_seen": 1672085504 }, { "epoch": 0.01, "learning_rate": 0.000498234774933804, "loss": 0.0665, "theoretical_loss": 3.511359824934856, "tokens_seen": 1672216576 }, { "epoch": 0.01, "learning_rate": 0.0004981946561822997, "loss": 0.0736, "theoretical_loss": 3.5113362551051797, "tokens_seen": 1672347648 }, { "epoch": 0.01, "learning_rate": 0.0004981545374307952, "loss": 0.0728, "theoretical_loss": 3.5113126876399416, "tokens_seen": 1672478720 }, { "epoch": 0.01, "learning_rate": 0.0004981144186792907, "loss": 0.0709, "theoretical_loss": 3.5112891225387193, "tokens_seen": 1672609792 }, { "epoch": 0.01, "learning_rate": 0.0004980742999277862, "loss": 0.0715, "theoretical_loss": 3.511265559801091, "tokens_seen": 1672740864 }, { "epoch": 0.01, "objective/train/advantage_avg": 0.0009203064255416393, "objective/train/docs_used": 611052, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3329579830169678, "objective/train/original_loss": 1.3329577445983887, "objective/train/theoretical_loss": 3.511241999426634, "objective/train/tokens_used": 43396576, "objective/train/value_avg": -0.00855255126953125, "objective/train/value_loss": 0.00033462108694948256, "objective/train/value_max": -0.00012826919555664062, "objective/train/value_min": -0.27001953125, "objective/train/value_reward_corr": 0.6063411727676533, "objective/train/value_std": 0.0146331787109375, "objective/train/weight_avg": 1.0010658502578735, "objective/train/weighted_lm_loss": 1.3346182107925415, "objective/train/weights_max": 1.2563867568969727, "objective/train/weights_min": 0.36866194009780884, "theoretical_loss": 3.511241999426634, "tokens_seen": 1672871936 }, { "epoch": 0.01, "learning_rate": 0.0004980341811762818, "loss": 0.0696, "theoretical_loss": 3.511241999426634, "tokens_seen": 1672871936 }, { "epoch": 0.01, "learning_rate": 0.0004979940624247773, "loss": 0.0707, "theoretical_loss": 3.5112184414149255, "tokens_seen": 1673003008 }, { "epoch": 0.01, "learning_rate": 0.0004979539436732729, "loss": 0.0707, "theoretical_loss": 3.5111948857655446, "tokens_seen": 1673134080 }, { "epoch": 0.01, "learning_rate": 0.0004979138249217685, "loss": 0.0718, "theoretical_loss": 3.5111713324780687, "tokens_seen": 1673265152 }, { "epoch": 0.01, "learning_rate": 0.000497873706170264, "loss": 0.0726, "theoretical_loss": 3.511147781552076, "tokens_seen": 1673396224 }, { "epoch": 0.01, "learning_rate": 0.0004978335874187595, "loss": 0.0746, "theoretical_loss": 3.5111242329871457, "tokens_seen": 1673527296 }, { "epoch": 0.01, "learning_rate": 0.0004977934686672551, "loss": 0.069, "theoretical_loss": 3.5111006867828545, "tokens_seen": 1673658368 }, { "epoch": 0.01, "learning_rate": 0.0004977533499157506, "loss": 0.0723, "theoretical_loss": 3.5110771429387824, "tokens_seen": 1673789440 }, { "epoch": 0.01, "learning_rate": 0.0004977132311642462, "loss": 0.0733, "theoretical_loss": 3.511053601454507, "tokens_seen": 1673920512 }, { "epoch": 0.01, "learning_rate": 0.0004976731124127417, "loss": 0.0683, "theoretical_loss": 3.511030062329608, "tokens_seen": 1674051584 }, { "epoch": 0.01, "learning_rate": 0.0004976329936612373, "loss": 0.0668, "theoretical_loss": 3.5110065255636638, "tokens_seen": 1674182656 }, { "epoch": 0.01, "learning_rate": 0.0004975928749097328, "loss": 0.0744, "theoretical_loss": 3.5109829911562533, "tokens_seen": 1674313728 }, { "epoch": 0.01, "learning_rate": 0.0004975527561582283, "loss": 0.0674, "theoretical_loss": 3.5109594591069553, "tokens_seen": 1674444800 }, { "epoch": 0.01, "learning_rate": 0.000497512637406724, "loss": 0.0711, "theoretical_loss": 3.5109359294153495, "tokens_seen": 1674575872 }, { "epoch": 0.02, "learning_rate": 0.0004974725186552195, "loss": 0.0672, "theoretical_loss": 3.510912402081015, "tokens_seen": 1674706944 }, { "epoch": 0.02, "learning_rate": 0.000497432399903715, "loss": 0.0708, "theoretical_loss": 3.5108888771035307, "tokens_seen": 1674838016 }, { "epoch": 0.02, "learning_rate": 0.0004973922811522106, "loss": 0.071, "theoretical_loss": 3.510865354482476, "tokens_seen": 1674969088 }, { "epoch": 0.02, "learning_rate": 0.0004973521624007061, "loss": 0.0703, "theoretical_loss": 3.5108418342174317, "tokens_seen": 1675100160 }, { "epoch": 0.02, "learning_rate": 0.0004973120436492016, "loss": 0.0672, "theoretical_loss": 3.5108183163079767, "tokens_seen": 1675231232 }, { "epoch": 0.02, "learning_rate": 0.0004972719248976971, "loss": 0.0664, "theoretical_loss": 3.5107948007536907, "tokens_seen": 1675362304 }, { "epoch": 0.02, "learning_rate": 0.0004972318061461928, "loss": 0.0661, "theoretical_loss": 3.5107712875541535, "tokens_seen": 1675493376 }, { "epoch": 0.02, "learning_rate": 0.0004971916873946883, "loss": 0.0667, "theoretical_loss": 3.5107477767089454, "tokens_seen": 1675624448 }, { "epoch": 0.02, "learning_rate": 0.0004971515686431838, "loss": 0.0657, "theoretical_loss": 3.510724268217647, "tokens_seen": 1675755520 }, { "epoch": 0.02, "learning_rate": 0.0004971114498916794, "loss": 0.068, "theoretical_loss": 3.5107007620798374, "tokens_seen": 1675886592 }, { "epoch": 0.02, "learning_rate": 0.0004970713311401749, "loss": 0.0685, "theoretical_loss": 3.510677258295098, "tokens_seen": 1676017664 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.0007873879512771964, "objective/train/docs_used": 612232, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3322644233703613, "objective/train/original_loss": 1.3322644233703613, "objective/train/theoretical_loss": 3.510653756863009, "objective/train/tokens_used": 46673376, "objective/train/value_avg": -0.01161956787109375, "objective/train/value_loss": 0.00033080577850341797, "objective/train/value_max": -8.749961853027344e-05, "objective/train/value_min": -0.39111328125, "objective/train/value_reward_corr": 0.6740619357808635, "objective/train/value_std": 0.0181427001953125, "objective/train/weight_avg": 1.0009422302246094, "objective/train/weighted_lm_loss": 1.3328336477279663, "objective/train/weights_max": 1.3691500425338745, "objective/train/weights_min": 0.37315136194229126, "theoretical_loss": 3.510653756863009, "tokens_seen": 1676148736 }, { "epoch": 0.02, "learning_rate": 0.0004970312123886704, "loss": 0.0679, "theoretical_loss": 3.510653756863009, "tokens_seen": 1676148736 }, { "epoch": 0.02, "learning_rate": 0.0004969910936371661, "loss": 0.0694, "theoretical_loss": 3.5106302577831503, "tokens_seen": 1676279808 }, { "epoch": 0.02, "learning_rate": 0.0004969509748856616, "loss": 0.0677, "theoretical_loss": 3.5106067610551035, "tokens_seen": 1676410880 }, { "epoch": 0.02, "learning_rate": 0.0004969108561341571, "loss": 0.0696, "theoretical_loss": 3.5105832666784487, "tokens_seen": 1676541952 }, { "epoch": 0.02, "learning_rate": 0.0004968707373826526, "loss": 0.0684, "theoretical_loss": 3.5105597746527675, "tokens_seen": 1676673024 }, { "epoch": 0.02, "learning_rate": 0.0004968306186311482, "loss": 0.0679, "theoretical_loss": 3.51053628497764, "tokens_seen": 1676804096 }, { "epoch": 0.02, "learning_rate": 0.0004967904998796437, "loss": 0.0664, "theoretical_loss": 3.5105127976526482, "tokens_seen": 1676935168 }, { "epoch": 0.02, "learning_rate": 0.0004967503811281393, "loss": 0.0703, "theoretical_loss": 3.5104893126773726, "tokens_seen": 1677066240 }, { "epoch": 0.02, "learning_rate": 0.0004967102623766349, "loss": 0.0679, "theoretical_loss": 3.5104658300513942, "tokens_seen": 1677197312 }, { "epoch": 0.02, "learning_rate": 0.0004966701436251304, "loss": 0.0699, "theoretical_loss": 3.5104423497742956, "tokens_seen": 1677328384 }, { "epoch": 0.02, "learning_rate": 0.0004966300248736259, "loss": 0.0689, "theoretical_loss": 3.5104188718456575, "tokens_seen": 1677459456 }, { "epoch": 0.02, "learning_rate": 0.0004965899061221215, "loss": 0.0666, "theoretical_loss": 3.510395396265062, "tokens_seen": 1677590528 }, { "epoch": 0.02, "learning_rate": 0.000496549787370617, "loss": 0.0701, "theoretical_loss": 3.5103719230320904, "tokens_seen": 1677721600 }, { "epoch": 0.02, "learning_rate": 0.0004965096686191126, "loss": 0.0718, "theoretical_loss": 3.5103484521463244, "tokens_seen": 1677852672 }, { "epoch": 0.02, "learning_rate": 0.0004964695498676081, "loss": 0.0702, "theoretical_loss": 3.510324983607347, "tokens_seen": 1677983744 }, { "epoch": 0.02, "learning_rate": 0.0004964294311161037, "loss": 0.0673, "theoretical_loss": 3.510301517414739, "tokens_seen": 1678114816 }, { "epoch": 0.02, "learning_rate": 0.0004963893123645992, "loss": 0.0707, "theoretical_loss": 3.5102780535680838, "tokens_seen": 1678245888 }, { "epoch": 0.02, "learning_rate": 0.0004963491936130947, "loss": 0.0701, "theoretical_loss": 3.5102545920669623, "tokens_seen": 1678376960 }, { "epoch": 0.02, "learning_rate": 0.0004963090748615904, "loss": 0.0685, "theoretical_loss": 3.510231132910958, "tokens_seen": 1678508032 }, { "epoch": 0.02, "learning_rate": 0.0004962689561100859, "loss": 0.0699, "theoretical_loss": 3.5102076760996526, "tokens_seen": 1678639104 }, { "epoch": 0.02, "learning_rate": 0.0004962288373585814, "loss": 0.0694, "theoretical_loss": 3.5101842216326293, "tokens_seen": 1678770176 }, { "epoch": 0.02, "learning_rate": 0.000496188718607077, "loss": 0.0696, "theoretical_loss": 3.5101607695094708, "tokens_seen": 1678901248 }, { "epoch": 0.02, "learning_rate": 0.0004961485998555725, "loss": 0.0712, "theoretical_loss": 3.5101373197297594, "tokens_seen": 1679032320 }, { "epoch": 0.02, "learning_rate": 0.000496108481104068, "loss": 0.0701, "theoretical_loss": 3.5101138722930783, "tokens_seen": 1679163392 }, { "epoch": 0.02, "learning_rate": 0.0004960683623525635, "loss": 0.0693, "theoretical_loss": 3.510090427199011, "tokens_seen": 1679294464 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.0009506663191132247, "objective/train/docs_used": 613519, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4658650159835815, "objective/train/original_loss": 1.465865135192871, "objective/train/theoretical_loss": 3.51006698444714, "objective/train/tokens_used": 49950176, "objective/train/value_avg": -0.0108489990234375, "objective/train/value_loss": 0.0005092518404126167, "objective/train/value_max": -8.749961853027344e-05, "objective/train/value_min": -0.45361328125, "objective/train/value_reward_corr": 0.6362608023689811, "objective/train/value_std": 0.0192108154296875, "objective/train/weight_avg": 1.0011649131774902, "objective/train/weighted_lm_loss": 1.468434453010559, "objective/train/weights_max": 1.3644565343856812, "objective/train/weights_min": 0.23666702210903168, "theoretical_loss": 3.51006698444714, "tokens_seen": 1679425536 }, { "epoch": 0.02, "learning_rate": 0.0004960282436010592, "loss": 0.0701, "theoretical_loss": 3.51006698444714, "tokens_seen": 1679425536 }, { "epoch": 0.02, "learning_rate": 0.0004959881248495547, "loss": 0.0666, "theoretical_loss": 3.5100435440370483, "tokens_seen": 1679556608 }, { "epoch": 0.02, "learning_rate": 0.0004959480060980502, "loss": 0.0683, "theoretical_loss": 3.5100201059683203, "tokens_seen": 1679687680 }, { "epoch": 0.02, "learning_rate": 0.0004959078873465458, "loss": 0.0695, "theoretical_loss": 3.5099966702405383, "tokens_seen": 1679818752 }, { "epoch": 0.02, "learning_rate": 0.0004958677685950413, "loss": 0.0692, "theoretical_loss": 3.5099732368532868, "tokens_seen": 1679949824 }, { "epoch": 0.02, "learning_rate": 0.0004958276498435368, "loss": 0.0671, "theoretical_loss": 3.5099498058061487, "tokens_seen": 1680080896 }, { "epoch": 0.02, "learning_rate": 0.0004957875310920325, "loss": 0.0693, "theoretical_loss": 3.5099263770987084, "tokens_seen": 1680211968 }, { "epoch": 0.02, "learning_rate": 0.000495747412340528, "loss": 0.0699, "theoretical_loss": 3.5099029507305497, "tokens_seen": 1680343040 }, { "epoch": 0.02, "learning_rate": 0.0004957072935890235, "loss": 0.068, "theoretical_loss": 3.5098795267012557, "tokens_seen": 1680474112 }, { "epoch": 0.02, "learning_rate": 0.000495667174837519, "loss": 0.0713, "theoretical_loss": 3.509856105010412, "tokens_seen": 1680605184 }, { "epoch": 0.02, "learning_rate": 0.0004956270560860146, "loss": 0.0686, "theoretical_loss": 3.509832685657601, "tokens_seen": 1680736256 }, { "epoch": 0.02, "learning_rate": 0.0004955869373345101, "loss": 0.0716, "theoretical_loss": 3.509809268642409, "tokens_seen": 1680867328 }, { "epoch": 0.02, "learning_rate": 0.0004955468185830057, "loss": 0.0696, "theoretical_loss": 3.509785853964419, "tokens_seen": 1680998400 }, { "epoch": 0.02, "learning_rate": 0.0004955066998315013, "loss": 0.0719, "theoretical_loss": 3.5097624416232156, "tokens_seen": 1681129472 }, { "epoch": 0.02, "learning_rate": 0.0004954665810799968, "loss": 0.0706, "theoretical_loss": 3.5097390316183845, "tokens_seen": 1681260544 }, { "epoch": 0.02, "learning_rate": 0.0004954264623284923, "loss": 0.0695, "theoretical_loss": 3.509715623949509, "tokens_seen": 1681391616 }, { "epoch": 0.02, "learning_rate": 0.0004953863435769879, "loss": 0.0698, "theoretical_loss": 3.509692218616175, "tokens_seen": 1681522688 }, { "epoch": 0.02, "learning_rate": 0.0004953462248254834, "loss": 0.0685, "theoretical_loss": 3.509668815617967, "tokens_seen": 1681653760 }, { "epoch": 0.02, "learning_rate": 0.000495306106073979, "loss": 0.0694, "theoretical_loss": 3.50964541495447, "tokens_seen": 1681784832 }, { "epoch": 0.02, "learning_rate": 0.0004952659873224745, "loss": 0.0684, "theoretical_loss": 3.5096220166252694, "tokens_seen": 1681915904 }, { "epoch": 0.02, "learning_rate": 0.0004952258685709701, "loss": 0.0713, "theoretical_loss": 3.5095986206299505, "tokens_seen": 1682046976 }, { "epoch": 0.02, "learning_rate": 0.0004951857498194656, "loss": 0.0695, "theoretical_loss": 3.509575226968098, "tokens_seen": 1682178048 }, { "epoch": 0.02, "learning_rate": 0.0004951456310679611, "loss": 0.0741, "theoretical_loss": 3.5095518356392983, "tokens_seen": 1682309120 }, { "epoch": 0.02, "learning_rate": 0.0004951055123164568, "loss": 0.0699, "theoretical_loss": 3.5095284466431362, "tokens_seen": 1682440192 }, { "epoch": 0.02, "learning_rate": 0.0004950653935649523, "loss": 0.0723, "theoretical_loss": 3.5095050599791975, "tokens_seen": 1682571264 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.0006220215000212193, "objective/train/docs_used": 614786, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4391088485717773, "objective/train/original_loss": 1.4391087293624878, "objective/train/theoretical_loss": 3.509481675647069, "objective/train/tokens_used": 53226976, "objective/train/value_avg": -0.00901031494140625, "objective/train/value_loss": 0.0003752339107450098, "objective/train/value_max": -6.35385513305664e-05, "objective/train/value_min": -0.974609375, "objective/train/value_reward_corr": 0.6652227067529786, "objective/train/value_std": 0.0187835693359375, "objective/train/weight_avg": 1.0007959604263306, "objective/train/weighted_lm_loss": 1.4396719932556152, "objective/train/weights_max": 2.046863555908203, "objective/train/weights_min": 0.38571086525917053, "theoretical_loss": 3.509481675647069, "tokens_seen": 1682702336 }, { "epoch": 0.02, "learning_rate": 0.0004950252748134479, "loss": 0.069, "theoretical_loss": 3.509481675647069, "tokens_seen": 1682702336 }, { "epoch": 0.02, "learning_rate": 0.0004949851560619434, "loss": 0.0699, "theoretical_loss": 3.509458293646335, "tokens_seen": 1682833408 }, { "epoch": 0.02, "learning_rate": 0.0004949450373104389, "loss": 0.0694, "theoretical_loss": 3.509434913976583, "tokens_seen": 1682964480 }, { "epoch": 0.02, "learning_rate": 0.0004949049185589344, "loss": 0.0671, "theoretical_loss": 3.509411536637398, "tokens_seen": 1683095552 }, { "epoch": 0.02, "learning_rate": 0.0004948647998074299, "loss": 0.0704, "theoretical_loss": 3.509388161628367, "tokens_seen": 1683226624 }, { "epoch": 0.02, "learning_rate": 0.0004948246810559256, "loss": 0.0705, "theoretical_loss": 3.5093647889490756, "tokens_seen": 1683357696 }, { "epoch": 0.02, "learning_rate": 0.0004947845623044211, "loss": 0.0677, "theoretical_loss": 3.509341418599111, "tokens_seen": 1683488768 }, { "epoch": 0.02, "learning_rate": 0.0004947444435529166, "loss": 0.0751, "theoretical_loss": 3.5093180505780586, "tokens_seen": 1683619840 }, { "epoch": 0.02, "learning_rate": 0.0004947043248014122, "loss": 0.0674, "theoretical_loss": 3.509294684885506, "tokens_seen": 1683750912 }, { "epoch": 0.02, "learning_rate": 0.0004946642060499077, "loss": 0.0701, "theoretical_loss": 3.50927132152104, "tokens_seen": 1683881984 }, { "epoch": 0.02, "learning_rate": 0.0004946240872984034, "loss": 0.0736, "theoretical_loss": 3.509247960484247, "tokens_seen": 1684013056 }, { "epoch": 0.02, "learning_rate": 0.0004945839685468989, "loss": 0.0677, "theoretical_loss": 3.509224601774714, "tokens_seen": 1684144128 }, { "epoch": 0.02, "learning_rate": 0.0004945438497953944, "loss": 0.0715, "theoretical_loss": 3.5092012453920285, "tokens_seen": 1684275200 }, { "epoch": 0.02, "learning_rate": 0.0004945037310438899, "loss": 0.0691, "theoretical_loss": 3.509177891335777, "tokens_seen": 1684406272 }, { "epoch": 0.02, "learning_rate": 0.0004944636122923854, "loss": 0.0712, "theoretical_loss": 3.5091545396055475, "tokens_seen": 1684537344 }, { "epoch": 0.02, "learning_rate": 0.000494423493540881, "loss": 0.0754, "theoretical_loss": 3.509131190200926, "tokens_seen": 1684668416 }, { "epoch": 0.02, "learning_rate": 0.0004943833747893765, "loss": 0.0712, "theoretical_loss": 3.5091078431215017, "tokens_seen": 1684799488 }, { "epoch": 0.02, "learning_rate": 0.0004943432560378721, "loss": 0.0708, "theoretical_loss": 3.5090844983668616, "tokens_seen": 1684930560 }, { "epoch": 0.02, "learning_rate": 0.0004943031372863677, "loss": 0.0661, "theoretical_loss": 3.5090611559365925, "tokens_seen": 1685061632 }, { "epoch": 0.02, "learning_rate": 0.0004942630185348632, "loss": 0.0659, "theoretical_loss": 3.5090378158302835, "tokens_seen": 1685192704 }, { "epoch": 0.02, "learning_rate": 0.0004942228997833588, "loss": 0.0724, "theoretical_loss": 3.509014478047522, "tokens_seen": 1685323776 }, { "epoch": 0.02, "learning_rate": 0.0004941827810318543, "loss": 0.0667, "theoretical_loss": 3.5089911425878952, "tokens_seen": 1685454848 }, { "epoch": 0.02, "learning_rate": 0.0004941426622803499, "loss": 0.0723, "theoretical_loss": 3.5089678094509926, "tokens_seen": 1685585920 }, { "epoch": 0.02, "learning_rate": 0.0004941025435288454, "loss": 0.068, "theoretical_loss": 3.5089444786364012, "tokens_seen": 1685716992 }, { "epoch": 0.02, "learning_rate": 0.0004940624247773409, "loss": 0.0682, "theoretical_loss": 3.5089211501437103, "tokens_seen": 1685848064 }, { "epoch": 0.02, "objective/train/advantage_avg": 0.00010195112554356456, "objective/train/docs_used": 615881, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3712499141693115, "objective/train/original_loss": 1.371250033378601, "objective/train/theoretical_loss": 3.508897823972508, "objective/train/tokens_used": 56503776, "objective/train/value_avg": -0.004962921142578125, "objective/train/value_loss": 0.0001574193884152919, "objective/train/value_max": -4.297494888305664e-05, "objective/train/value_min": -0.395263671875, "objective/train/value_reward_corr": 0.6838172832132924, "objective/train/value_std": 0.01068115234375, "objective/train/weight_avg": 1.0001729726791382, "objective/train/weighted_lm_loss": 1.3706634044647217, "objective/train/weights_max": 1.4847756624221802, "objective/train/weights_min": 0.3784064054489136, "theoretical_loss": 3.508897823972508, "tokens_seen": 1685979136 }, { "epoch": 0.02, "learning_rate": 0.0004940223060258365, "loss": 0.0711, "theoretical_loss": 3.508897823972508, "tokens_seen": 1685979136 }, { "epoch": 0.02, "learning_rate": 0.000493982187274332, "loss": 0.0687, "theoretical_loss": 3.5088745001223822, "tokens_seen": 1686110208 }, { "epoch": 0.02, "learning_rate": 0.0004939420685228275, "loss": 0.0727, "theoretical_loss": 3.5088511785929226, "tokens_seen": 1686241280 }, { "epoch": 0.02, "learning_rate": 0.0004939019497713232, "loss": 0.071, "theoretical_loss": 3.508827859383717, "tokens_seen": 1686372352 }, { "epoch": 0.02, "learning_rate": 0.0004938618310198187, "loss": 0.0699, "theoretical_loss": 3.508804542494355, "tokens_seen": 1686503424 }, { "epoch": 0.02, "learning_rate": 0.0004938217122683143, "loss": 0.0716, "theoretical_loss": 3.5087812279244246, "tokens_seen": 1686634496 }, { "epoch": 0.02, "learning_rate": 0.0004937815935168098, "loss": 0.0716, "theoretical_loss": 3.508757915673516, "tokens_seen": 1686765568 }, { "epoch": 0.02, "learning_rate": 0.0004937414747653053, "loss": 0.0736, "theoretical_loss": 3.5087346057412176, "tokens_seen": 1686896640 }, { "epoch": 0.02, "learning_rate": 0.0004937013560138008, "loss": 0.0701, "theoretical_loss": 3.5087112981271185, "tokens_seen": 1687027712 }, { "epoch": 0.02, "learning_rate": 0.0004936612372622963, "loss": 0.0688, "theoretical_loss": 3.508687992830809, "tokens_seen": 1687158784 }, { "epoch": 0.02, "learning_rate": 0.000493621118510792, "loss": 0.0711, "theoretical_loss": 3.5086646898518774, "tokens_seen": 1687289856 }, { "epoch": 0.02, "learning_rate": 0.0004935809997592875, "loss": 0.074, "theoretical_loss": 3.5086413891899144, "tokens_seen": 1687420928 }, { "epoch": 0.02, "learning_rate": 0.000493540881007783, "loss": 0.0708, "theoretical_loss": 3.508618090844509, "tokens_seen": 1687552000 }, { "epoch": 0.02, "learning_rate": 0.0004935007622562786, "loss": 0.0688, "theoretical_loss": 3.5085947948152514, "tokens_seen": 1687683072 }, { "epoch": 0.02, "learning_rate": 0.0004934606435047741, "loss": 0.0693, "theoretical_loss": 3.508571501101731, "tokens_seen": 1687814144 }, { "epoch": 0.02, "learning_rate": 0.0004934205247532698, "loss": 0.0704, "theoretical_loss": 3.5085482097035383, "tokens_seen": 1687945216 }, { "epoch": 0.02, "learning_rate": 0.0004933804060017653, "loss": 0.0705, "theoretical_loss": 3.5085249206202627, "tokens_seen": 1688076288 }, { "epoch": 0.02, "learning_rate": 0.0004933402872502608, "loss": 0.0683, "theoretical_loss": 3.508501633851495, "tokens_seen": 1688207360 }, { "epoch": 0.02, "learning_rate": 0.0004933001684987563, "loss": 0.0725, "theoretical_loss": 3.5084783493968255, "tokens_seen": 1688338432 }, { "epoch": 0.02, "learning_rate": 0.0004932600497472518, "loss": 0.0685, "theoretical_loss": 3.5084550672558446, "tokens_seen": 1688469504 }, { "epoch": 0.02, "learning_rate": 0.0004932199309957474, "loss": 0.0667, "theoretical_loss": 3.5084317874281425, "tokens_seen": 1688600576 }, { "epoch": 0.02, "learning_rate": 0.000493179812244243, "loss": 0.072, "theoretical_loss": 3.50840850991331, "tokens_seen": 1688731648 }, { "epoch": 0.02, "learning_rate": 0.0004931396934927385, "loss": 0.0729, "theoretical_loss": 3.508385234710938, "tokens_seen": 1688862720 }, { "epoch": 0.02, "learning_rate": 0.0004930995747412341, "loss": 0.069, "theoretical_loss": 3.5083619618206168, "tokens_seen": 1688993792 }, { "epoch": 0.02, "learning_rate": 0.0004930594559897296, "loss": 0.0702, "theoretical_loss": 3.5083386912419385, "tokens_seen": 1689124864 }, { "epoch": 0.02, "objective/train/advantage_avg": -0.0015202894574031234, "objective/train/docs_used": 617011, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.480752944946289, "objective/train/original_loss": 1.480752944946289, "objective/train/theoretical_loss": 3.5083154229744924, "objective/train/tokens_used": 59780576, "objective/train/value_avg": -0.00447845458984375, "objective/train/value_loss": 0.00021339152590371668, "objective/train/value_max": -1.4603137969970703e-05, "objective/train/value_min": -0.156494140625, "objective/train/value_reward_corr": 0.6257282429459289, "objective/train/value_std": 0.007717132568359375, "objective/train/weight_avg": 0.9985764622688293, "objective/train/weighted_lm_loss": 1.4790672063827515, "objective/train/weights_max": 1.119228720664978, "objective/train/weights_min": 0.3721534311771393, "theoretical_loss": 3.5083154229744924, "tokens_seen": 1689255936 }, { "epoch": 0.02, "learning_rate": 0.0004930193372382252, "loss": 0.0687, "theoretical_loss": 3.5083154229744924, "tokens_seen": 1689255936 }, { "epoch": 0.02, "learning_rate": 0.0004929792184867207, "loss": 0.0704, "theoretical_loss": 3.508292157017871, "tokens_seen": 1689387008 }, { "epoch": 0.02, "learning_rate": 0.0004929390997352163, "loss": 0.0676, "theoretical_loss": 3.5082688933716653, "tokens_seen": 1689518080 }, { "epoch": 0.02, "learning_rate": 0.0004928989809837118, "loss": 0.0752, "theoretical_loss": 3.5082456320354662, "tokens_seen": 1689649152 }, { "epoch": 0.02, "learning_rate": 0.0004928588622322073, "loss": 0.0653, "theoretical_loss": 3.5082223730088655, "tokens_seen": 1689780224 }, { "epoch": 0.02, "learning_rate": 0.0004928187434807029, "loss": 0.0684, "theoretical_loss": 3.508199116291455, "tokens_seen": 1689911296 }, { "epoch": 0.02, "learning_rate": 0.0004927786247291984, "loss": 0.0688, "theoretical_loss": 3.5081758618828256, "tokens_seen": 1690042368 }, { "epoch": 0.02, "learning_rate": 0.0004927385059776939, "loss": 0.0698, "theoretical_loss": 3.5081526097825697, "tokens_seen": 1690173440 }, { "epoch": 0.02, "learning_rate": 0.0004926983872261896, "loss": 0.0683, "theoretical_loss": 3.5081293599902788, "tokens_seen": 1690304512 }, { "epoch": 0.02, "learning_rate": 0.0004926582684746851, "loss": 0.0697, "theoretical_loss": 3.5081061125055455, "tokens_seen": 1690435584 }, { "epoch": 0.02, "learning_rate": 0.0004926181497231807, "loss": 0.0693, "theoretical_loss": 3.5080828673279614, "tokens_seen": 1690566656 }, { "epoch": 0.02, "learning_rate": 0.0004925780309716762, "loss": 0.071, "theoretical_loss": 3.5080596244571183, "tokens_seen": 1690697728 }, { "epoch": 0.02, "learning_rate": 0.0004925379122201717, "loss": 0.0707, "theoretical_loss": 3.5080363838926094, "tokens_seen": 1690828800 }, { "epoch": 0.02, "learning_rate": 0.0004924977934686672, "loss": 0.0691, "theoretical_loss": 3.5080131456340267, "tokens_seen": 1690959872 }, { "epoch": 0.02, "learning_rate": 0.0004924576747171627, "loss": 0.0728, "theoretical_loss": 3.5079899096809624, "tokens_seen": 1691090944 }, { "epoch": 0.03, "learning_rate": 0.0004924175559656584, "loss": 0.0722, "theoretical_loss": 3.507966676033009, "tokens_seen": 1691222016 }, { "epoch": 0.03, "learning_rate": 0.0004923774372141539, "loss": 0.0713, "theoretical_loss": 3.5079434446897597, "tokens_seen": 1691353088 }, { "epoch": 0.03, "learning_rate": 0.0004923373184626495, "loss": 0.0741, "theoretical_loss": 3.5079202156508074, "tokens_seen": 1691484160 }, { "epoch": 0.03, "learning_rate": 0.000492297199711145, "loss": 0.0719, "theoretical_loss": 3.5078969889157445, "tokens_seen": 1691615232 }, { "epoch": 0.03, "learning_rate": 0.0004922570809596405, "loss": 0.0739, "theoretical_loss": 3.507873764484164, "tokens_seen": 1691746304 }, { "epoch": 0.03, "learning_rate": 0.0004922169622081362, "loss": 0.0727, "theoretical_loss": 3.5078505423556594, "tokens_seen": 1691877376 }, { "epoch": 0.03, "learning_rate": 0.0004921768434566317, "loss": 0.0673, "theoretical_loss": 3.5078273225298235, "tokens_seen": 1692008448 }, { "epoch": 0.03, "learning_rate": 0.0004921367247051272, "loss": 0.0738, "theoretical_loss": 3.50780410500625, "tokens_seen": 1692139520 }, { "epoch": 0.03, "learning_rate": 0.0004920966059536227, "loss": 0.0684, "theoretical_loss": 3.5077808897845326, "tokens_seen": 1692270592 }, { "epoch": 0.03, "learning_rate": 0.0004920564872021182, "loss": 0.0674, "theoretical_loss": 3.507757676864264, "tokens_seen": 1692401664 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.0007037267205305398, "objective/train/docs_used": 618329, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4397320747375488, "objective/train/original_loss": 1.4397320747375488, "objective/train/theoretical_loss": 3.5077344662450383, "objective/train/tokens_used": 63057376, "objective/train/value_avg": -0.00554656982421875, "objective/train/value_loss": 8.72205346240662e-05, "objective/train/value_max": -9.5367431640625e-05, "objective/train/value_min": -0.2020263671875, "objective/train/value_reward_corr": 0.5252625078821915, "objective/train/value_std": 0.007251739501953125, "objective/train/weight_avg": 1.0007469654083252, "objective/train/weighted_lm_loss": 1.4405955076217651, "objective/train/weights_max": 1.219853162765503, "objective/train/weights_min": 0.8089848756790161, "theoretical_loss": 3.5077344662450383, "tokens_seen": 1692532736 }, { "epoch": 0.03, "learning_rate": 0.0004920163684506138, "loss": 0.0696, "theoretical_loss": 3.5077344662450383, "tokens_seen": 1692532736 }, { "epoch": 0.03, "learning_rate": 0.0004919762496991094, "loss": 0.0665, "theoretical_loss": 3.5077112579264496, "tokens_seen": 1692663808 }, { "epoch": 0.03, "learning_rate": 0.000491936130947605, "loss": 0.0758, "theoretical_loss": 3.507688051908091, "tokens_seen": 1692794880 }, { "epoch": 0.03, "learning_rate": 0.0004918960121961005, "loss": 0.0721, "theoretical_loss": 3.507664848189557, "tokens_seen": 1692925952 }, { "epoch": 0.03, "learning_rate": 0.000491855893444596, "loss": 0.072, "theoretical_loss": 3.5076416467704408, "tokens_seen": 1693057024 }, { "epoch": 0.03, "learning_rate": 0.0004918157746930916, "loss": 0.067, "theoretical_loss": 3.507618447650337, "tokens_seen": 1693188096 }, { "epoch": 0.03, "learning_rate": 0.0004917756559415871, "loss": 0.0707, "theoretical_loss": 3.507595250828841, "tokens_seen": 1693319168 }, { "epoch": 0.03, "learning_rate": 0.0004917355371900827, "loss": 0.075, "theoretical_loss": 3.5075720563055457, "tokens_seen": 1693450240 }, { "epoch": 0.03, "learning_rate": 0.0004916954184385782, "loss": 0.0675, "theoretical_loss": 3.5075488640800456, "tokens_seen": 1693581312 }, { "epoch": 0.03, "learning_rate": 0.0004916552996870737, "loss": 0.0738, "theoretical_loss": 3.5075256741519363, "tokens_seen": 1693712384 }, { "epoch": 0.03, "learning_rate": 0.0004916151809355693, "loss": 0.0733, "theoretical_loss": 3.5075024865208118, "tokens_seen": 1693843456 }, { "epoch": 0.03, "learning_rate": 0.0004915750621840648, "loss": 0.069, "theoretical_loss": 3.507479301186266, "tokens_seen": 1693974528 }, { "epoch": 0.03, "learning_rate": 0.0004915349434325604, "loss": 0.0733, "theoretical_loss": 3.507456118147896, "tokens_seen": 1694105600 }, { "epoch": 0.03, "learning_rate": 0.000491494824681056, "loss": 0.0695, "theoretical_loss": 3.5074329374052944, "tokens_seen": 1694236672 }, { "epoch": 0.03, "learning_rate": 0.0004914547059295515, "loss": 0.0714, "theoretical_loss": 3.5074097589580577, "tokens_seen": 1694367744 }, { "epoch": 0.03, "learning_rate": 0.0004914145871780471, "loss": 0.0704, "theoretical_loss": 3.507386582805781, "tokens_seen": 1694498816 }, { "epoch": 0.03, "learning_rate": 0.0004913744684265426, "loss": 0.0703, "theoretical_loss": 3.5073634089480583, "tokens_seen": 1694629888 }, { "epoch": 0.03, "learning_rate": 0.0004913343496750381, "loss": 0.0729, "theoretical_loss": 3.5073402373844864, "tokens_seen": 1694760960 }, { "epoch": 0.03, "learning_rate": 0.0004912942309235336, "loss": 0.0691, "theoretical_loss": 3.507317068114661, "tokens_seen": 1694892032 }, { "epoch": 0.03, "learning_rate": 0.0004912541121720291, "loss": 0.0724, "theoretical_loss": 3.5072939011381763, "tokens_seen": 1695023104 }, { "epoch": 0.03, "learning_rate": 0.0004912139934205248, "loss": 0.068, "theoretical_loss": 3.507270736454629, "tokens_seen": 1695154176 }, { "epoch": 0.03, "learning_rate": 0.0004911738746690203, "loss": 0.067, "theoretical_loss": 3.507247574063614, "tokens_seen": 1695285248 }, { "epoch": 0.03, "learning_rate": 0.0004911337559175159, "loss": 0.0736, "theoretical_loss": 3.507224413964728, "tokens_seen": 1695416320 }, { "epoch": 0.03, "learning_rate": 0.0004910936371660114, "loss": 0.0687, "theoretical_loss": 3.5072012561575674, "tokens_seen": 1695547392 }, { "epoch": 0.03, "learning_rate": 0.0004910535184145069, "loss": 0.0681, "theoretical_loss": 3.5071781006417266, "tokens_seen": 1695678464 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.001871579559519887, "objective/train/docs_used": 619583, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.373203992843628, "objective/train/original_loss": 1.3732037544250488, "objective/train/theoretical_loss": 3.5071549474168036, "objective/train/tokens_used": 66334176, "objective/train/value_avg": -0.00833892822265625, "objective/train/value_loss": 0.00014364864910021424, "objective/train/value_max": -6.759166717529297e-05, "objective/train/value_min": -0.1646728515625, "objective/train/value_reward_corr": 0.6889108563924313, "objective/train/value_std": 0.01202392578125, "objective/train/weight_avg": 1.001939296722412, "objective/train/weighted_lm_loss": 1.3754535913467407, "objective/train/weights_max": 1.1076587438583374, "objective/train/weights_min": 0.3802064061164856, "theoretical_loss": 3.5071549474168036, "tokens_seen": 1695809536 }, { "epoch": 0.03, "learning_rate": 0.0004910133996630026, "loss": 0.0706, "theoretical_loss": 3.5071549474168036, "tokens_seen": 1695809536 }, { "epoch": 0.03, "learning_rate": 0.0004909732809114981, "loss": 0.0722, "theoretical_loss": 3.507131796482394, "tokens_seen": 1695940608 }, { "epoch": 0.03, "learning_rate": 0.0004909331621599936, "loss": 0.0714, "theoretical_loss": 3.507108647838094, "tokens_seen": 1696071680 }, { "epoch": 0.03, "learning_rate": 0.0004908930434084891, "loss": 0.0679, "theoretical_loss": 3.5070855014835, "tokens_seen": 1696202752 }, { "epoch": 0.03, "learning_rate": 0.0004908529246569846, "loss": 0.07, "theoretical_loss": 3.507062357418209, "tokens_seen": 1696333824 }, { "epoch": 0.03, "learning_rate": 0.0004908128059054802, "loss": 0.0701, "theoretical_loss": 3.507039215641818, "tokens_seen": 1696464896 }, { "epoch": 0.03, "learning_rate": 0.0004907726871539758, "loss": 0.0695, "theoretical_loss": 3.5070160761539233, "tokens_seen": 1696595968 }, { "epoch": 0.03, "learning_rate": 0.0004907325684024714, "loss": 0.0682, "theoretical_loss": 3.506992938954122, "tokens_seen": 1696727040 }, { "epoch": 0.03, "learning_rate": 0.0004906924496509669, "loss": 0.0681, "theoretical_loss": 3.506969804042011, "tokens_seen": 1696858112 }, { "epoch": 0.03, "learning_rate": 0.0004906523308994624, "loss": 0.0692, "theoretical_loss": 3.5069466714171873, "tokens_seen": 1696989184 }, { "epoch": 0.03, "learning_rate": 0.000490612212147958, "loss": 0.0661, "theoretical_loss": 3.5069235410792485, "tokens_seen": 1697120256 }, { "epoch": 0.03, "learning_rate": 0.0004905720933964535, "loss": 0.067, "theoretical_loss": 3.5069004130277914, "tokens_seen": 1697251328 }, { "epoch": 0.03, "learning_rate": 0.000490531974644949, "loss": 0.066, "theoretical_loss": 3.506877287262414, "tokens_seen": 1697382400 }, { "epoch": 0.03, "learning_rate": 0.0004904918558934446, "loss": 0.0696, "theoretical_loss": 3.5068541637827138, "tokens_seen": 1697513472 }, { "epoch": 0.03, "learning_rate": 0.0004904517371419401, "loss": 0.0699, "theoretical_loss": 3.5068310425882876, "tokens_seen": 1697644544 }, { "epoch": 0.03, "learning_rate": 0.0004904116183904357, "loss": 0.0693, "theoretical_loss": 3.5068079236787346, "tokens_seen": 1697775616 }, { "epoch": 0.03, "learning_rate": 0.0004903714996389312, "loss": 0.0753, "theoretical_loss": 3.506784807053651, "tokens_seen": 1697906688 }, { "epoch": 0.03, "learning_rate": 0.0004903313808874268, "loss": 0.0697, "theoretical_loss": 3.5067616927126357, "tokens_seen": 1698037760 }, { "epoch": 0.03, "learning_rate": 0.0004902912621359224, "loss": 0.0665, "theoretical_loss": 3.506738580655287, "tokens_seen": 1698168832 }, { "epoch": 0.03, "learning_rate": 0.0004902511433844179, "loss": 0.0706, "theoretical_loss": 3.506715470881202, "tokens_seen": 1698299904 }, { "epoch": 0.03, "learning_rate": 0.0004902110246329135, "loss": 0.0698, "theoretical_loss": 3.5066923633899796, "tokens_seen": 1698430976 }, { "epoch": 0.03, "learning_rate": 0.000490170905881409, "loss": 0.0697, "theoretical_loss": 3.506669258181218, "tokens_seen": 1698562048 }, { "epoch": 0.03, "learning_rate": 0.0004901307871299045, "loss": 0.0701, "theoretical_loss": 3.506646155254516, "tokens_seen": 1698693120 }, { "epoch": 0.03, "learning_rate": 0.0004900906683784, "loss": 0.0662, "theoretical_loss": 3.506623054609472, "tokens_seen": 1698824192 }, { "epoch": 0.03, "learning_rate": 0.0004900505496268955, "loss": 0.0702, "theoretical_loss": 3.506599956245684, "tokens_seen": 1698955264 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.0011367995757609606, "objective/train/docs_used": 620634, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.489615797996521, "objective/train/original_loss": 1.4896156787872314, "objective/train/theoretical_loss": 3.5065768601627516, "objective/train/tokens_used": 69610976, "objective/train/value_avg": -0.00661468505859375, "objective/train/value_loss": 0.00022158515639603138, "objective/train/value_max": -8.684396743774414e-05, "objective/train/value_min": -0.331298828125, "objective/train/value_reward_corr": 0.6067519717587337, "objective/train/value_std": 0.011077880859375, "objective/train/weight_avg": 1.0012404918670654, "objective/train/weighted_lm_loss": 1.4908442497253418, "objective/train/weights_max": 1.2564263343811035, "objective/train/weights_min": 0.3688638210296631, "theoretical_loss": 3.5065768601627516, "tokens_seen": 1699086336 }, { "epoch": 0.03, "learning_rate": 0.0004900104308753912, "loss": 0.0684, "theoretical_loss": 3.5065768601627516, "tokens_seen": 1699086336 }, { "epoch": 0.03, "learning_rate": 0.0004899703121238867, "loss": 0.0671, "theoretical_loss": 3.5065537663602737, "tokens_seen": 1699217408 }, { "epoch": 0.03, "learning_rate": 0.0004899301933723823, "loss": 0.0679, "theoretical_loss": 3.5065306748378484, "tokens_seen": 1699348480 }, { "epoch": 0.03, "learning_rate": 0.0004898900746208778, "loss": 0.0752, "theoretical_loss": 3.506507585595075, "tokens_seen": 1699479552 }, { "epoch": 0.03, "learning_rate": 0.0004898499558693733, "loss": 0.0706, "theoretical_loss": 3.5064844986315533, "tokens_seen": 1699610624 }, { "epoch": 0.03, "learning_rate": 0.000489809837117869, "loss": 0.0659, "theoretical_loss": 3.506461413946882, "tokens_seen": 1699741696 }, { "epoch": 0.03, "learning_rate": 0.0004897697183663645, "loss": 0.0681, "theoretical_loss": 3.506438331540661, "tokens_seen": 1699872768 }, { "epoch": 0.03, "learning_rate": 0.00048972959961486, "loss": 0.0634, "theoretical_loss": 3.5064152514124887, "tokens_seen": 1700003840 }, { "epoch": 0.03, "learning_rate": 0.0004896894808633555, "loss": 0.0715, "theoretical_loss": 3.5063921735619656, "tokens_seen": 1700134912 }, { "epoch": 0.03, "learning_rate": 0.0004896493621118511, "loss": 0.0706, "theoretical_loss": 3.506369097988691, "tokens_seen": 1700265984 }, { "epoch": 0.03, "learning_rate": 0.0004896092433603466, "loss": 0.0669, "theoretical_loss": 3.506346024692265, "tokens_seen": 1700397056 }, { "epoch": 0.03, "learning_rate": 0.0004895691246088422, "loss": 0.0735, "theoretical_loss": 3.5063229536722864, "tokens_seen": 1700528128 }, { "epoch": 0.03, "learning_rate": 0.0004895290058573378, "loss": 0.0722, "theoretical_loss": 3.5062998849283566, "tokens_seen": 1700659200 }, { "epoch": 0.03, "learning_rate": 0.0004894888871058333, "loss": 0.071, "theoretical_loss": 3.5062768184600754, "tokens_seen": 1700790272 }, { "epoch": 0.03, "learning_rate": 0.0004894487683543288, "loss": 0.0723, "theoretical_loss": 3.5062537542670418, "tokens_seen": 1700921344 }, { "epoch": 0.03, "learning_rate": 0.0004894086496028244, "loss": 0.0709, "theoretical_loss": 3.5062306923488573, "tokens_seen": 1701052416 }, { "epoch": 0.03, "learning_rate": 0.0004893685308513199, "loss": 0.0656, "theoretical_loss": 3.506207632705122, "tokens_seen": 1701183488 }, { "epoch": 0.03, "learning_rate": 0.0004893284120998155, "loss": 0.0669, "theoretical_loss": 3.5061845753354355, "tokens_seen": 1701314560 }, { "epoch": 0.03, "learning_rate": 0.000489288293348311, "loss": 0.0697, "theoretical_loss": 3.5061615202393996, "tokens_seen": 1701445632 }, { "epoch": 0.03, "learning_rate": 0.0004892481745968066, "loss": 0.0687, "theoretical_loss": 3.5061384674166147, "tokens_seen": 1701576704 }, { "epoch": 0.03, "learning_rate": 0.0004892080558453021, "loss": 0.0645, "theoretical_loss": 3.506115416866681, "tokens_seen": 1701707776 }, { "epoch": 0.03, "learning_rate": 0.0004891679370937976, "loss": 0.0658, "theoretical_loss": 3.5060923685892, "tokens_seen": 1701838848 }, { "epoch": 0.03, "learning_rate": 0.0004891278183422932, "loss": 0.0701, "theoretical_loss": 3.5060693225837722, "tokens_seen": 1701969920 }, { "epoch": 0.03, "learning_rate": 0.0004890876995907888, "loss": 0.0719, "theoretical_loss": 3.5060462788499986, "tokens_seen": 1702100992 }, { "epoch": 0.03, "learning_rate": 0.0004890475808392843, "loss": 0.0679, "theoretical_loss": 3.5060232373874807, "tokens_seen": 1702232064 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.0008590768557041883, "objective/train/docs_used": 621885, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3764843940734863, "objective/train/original_loss": 1.3764843940734863, "objective/train/theoretical_loss": 3.50600019819582, "objective/train/tokens_used": 72887776, "objective/train/value_avg": -0.006542205810546875, "objective/train/value_loss": 0.0002676404546946287, "objective/train/value_max": -6.711483001708984e-05, "objective/train/value_min": -0.64990234375, "objective/train/value_reward_corr": 0.5766965285841935, "objective/train/value_std": 0.0127410888671875, "objective/train/weight_avg": 1.0009690523147583, "objective/train/weighted_lm_loss": 1.377072811126709, "objective/train/weights_max": 1.5681589841842651, "objective/train/weights_min": 0.23039580881595612, "theoretical_loss": 3.50600019819582, "tokens_seen": 1702363136 }, { "epoch": 0.03, "learning_rate": 0.0004890074620877799, "loss": 0.0676, "theoretical_loss": 3.50600019819582, "tokens_seen": 1702363136 }, { "epoch": 0.03, "learning_rate": 0.0004889673433362754, "loss": 0.069, "theoretical_loss": 3.5059771612746173, "tokens_seen": 1702494208 }, { "epoch": 0.03, "learning_rate": 0.0004889272245847709, "loss": 0.0724, "theoretical_loss": 3.5059541266234744, "tokens_seen": 1702625280 }, { "epoch": 0.03, "learning_rate": 0.0004888871058332664, "loss": 0.0714, "theoretical_loss": 3.5059310942419932, "tokens_seen": 1702756352 }, { "epoch": 0.03, "learning_rate": 0.0004888469870817621, "loss": 0.0712, "theoretical_loss": 3.505908064129775, "tokens_seen": 1702887424 }, { "epoch": 0.03, "learning_rate": 0.0004888068683302576, "loss": 0.0733, "theoretical_loss": 3.505885036286421, "tokens_seen": 1703018496 }, { "epoch": 0.03, "learning_rate": 0.0004887667495787531, "loss": 0.0676, "theoretical_loss": 3.5058620107115344, "tokens_seen": 1703149568 }, { "epoch": 0.03, "learning_rate": 0.0004887266308272487, "loss": 0.071, "theoretical_loss": 3.505838987404716, "tokens_seen": 1703280640 }, { "epoch": 0.03, "learning_rate": 0.0004886865120757442, "loss": 0.0669, "theoretical_loss": 3.505815966365568, "tokens_seen": 1703411712 }, { "epoch": 0.03, "learning_rate": 0.0004886463933242397, "loss": 0.0709, "theoretical_loss": 3.5057929475936938, "tokens_seen": 1703542784 }, { "epoch": 0.03, "learning_rate": 0.0004886062745727354, "loss": 0.0714, "theoretical_loss": 3.5057699310886945, "tokens_seen": 1703673856 }, { "epoch": 0.03, "learning_rate": 0.0004885661558212309, "loss": 0.0685, "theoretical_loss": 3.505746916850173, "tokens_seen": 1703804928 }, { "epoch": 0.03, "learning_rate": 0.0004885260370697264, "loss": 0.0703, "theoretical_loss": 3.505723904877731, "tokens_seen": 1703936000 }, { "epoch": 0.03, "learning_rate": 0.0004884859183182219, "loss": 0.0686, "theoretical_loss": 3.505700895170972, "tokens_seen": 1704067072 }, { "epoch": 0.03, "learning_rate": 0.0004884457995667175, "loss": 0.069, "theoretical_loss": 3.5056778877294983, "tokens_seen": 1704198144 }, { "epoch": 0.03, "learning_rate": 0.000488405680815213, "loss": 0.0736, "theoretical_loss": 3.5056548825529132, "tokens_seen": 1704329216 }, { "epoch": 0.03, "learning_rate": 0.0004883655620637086, "loss": 0.0707, "theoretical_loss": 3.5056318796408186, "tokens_seen": 1704460288 }, { "epoch": 0.03, "learning_rate": 0.0004883254433122042, "loss": 0.069, "theoretical_loss": 3.505608878992818, "tokens_seen": 1704591360 }, { "epoch": 0.03, "learning_rate": 0.0004882853245606997, "loss": 0.0732, "theoretical_loss": 3.505585880608515, "tokens_seen": 1704722432 }, { "epoch": 0.03, "learning_rate": 0.00048824520580919526, "loss": 0.0711, "theoretical_loss": 3.5055628844875115, "tokens_seen": 1704853504 }, { "epoch": 0.03, "learning_rate": 0.0004882050870576908, "loss": 0.0732, "theoretical_loss": 3.505539890629412, "tokens_seen": 1704984576 }, { "epoch": 0.03, "learning_rate": 0.00048816496830618634, "loss": 0.0659, "theoretical_loss": 3.5055168990338195, "tokens_seen": 1705115648 }, { "epoch": 0.03, "learning_rate": 0.0004881248495546819, "loss": 0.0718, "theoretical_loss": 3.5054939097003377, "tokens_seen": 1705246720 }, { "epoch": 0.03, "learning_rate": 0.00048808473080317737, "loss": 0.071, "theoretical_loss": 3.5054709226285694, "tokens_seen": 1705377792 }, { "epoch": 0.03, "learning_rate": 0.00048804461205167294, "loss": 0.068, "theoretical_loss": 3.505447937818119, "tokens_seen": 1705508864 }, { "epoch": 0.03, "objective/train/advantage_avg": 0.0014893292682245374, "objective/train/docs_used": 623014, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3583756685256958, "objective/train/original_loss": 1.3583755493164062, "objective/train/theoretical_loss": 3.50542495526859, "objective/train/tokens_used": 76164576, "objective/train/value_avg": -0.00962066650390625, "objective/train/value_loss": 0.0004134947375860065, "objective/train/value_max": -7.31348991394043e-05, "objective/train/value_min": -0.87109375, "objective/train/value_reward_corr": 0.665972764185776, "objective/train/value_std": 0.0188751220703125, "objective/train/weight_avg": 1.0016758441925049, "objective/train/weighted_lm_loss": 1.3601857423782349, "objective/train/weights_max": 1.7966744899749756, "objective/train/weights_min": 0.3703887462615967, "theoretical_loss": 3.50542495526859, "tokens_seen": 1705639936 }, { "epoch": 0.03, "learning_rate": 0.0004880044933001685, "loss": 0.0683, "theoretical_loss": 3.50542495526859, "tokens_seen": 1705639936 }, { "epoch": 0.03, "learning_rate": 0.000487964374548664, "loss": 0.0717, "theoretical_loss": 3.5054019749795864, "tokens_seen": 1705771008 }, { "epoch": 0.03, "learning_rate": 0.0004879242557971596, "loss": 0.0684, "theoretical_loss": 3.5053789969507125, "tokens_seen": 1705902080 }, { "epoch": 0.03, "learning_rate": 0.00048788413704565516, "loss": 0.0705, "theoretical_loss": 3.5053560211815715, "tokens_seen": 1706033152 }, { "epoch": 0.03, "learning_rate": 0.0004878440182941507, "loss": 0.0705, "theoretical_loss": 3.505333047671768, "tokens_seen": 1706164224 }, { "epoch": 0.03, "learning_rate": 0.00048780389954264624, "loss": 0.0707, "theoretical_loss": 3.505310076420907, "tokens_seen": 1706295296 }, { "epoch": 0.03, "learning_rate": 0.0004877637807911418, "loss": 0.0757, "theoretical_loss": 3.5052871074285923, "tokens_seen": 1706426368 }, { "epoch": 0.03, "learning_rate": 0.0004877236620396374, "loss": 0.0746, "theoretical_loss": 3.505264140694428, "tokens_seen": 1706557440 }, { "epoch": 0.03, "learning_rate": 0.00048768354328813284, "loss": 0.0733, "theoretical_loss": 3.505241176218019, "tokens_seen": 1706688512 }, { "epoch": 0.03, "learning_rate": 0.0004876434245366284, "loss": 0.0711, "theoretical_loss": 3.5052182139989707, "tokens_seen": 1706819584 }, { "epoch": 0.03, "learning_rate": 0.000487603305785124, "loss": 0.0677, "theoretical_loss": 3.5051952540368863, "tokens_seen": 1706950656 }, { "epoch": 0.03, "learning_rate": 0.0004875631870336195, "loss": 0.0704, "theoretical_loss": 3.5051722963313723, "tokens_seen": 1707081728 }, { "epoch": 0.03, "learning_rate": 0.00048752306828211506, "loss": 0.0706, "theoretical_loss": 3.5051493408820327, "tokens_seen": 1707212800 }, { "epoch": 0.03, "learning_rate": 0.0004874829495306106, "loss": 0.0677, "theoretical_loss": 3.505126387688473, "tokens_seen": 1707343872 }, { "epoch": 0.03, "learning_rate": 0.0004874428307791062, "loss": 0.0673, "theoretical_loss": 3.5051034367502982, "tokens_seen": 1707474944 }, { "epoch": 0.03, "learning_rate": 0.0004874027120276017, "loss": 0.0696, "theoretical_loss": 3.5050804880671134, "tokens_seen": 1707606016 }, { "epoch": 0.04, "learning_rate": 0.0004873625932760973, "loss": 0.0699, "theoretical_loss": 3.5050575416385246, "tokens_seen": 1707737088 }, { "epoch": 0.04, "learning_rate": 0.00048732247452459284, "loss": 0.0672, "theoretical_loss": 3.505034597464137, "tokens_seen": 1707868160 }, { "epoch": 0.04, "learning_rate": 0.0004872823557730883, "loss": 0.0668, "theoretical_loss": 3.5050116555435555, "tokens_seen": 1707999232 }, { "epoch": 0.04, "learning_rate": 0.0004872422370215839, "loss": 0.0652, "theoretical_loss": 3.5049887158763866, "tokens_seen": 1708130304 }, { "epoch": 0.04, "learning_rate": 0.00048720211827007944, "loss": 0.069, "theoretical_loss": 3.504965778462236, "tokens_seen": 1708261376 }, { "epoch": 0.04, "learning_rate": 0.000487161999518575, "loss": 0.0707, "theoretical_loss": 3.504942843300709, "tokens_seen": 1708392448 }, { "epoch": 0.04, "learning_rate": 0.0004871218807670705, "loss": 0.0669, "theoretical_loss": 3.504919910391412, "tokens_seen": 1708523520 }, { "epoch": 0.04, "learning_rate": 0.0004870817620155661, "loss": 0.0672, "theoretical_loss": 3.5048969797339513, "tokens_seen": 1708654592 }, { "epoch": 0.04, "learning_rate": 0.00048704164326406166, "loss": 0.0698, "theoretical_loss": 3.504874051327933, "tokens_seen": 1708785664 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0006378982216119766, "objective/train/docs_used": 624223, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2970926761627197, "objective/train/original_loss": 1.2970926761627197, "objective/train/theoretical_loss": 3.5048511251729626, "objective/train/tokens_used": 79441376, "objective/train/value_avg": -0.0075531005859375, "objective/train/value_loss": 0.00021353087504394352, "objective/train/value_max": -7.903575897216797e-05, "objective/train/value_min": -0.28369140625, "objective/train/value_reward_corr": 0.801223791216049, "objective/train/value_std": 0.01509857177734375, "objective/train/weight_avg": 1.0007355213165283, "objective/train/weighted_lm_loss": 1.298920750617981, "objective/train/weights_max": 1.1881715059280396, "objective/train/weights_min": 0.37102511525154114, "theoretical_loss": 3.5048511251729626, "tokens_seen": 1708916736 }, { "epoch": 0.04, "learning_rate": 0.0004870015245125572, "loss": 0.0716, "theoretical_loss": 3.5048511251729626, "tokens_seen": 1708916736 }, { "epoch": 0.04, "learning_rate": 0.00048696140576105274, "loss": 0.0684, "theoretical_loss": 3.504828201268648, "tokens_seen": 1709047808 }, { "epoch": 0.04, "learning_rate": 0.0004869212870095483, "loss": 0.0677, "theoretical_loss": 3.504805279614594, "tokens_seen": 1709178880 }, { "epoch": 0.04, "learning_rate": 0.00048688116825804377, "loss": 0.0631, "theoretical_loss": 3.504782360210408, "tokens_seen": 1709309952 }, { "epoch": 0.04, "learning_rate": 0.00048684104950653934, "loss": 0.0725, "theoretical_loss": 3.504759443055696, "tokens_seen": 1709441024 }, { "epoch": 0.04, "learning_rate": 0.0004868009307550349, "loss": 0.0645, "theoretical_loss": 3.504736528150066, "tokens_seen": 1709572096 }, { "epoch": 0.04, "learning_rate": 0.0004867608120035305, "loss": 0.0694, "theoretical_loss": 3.5047136154931238, "tokens_seen": 1709703168 }, { "epoch": 0.04, "learning_rate": 0.000486720693252026, "loss": 0.0717, "theoretical_loss": 3.504690705084477, "tokens_seen": 1709834240 }, { "epoch": 0.04, "learning_rate": 0.00048668057450052156, "loss": 0.0724, "theoretical_loss": 3.5046677969237328, "tokens_seen": 1709965312 }, { "epoch": 0.04, "learning_rate": 0.00048664045574901713, "loss": 0.071, "theoretical_loss": 3.5046448910104973, "tokens_seen": 1710096384 }, { "epoch": 0.04, "learning_rate": 0.00048660033699751264, "loss": 0.0725, "theoretical_loss": 3.504621987344379, "tokens_seen": 1710227456 }, { "epoch": 0.04, "learning_rate": 0.0004865602182460082, "loss": 0.0698, "theoretical_loss": 3.5045990859249843, "tokens_seen": 1710358528 }, { "epoch": 0.04, "learning_rate": 0.0004865200994945038, "loss": 0.071, "theoretical_loss": 3.504576186751921, "tokens_seen": 1710489600 }, { "epoch": 0.04, "learning_rate": 0.00048647998074299924, "loss": 0.0697, "theoretical_loss": 3.504553289824797, "tokens_seen": 1710620672 }, { "epoch": 0.04, "learning_rate": 0.0004864398619914948, "loss": 0.0682, "theoretical_loss": 3.5045303951432194, "tokens_seen": 1710751744 }, { "epoch": 0.04, "learning_rate": 0.0004863997432399904, "loss": 0.068, "theoretical_loss": 3.5045075027067965, "tokens_seen": 1710882816 }, { "epoch": 0.04, "learning_rate": 0.00048635962448848594, "loss": 0.0676, "theoretical_loss": 3.504484612515136, "tokens_seen": 1711013888 }, { "epoch": 0.04, "learning_rate": 0.00048631950573698146, "loss": 0.0646, "theoretical_loss": 3.5044617245678458, "tokens_seen": 1711144960 }, { "epoch": 0.04, "learning_rate": 0.000486279386985477, "loss": 0.0676, "theoretical_loss": 3.5044388388645333, "tokens_seen": 1711276032 }, { "epoch": 0.04, "learning_rate": 0.0004862392682339726, "loss": 0.0679, "theoretical_loss": 3.5044159554048075, "tokens_seen": 1711407104 }, { "epoch": 0.04, "learning_rate": 0.0004861991494824681, "loss": 0.068, "theoretical_loss": 3.5043930741882763, "tokens_seen": 1711538176 }, { "epoch": 0.04, "learning_rate": 0.0004861590307309637, "loss": 0.072, "theoretical_loss": 3.5043701952145483, "tokens_seen": 1711669248 }, { "epoch": 0.04, "learning_rate": 0.00048611891197945925, "loss": 0.0678, "theoretical_loss": 3.504347318483232, "tokens_seen": 1711800320 }, { "epoch": 0.04, "learning_rate": 0.0004860787932279547, "loss": 0.0679, "theoretical_loss": 3.504324443993935, "tokens_seen": 1711931392 }, { "epoch": 0.04, "learning_rate": 0.0004860386744764503, "loss": 0.0698, "theoretical_loss": 3.504301571746267, "tokens_seen": 1712062464 }, { "epoch": 0.04, "objective/train/advantage_avg": 9.88995743682608e-05, "objective/train/docs_used": 625444, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3475806713104248, "objective/train/original_loss": 1.3475805521011353, "objective/train/theoretical_loss": 3.5042787017398362, "objective/train/tokens_used": 82718176, "objective/train/value_avg": -0.005523681640625, "objective/train/value_loss": 0.00021817903325427324, "objective/train/value_max": -6.866455078125e-05, "objective/train/value_min": -0.3369140625, "objective/train/value_reward_corr": 0.611005556299138, "objective/train/value_std": 0.01020050048828125, "objective/train/weight_avg": 1.0001941919326782, "objective/train/weighted_lm_loss": 1.3477383852005005, "objective/train/weights_max": 1.1710834503173828, "objective/train/weights_min": 0.3691784143447876, "theoretical_loss": 3.5042787017398362, "tokens_seen": 1712193536 }, { "epoch": 0.04, "learning_rate": 0.00048599855572494584, "loss": 0.0693, "theoretical_loss": 3.5042787017398362, "tokens_seen": 1712193536 }, { "epoch": 0.04, "learning_rate": 0.0004859584369734414, "loss": 0.0699, "theoretical_loss": 3.504255833974252, "tokens_seen": 1712324608 }, { "epoch": 0.04, "learning_rate": 0.0004859183182219369, "loss": 0.0707, "theoretical_loss": 3.5042329684491227, "tokens_seen": 1712455680 }, { "epoch": 0.04, "learning_rate": 0.0004858781994704325, "loss": 0.0702, "theoretical_loss": 3.5042101051640575, "tokens_seen": 1712586752 }, { "epoch": 0.04, "learning_rate": 0.00048583808071892806, "loss": 0.0713, "theoretical_loss": 3.5041872441186657, "tokens_seen": 1712717824 }, { "epoch": 0.04, "learning_rate": 0.0004857979619674236, "loss": 0.0695, "theoretical_loss": 3.5041643853125564, "tokens_seen": 1712848896 }, { "epoch": 0.04, "learning_rate": 0.00048575784321591914, "loss": 0.0707, "theoretical_loss": 3.5041415287453397, "tokens_seen": 1712979968 }, { "epoch": 0.04, "learning_rate": 0.0004857177244644147, "loss": 0.0705, "theoretical_loss": 3.504118674416623, "tokens_seen": 1713111040 }, { "epoch": 0.04, "learning_rate": 0.00048567760571291017, "loss": 0.0718, "theoretical_loss": 3.504095822326018, "tokens_seen": 1713242112 }, { "epoch": 0.04, "learning_rate": 0.00048563748696140574, "loss": 0.068, "theoretical_loss": 3.504072972473133, "tokens_seen": 1713373184 }, { "epoch": 0.04, "learning_rate": 0.0004855973682099013, "loss": 0.068, "theoretical_loss": 3.504050124857579, "tokens_seen": 1713504256 }, { "epoch": 0.04, "learning_rate": 0.0004855572494583969, "loss": 0.071, "theoretical_loss": 3.504027279478964, "tokens_seen": 1713635328 }, { "epoch": 0.04, "learning_rate": 0.0004855171307068924, "loss": 0.0634, "theoretical_loss": 3.504004436336899, "tokens_seen": 1713766400 }, { "epoch": 0.04, "learning_rate": 0.00048547701195538796, "loss": 0.069, "theoretical_loss": 3.5039815954309943, "tokens_seen": 1713897472 }, { "epoch": 0.04, "learning_rate": 0.00048543689320388353, "loss": 0.0732, "theoretical_loss": 3.503958756760859, "tokens_seen": 1714028544 }, { "epoch": 0.04, "learning_rate": 0.00048539677445237904, "loss": 0.0737, "theoretical_loss": 3.5039359203261045, "tokens_seen": 1714159616 }, { "epoch": 0.04, "learning_rate": 0.0004853566557008746, "loss": 0.0689, "theoretical_loss": 3.5039130861263406, "tokens_seen": 1714290688 }, { "epoch": 0.04, "learning_rate": 0.0004853165369493702, "loss": 0.0725, "theoretical_loss": 3.5038902541611776, "tokens_seen": 1714421760 }, { "epoch": 0.04, "learning_rate": 0.00048527641819786564, "loss": 0.0709, "theoretical_loss": 3.5038674244302257, "tokens_seen": 1714552832 }, { "epoch": 0.04, "learning_rate": 0.0004852362994463612, "loss": 0.0718, "theoretical_loss": 3.503844596933096, "tokens_seen": 1714683904 }, { "epoch": 0.04, "learning_rate": 0.0004851961806948568, "loss": 0.0685, "theoretical_loss": 3.503821771669399, "tokens_seen": 1714814976 }, { "epoch": 0.04, "learning_rate": 0.00048515606194335235, "loss": 0.0693, "theoretical_loss": 3.503798948638746, "tokens_seen": 1714946048 }, { "epoch": 0.04, "learning_rate": 0.00048511594319184786, "loss": 0.0718, "theoretical_loss": 3.5037761278407467, "tokens_seen": 1715077120 }, { "epoch": 0.04, "learning_rate": 0.00048507582444034343, "loss": 0.0726, "theoretical_loss": 3.503753309275013, "tokens_seen": 1715208192 }, { "epoch": 0.04, "learning_rate": 0.000485035705688839, "loss": 0.0741, "theoretical_loss": 3.503730492941156, "tokens_seen": 1715339264 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0008936775266192853, "objective/train/docs_used": 626732, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.339908242225647, "objective/train/original_loss": 1.3399081230163574, "objective/train/theoretical_loss": 3.5037076788387864, "objective/train/tokens_used": 85994976, "objective/train/value_avg": -0.007049560546875, "objective/train/value_loss": 0.00011757684842450544, "objective/train/value_max": -9.459257125854492e-05, "objective/train/value_min": -0.34912109375, "objective/train/value_reward_corr": 0.6724100077283933, "objective/train/value_std": 0.0113067626953125, "objective/train/weight_avg": 1.0009512901306152, "objective/train/weighted_lm_loss": 1.3418623208999634, "objective/train/weights_max": 1.4178208112716675, "objective/train/weights_min": 0.611880362033844, "theoretical_loss": 3.5037076788387864, "tokens_seen": 1715470336 }, { "epoch": 0.04, "learning_rate": 0.0004849955869373345, "loss": 0.0683, "theoretical_loss": 3.5037076788387864, "tokens_seen": 1715470336 }, { "epoch": 0.04, "learning_rate": 0.0004849554681858301, "loss": 0.0718, "theoretical_loss": 3.503684866967516, "tokens_seen": 1715601408 }, { "epoch": 0.04, "learning_rate": 0.00048491534943432565, "loss": 0.0713, "theoretical_loss": 3.503662057326956, "tokens_seen": 1715732480 }, { "epoch": 0.04, "learning_rate": 0.0004848752306828211, "loss": 0.071, "theoretical_loss": 3.5036392499167173, "tokens_seen": 1715863552 }, { "epoch": 0.04, "learning_rate": 0.0004848351119313167, "loss": 0.0726, "theoretical_loss": 3.503616444736412, "tokens_seen": 1715994624 }, { "epoch": 0.04, "learning_rate": 0.00048479499317981224, "loss": 0.0721, "theoretical_loss": 3.5035936417856526, "tokens_seen": 1716125696 }, { "epoch": 0.04, "learning_rate": 0.0004847548744283078, "loss": 0.0712, "theoretical_loss": 3.5035708410640494, "tokens_seen": 1716256768 }, { "epoch": 0.04, "learning_rate": 0.0004847147556768033, "loss": 0.0676, "theoretical_loss": 3.503548042571215, "tokens_seen": 1716387840 }, { "epoch": 0.04, "learning_rate": 0.0004846746369252989, "loss": 0.07, "theoretical_loss": 3.5035252463067614, "tokens_seen": 1716518912 }, { "epoch": 0.04, "learning_rate": 0.00048463451817379446, "loss": 0.0712, "theoretical_loss": 3.5035024522703004, "tokens_seen": 1716649984 }, { "epoch": 0.04, "learning_rate": 0.00048459439942229, "loss": 0.0677, "theoretical_loss": 3.503479660461444, "tokens_seen": 1716781056 }, { "epoch": 0.04, "learning_rate": 0.00048455428067078555, "loss": 0.0744, "theoretical_loss": 3.5034568708798046, "tokens_seen": 1716912128 }, { "epoch": 0.04, "learning_rate": 0.0004845141619192811, "loss": 0.071, "theoretical_loss": 3.5034340835249953, "tokens_seen": 1717043200 }, { "epoch": 0.04, "learning_rate": 0.00048447404316777663, "loss": 0.0676, "theoretical_loss": 3.5034112983966277, "tokens_seen": 1717174272 }, { "epoch": 0.04, "learning_rate": 0.00048443392441627214, "loss": 0.0703, "theoretical_loss": 3.503388515494315, "tokens_seen": 1717305344 }, { "epoch": 0.04, "learning_rate": 0.0004843938056647677, "loss": 0.0683, "theoretical_loss": 3.5033657348176686, "tokens_seen": 1717436416 }, { "epoch": 0.04, "learning_rate": 0.0004843536869132633, "loss": 0.0687, "theoretical_loss": 3.5033429563663026, "tokens_seen": 1717567488 }, { "epoch": 0.04, "learning_rate": 0.0004843135681617588, "loss": 0.0685, "theoretical_loss": 3.503320180139829, "tokens_seen": 1717698560 }, { "epoch": 0.04, "learning_rate": 0.00048427344941025436, "loss": 0.0689, "theoretical_loss": 3.503297406137861, "tokens_seen": 1717829632 }, { "epoch": 0.04, "learning_rate": 0.00048423333065874993, "loss": 0.0705, "theoretical_loss": 3.503274634360012, "tokens_seen": 1717960704 }, { "epoch": 0.04, "learning_rate": 0.00048419321190724544, "loss": 0.0733, "theoretical_loss": 3.5032518648058946, "tokens_seen": 1718091776 }, { "epoch": 0.04, "learning_rate": 0.000484153093155741, "loss": 0.0684, "theoretical_loss": 3.503229097475122, "tokens_seen": 1718222848 }, { "epoch": 0.04, "learning_rate": 0.0004841129744042366, "loss": 0.0728, "theoretical_loss": 3.503206332367308, "tokens_seen": 1718353920 }, { "epoch": 0.04, "learning_rate": 0.00048407285565273215, "loss": 0.0736, "theoretical_loss": 3.5031835694820654, "tokens_seen": 1718484992 }, { "epoch": 0.04, "learning_rate": 0.0004840327369012276, "loss": 0.0699, "theoretical_loss": 3.5031608088190085, "tokens_seen": 1718616064 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.00047517355415038764, "objective/train/docs_used": 627826, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.534838318824768, "objective/train/original_loss": 1.5348381996154785, "objective/train/theoretical_loss": 3.5031380503777503, "objective/train/tokens_used": 89271776, "objective/train/value_avg": -0.00787353515625, "objective/train/value_loss": 0.00022689314209856093, "objective/train/value_max": -5.227327346801758e-05, "objective/train/value_min": -0.2294921875, "objective/train/value_reward_corr": 0.6138516400585834, "objective/train/value_std": 0.0113525390625, "objective/train/weight_avg": 1.000576376914978, "objective/train/weighted_lm_loss": 1.5364989042282104, "objective/train/weights_max": 1.0980169773101807, "objective/train/weights_min": 0.3689693808555603, "theoretical_loss": 3.5031380503777503, "tokens_seen": 1718747136 }, { "epoch": 0.04, "learning_rate": 0.0004839926181497232, "loss": 0.0702, "theoretical_loss": 3.5031380503777503, "tokens_seen": 1718747136 }, { "epoch": 0.04, "learning_rate": 0.00048395249939821875, "loss": 0.0712, "theoretical_loss": 3.5031152941579045, "tokens_seen": 1718878208 }, { "epoch": 0.04, "learning_rate": 0.00048391238064671426, "loss": 0.0705, "theoretical_loss": 3.503092540159085, "tokens_seen": 1719009280 }, { "epoch": 0.04, "learning_rate": 0.00048387226189520983, "loss": 0.0743, "theoretical_loss": 3.5030697883809063, "tokens_seen": 1719140352 }, { "epoch": 0.04, "learning_rate": 0.0004838321431437054, "loss": 0.0756, "theoretical_loss": 3.5030470388229817, "tokens_seen": 1719271424 }, { "epoch": 0.04, "learning_rate": 0.0004837920243922009, "loss": 0.0741, "theoretical_loss": 3.5030242914849254, "tokens_seen": 1719402496 }, { "epoch": 0.04, "learning_rate": 0.0004837519056406965, "loss": 0.0747, "theoretical_loss": 3.5030015463663515, "tokens_seen": 1719533568 }, { "epoch": 0.04, "learning_rate": 0.00048371178688919205, "loss": 0.07, "theoretical_loss": 3.502978803466875, "tokens_seen": 1719664640 }, { "epoch": 0.04, "learning_rate": 0.0004836716681376876, "loss": 0.0739, "theoretical_loss": 3.502956062786109, "tokens_seen": 1719795712 }, { "epoch": 0.04, "learning_rate": 0.0004836315493861831, "loss": 0.0723, "theoretical_loss": 3.502933324323669, "tokens_seen": 1719926784 }, { "epoch": 0.04, "learning_rate": 0.00048359143063467864, "loss": 0.0706, "theoretical_loss": 3.5029105880791693, "tokens_seen": 1720057856 }, { "epoch": 0.04, "learning_rate": 0.0004835513118831742, "loss": 0.0707, "theoretical_loss": 3.502887854052225, "tokens_seen": 1720188928 }, { "epoch": 0.04, "learning_rate": 0.00048351119313166973, "loss": 0.0703, "theoretical_loss": 3.50286512224245, "tokens_seen": 1720320000 }, { "epoch": 0.04, "learning_rate": 0.0004834710743801653, "loss": 0.0673, "theoretical_loss": 3.5028423926494594, "tokens_seen": 1720451072 }, { "epoch": 0.04, "learning_rate": 0.00048343095562866086, "loss": 0.0721, "theoretical_loss": 3.5028196652728685, "tokens_seen": 1720582144 }, { "epoch": 0.04, "learning_rate": 0.0004833908368771564, "loss": 0.0693, "theoretical_loss": 3.5027969401122925, "tokens_seen": 1720713216 }, { "epoch": 0.04, "learning_rate": 0.00048335071812565195, "loss": 0.0731, "theoretical_loss": 3.502774217167346, "tokens_seen": 1720844288 }, { "epoch": 0.04, "learning_rate": 0.0004833105993741475, "loss": 0.0725, "theoretical_loss": 3.5027514964376447, "tokens_seen": 1720975360 }, { "epoch": 0.04, "learning_rate": 0.0004832704806226431, "loss": 0.0693, "theoretical_loss": 3.5027287779228033, "tokens_seen": 1721106432 }, { "epoch": 0.04, "learning_rate": 0.00048323036187113854, "loss": 0.0693, "theoretical_loss": 3.502706061622438, "tokens_seen": 1721237504 }, { "epoch": 0.04, "learning_rate": 0.0004831902431196341, "loss": 0.0726, "theoretical_loss": 3.502683347536164, "tokens_seen": 1721368576 }, { "epoch": 0.04, "learning_rate": 0.0004831501243681297, "loss": 0.0721, "theoretical_loss": 3.502660635663597, "tokens_seen": 1721499648 }, { "epoch": 0.04, "learning_rate": 0.0004831100056166252, "loss": 0.0683, "theoretical_loss": 3.502637926004353, "tokens_seen": 1721630720 }, { "epoch": 0.04, "learning_rate": 0.00048306988686512076, "loss": 0.0727, "theoretical_loss": 3.5026152185580472, "tokens_seen": 1721761792 }, { "epoch": 0.04, "learning_rate": 0.00048302976811361633, "loss": 0.0728, "theoretical_loss": 3.502592513324296, "tokens_seen": 1721892864 }, { "epoch": 0.04, "objective/train/advantage_avg": 0.0004706753825303167, "objective/train/docs_used": 629016, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3710834980010986, "objective/train/original_loss": 1.371083378791809, "objective/train/theoretical_loss": 3.502569810302715, "objective/train/tokens_used": 92548576, "objective/train/value_avg": -0.0047760009765625, "objective/train/value_loss": 0.00011703209747793153, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.2166748046875, "objective/train/value_reward_corr": 0.570175691029673, "objective/train/value_std": 0.00800323486328125, "objective/train/weight_avg": 1.0005247592926025, "objective/train/weighted_lm_loss": 1.3709388971328735, "objective/train/weights_max": 1.0951390266418457, "objective/train/weights_min": 0.3684675693511963, "theoretical_loss": 3.502569810302715, "tokens_seen": 1722023936 }, { "epoch": 0.04, "learning_rate": 0.00048298964936211185, "loss": 0.0709, "theoretical_loss": 3.502569810302715, "tokens_seen": 1722023936 }, { "epoch": 0.04, "learning_rate": 0.0004829495306106074, "loss": 0.0704, "theoretical_loss": 3.502547109492921, "tokens_seen": 1722155008 }, { "epoch": 0.04, "learning_rate": 0.000482909411859103, "loss": 0.0727, "theoretical_loss": 3.5025244108945293, "tokens_seen": 1722286080 }, { "epoch": 0.04, "learning_rate": 0.00048286929310759855, "loss": 0.0702, "theoretical_loss": 3.502501714507157, "tokens_seen": 1722417152 }, { "epoch": 0.04, "learning_rate": 0.000482829174356094, "loss": 0.0677, "theoretical_loss": 3.50247902033042, "tokens_seen": 1722548224 }, { "epoch": 0.04, "learning_rate": 0.0004827890556045896, "loss": 0.0707, "theoretical_loss": 3.5024563283639356, "tokens_seen": 1722679296 }, { "epoch": 0.04, "learning_rate": 0.00048274893685308515, "loss": 0.0729, "theoretical_loss": 3.502433638607319, "tokens_seen": 1722810368 }, { "epoch": 0.04, "learning_rate": 0.00048270881810158066, "loss": 0.0702, "theoretical_loss": 3.502410951060188, "tokens_seen": 1722941440 }, { "epoch": 0.04, "learning_rate": 0.00048266869935007623, "loss": 0.0709, "theoretical_loss": 3.502388265722159, "tokens_seen": 1723072512 }, { "epoch": 0.04, "learning_rate": 0.0004826285805985718, "loss": 0.073, "theoretical_loss": 3.502365582592849, "tokens_seen": 1723203584 }, { "epoch": 0.04, "learning_rate": 0.0004825884618470673, "loss": 0.0703, "theoretical_loss": 3.5023429016718755, "tokens_seen": 1723334656 }, { "epoch": 0.04, "learning_rate": 0.0004825483430955629, "loss": 0.0704, "theoretical_loss": 3.502320222958854, "tokens_seen": 1723465728 }, { "epoch": 0.04, "learning_rate": 0.00048250822434405845, "loss": 0.0714, "theoretical_loss": 3.502297546453403, "tokens_seen": 1723596800 }, { "epoch": 0.04, "learning_rate": 0.000482468105592554, "loss": 0.0703, "theoretical_loss": 3.502274872155139, "tokens_seen": 1723727872 }, { "epoch": 0.04, "learning_rate": 0.0004824279868410495, "loss": 0.0733, "theoretical_loss": 3.5022522000636807, "tokens_seen": 1723858944 }, { "epoch": 0.04, "learning_rate": 0.00048238786808954505, "loss": 0.0701, "theoretical_loss": 3.502229530178644, "tokens_seen": 1723990016 }, { "epoch": 0.04, "learning_rate": 0.0004823477493380406, "loss": 0.0716, "theoretical_loss": 3.502206862499647, "tokens_seen": 1724121088 }, { "epoch": 0.05, "learning_rate": 0.00048230763058653613, "loss": 0.0708, "theoretical_loss": 3.502184197026308, "tokens_seen": 1724252160 }, { "epoch": 0.05, "learning_rate": 0.0004822675118350317, "loss": 0.0697, "theoretical_loss": 3.5021615337582435, "tokens_seen": 1724383232 }, { "epoch": 0.05, "learning_rate": 0.00048222739308352727, "loss": 0.0736, "theoretical_loss": 3.502138872695072, "tokens_seen": 1724514304 }, { "epoch": 0.05, "learning_rate": 0.00048218727433202283, "loss": 0.0708, "theoretical_loss": 3.5021162138364117, "tokens_seen": 1724645376 }, { "epoch": 0.05, "learning_rate": 0.00048214715558051835, "loss": 0.0674, "theoretical_loss": 3.5020935571818805, "tokens_seen": 1724776448 }, { "epoch": 0.05, "learning_rate": 0.0004821070368290139, "loss": 0.0731, "theoretical_loss": 3.502070902731096, "tokens_seen": 1724907520 }, { "epoch": 0.05, "learning_rate": 0.0004820669180775095, "loss": 0.0696, "theoretical_loss": 3.5020482504836767, "tokens_seen": 1725038592 }, { "epoch": 0.05, "learning_rate": 0.00048202679932600494, "loss": 0.0738, "theoretical_loss": 3.502025600439241, "tokens_seen": 1725169664 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.0019234501523897052, "objective/train/docs_used": 630228, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4451926946640015, "objective/train/original_loss": 1.445192813873291, "objective/train/theoretical_loss": 3.5020029525974077, "objective/train/tokens_used": 95825376, "objective/train/value_avg": -0.006778717041015625, "objective/train/value_loss": 0.00015825527952983975, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.260498046875, "objective/train/value_reward_corr": 0.6243890891302375, "objective/train/value_std": 0.01113128662109375, "objective/train/weight_avg": 1.0019947290420532, "objective/train/weighted_lm_loss": 1.447770118713379, "objective/train/weights_max": 1.1102129220962524, "objective/train/weights_min": 0.3684770464897156, "theoretical_loss": 3.5020029525974077, "tokens_seen": 1725300736 }, { "epoch": 0.05, "learning_rate": 0.0004819866805745005, "loss": 0.0729, "theoretical_loss": 3.5020029525974077, "tokens_seen": 1725300736 }, { "epoch": 0.05, "learning_rate": 0.0004819465618229961, "loss": 0.0682, "theoretical_loss": 3.5019803069577944, "tokens_seen": 1725431808 }, { "epoch": 0.05, "learning_rate": 0.0004819064430714916, "loss": 0.0717, "theoretical_loss": 3.5019576635200202, "tokens_seen": 1725562880 }, { "epoch": 0.05, "learning_rate": 0.00048186632431998716, "loss": 0.0722, "theoretical_loss": 3.5019350222837042, "tokens_seen": 1725693952 }, { "epoch": 0.05, "learning_rate": 0.00048182620556848273, "loss": 0.0707, "theoretical_loss": 3.5019123832484644, "tokens_seen": 1725825024 }, { "epoch": 0.05, "learning_rate": 0.0004817860868169783, "loss": 0.071, "theoretical_loss": 3.5018897464139203, "tokens_seen": 1725956096 }, { "epoch": 0.05, "learning_rate": 0.0004817459680654738, "loss": 0.0705, "theoretical_loss": 3.5018671117796902, "tokens_seen": 1726087168 }, { "epoch": 0.05, "learning_rate": 0.0004817058493139694, "loss": 0.0692, "theoretical_loss": 3.501844479345394, "tokens_seen": 1726218240 }, { "epoch": 0.05, "learning_rate": 0.00048166573056246495, "loss": 0.0725, "theoretical_loss": 3.50182184911065, "tokens_seen": 1726349312 }, { "epoch": 0.05, "learning_rate": 0.0004816256118109604, "loss": 0.0688, "theoretical_loss": 3.5017992210750783, "tokens_seen": 1726480384 }, { "epoch": 0.05, "learning_rate": 0.000481585493059456, "loss": 0.0749, "theoretical_loss": 3.5017765952382973, "tokens_seen": 1726611456 }, { "epoch": 0.05, "learning_rate": 0.00048154537430795155, "loss": 0.071, "theoretical_loss": 3.5017539715999275, "tokens_seen": 1726742528 }, { "epoch": 0.05, "learning_rate": 0.00048150525555644706, "loss": 0.0711, "theoretical_loss": 3.501731350159588, "tokens_seen": 1726873600 }, { "epoch": 0.05, "learning_rate": 0.00048146513680494263, "loss": 0.0736, "theoretical_loss": 3.5017087309168975, "tokens_seen": 1727004672 }, { "epoch": 0.05, "learning_rate": 0.0004814250180534382, "loss": 0.0724, "theoretical_loss": 3.501686113871478, "tokens_seen": 1727135744 }, { "epoch": 0.05, "learning_rate": 0.00048138489930193377, "loss": 0.0698, "theoretical_loss": 3.501663499022947, "tokens_seen": 1727266816 }, { "epoch": 0.05, "learning_rate": 0.0004813447805504293, "loss": 0.0739, "theoretical_loss": 3.501640886370925, "tokens_seen": 1727397888 }, { "epoch": 0.05, "learning_rate": 0.00048130466179892485, "loss": 0.0695, "theoretical_loss": 3.5016182759150336, "tokens_seen": 1727528960 }, { "epoch": 0.05, "learning_rate": 0.0004812645430474204, "loss": 0.0692, "theoretical_loss": 3.5015956676548905, "tokens_seen": 1727660032 }, { "epoch": 0.05, "learning_rate": 0.0004812244242959159, "loss": 0.0716, "theoretical_loss": 3.501573061590118, "tokens_seen": 1727791104 }, { "epoch": 0.05, "learning_rate": 0.00048118430554441145, "loss": 0.0702, "theoretical_loss": 3.5015504577203354, "tokens_seen": 1727922176 }, { "epoch": 0.05, "learning_rate": 0.000481144186792907, "loss": 0.0708, "theoretical_loss": 3.5015278560451626, "tokens_seen": 1728053248 }, { "epoch": 0.05, "learning_rate": 0.00048110406804140253, "loss": 0.0691, "theoretical_loss": 3.5015052565642213, "tokens_seen": 1728184320 }, { "epoch": 0.05, "learning_rate": 0.0004810639492898981, "loss": 0.0708, "theoretical_loss": 3.501482659277131, "tokens_seen": 1728315392 }, { "epoch": 0.05, "learning_rate": 0.00048102383053839367, "loss": 0.0737, "theoretical_loss": 3.501460064183513, "tokens_seen": 1728446464 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.00011555381206562743, "objective/train/docs_used": 631392, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3991897106170654, "objective/train/original_loss": 1.3991899490356445, "objective/train/theoretical_loss": 3.501437471282988, "objective/train/tokens_used": 99102176, "objective/train/value_avg": -0.00890350341796875, "objective/train/value_loss": 0.00033496765536256135, "objective/train/value_max": -6.711483001708984e-05, "objective/train/value_min": -0.265869140625, "objective/train/value_reward_corr": 0.6990164158504403, "objective/train/value_std": 0.01617431640625, "objective/train/weight_avg": 1.0002731084823608, "objective/train/weighted_lm_loss": 1.3982025384902954, "objective/train/weights_max": 1.2529038190841675, "objective/train/weights_min": 0.36834144592285156, "theoretical_loss": 3.501437471282988, "tokens_seen": 1728577536 }, { "epoch": 0.05, "learning_rate": 0.00048098371178688923, "loss": 0.0702, "theoretical_loss": 3.501437471282988, "tokens_seen": 1728577536 }, { "epoch": 0.05, "learning_rate": 0.00048094359303538475, "loss": 0.0708, "theoretical_loss": 3.501414880575177, "tokens_seen": 1728708608 }, { "epoch": 0.05, "learning_rate": 0.0004809034742838803, "loss": 0.0701, "theoretical_loss": 3.5013922920597, "tokens_seen": 1728839680 }, { "epoch": 0.05, "learning_rate": 0.0004808633555323759, "loss": 0.0715, "theoretical_loss": 3.5013697057361792, "tokens_seen": 1728970752 }, { "epoch": 0.05, "learning_rate": 0.00048082323678087135, "loss": 0.0709, "theoretical_loss": 3.5013471216042356, "tokens_seen": 1729101824 }, { "epoch": 0.05, "learning_rate": 0.0004807831180293669, "loss": 0.0713, "theoretical_loss": 3.5013245396634898, "tokens_seen": 1729232896 }, { "epoch": 0.05, "learning_rate": 0.0004807429992778625, "loss": 0.0687, "theoretical_loss": 3.5013019599135635, "tokens_seen": 1729363968 }, { "epoch": 0.05, "learning_rate": 0.000480702880526358, "loss": 0.068, "theoretical_loss": 3.5012793823540784, "tokens_seen": 1729495040 }, { "epoch": 0.05, "learning_rate": 0.00048066276177485357, "loss": 0.0679, "theoretical_loss": 3.501256806984656, "tokens_seen": 1729626112 }, { "epoch": 0.05, "learning_rate": 0.00048062264302334913, "loss": 0.074, "theoretical_loss": 3.501234233804918, "tokens_seen": 1729757184 }, { "epoch": 0.05, "learning_rate": 0.0004805825242718447, "loss": 0.068, "theoretical_loss": 3.5012116628144847, "tokens_seen": 1729888256 }, { "epoch": 0.05, "learning_rate": 0.0004805424055203402, "loss": 0.0707, "theoretical_loss": 3.5011890940129797, "tokens_seen": 1730019328 }, { "epoch": 0.05, "learning_rate": 0.0004805022867688358, "loss": 0.0742, "theoretical_loss": 3.5011665274000245, "tokens_seen": 1730150400 }, { "epoch": 0.05, "learning_rate": 0.00048046216801733135, "loss": 0.0701, "theoretical_loss": 3.50114396297524, "tokens_seen": 1730281472 }, { "epoch": 0.05, "learning_rate": 0.0004804220492658268, "loss": 0.0709, "theoretical_loss": 3.50112140073825, "tokens_seen": 1730412544 }, { "epoch": 0.05, "learning_rate": 0.0004803819305143224, "loss": 0.0697, "theoretical_loss": 3.501098840688675, "tokens_seen": 1730543616 }, { "epoch": 0.05, "learning_rate": 0.00048034181176281795, "loss": 0.0707, "theoretical_loss": 3.501076282826139, "tokens_seen": 1730674688 }, { "epoch": 0.05, "learning_rate": 0.00048030169301131346, "loss": 0.071, "theoretical_loss": 3.501053727150263, "tokens_seen": 1730805760 }, { "epoch": 0.05, "learning_rate": 0.00048026157425980903, "loss": 0.0685, "theoretical_loss": 3.5010311736606696, "tokens_seen": 1730936832 }, { "epoch": 0.05, "learning_rate": 0.0004802214555083046, "loss": 0.0701, "theoretical_loss": 3.501008622356982, "tokens_seen": 1731067904 }, { "epoch": 0.05, "learning_rate": 0.00048018133675680017, "loss": 0.0694, "theoretical_loss": 3.5009860732388223, "tokens_seen": 1731198976 }, { "epoch": 0.05, "learning_rate": 0.0004801412180052957, "loss": 0.0674, "theoretical_loss": 3.5009635263058136, "tokens_seen": 1731330048 }, { "epoch": 0.05, "learning_rate": 0.00048010109925379125, "loss": 0.0711, "theoretical_loss": 3.5009409815575787, "tokens_seen": 1731461120 }, { "epoch": 0.05, "learning_rate": 0.0004800609805022868, "loss": 0.0677, "theoretical_loss": 3.5009184389937404, "tokens_seen": 1731592192 }, { "epoch": 0.05, "learning_rate": 0.0004800208617507823, "loss": 0.0699, "theoretical_loss": 3.500895898613922, "tokens_seen": 1731723264 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.0008934079087339342, "objective/train/docs_used": 632543, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4513126611709595, "objective/train/original_loss": 1.45131254196167, "objective/train/theoretical_loss": 3.500873360417746, "objective/train/tokens_used": 102378976, "objective/train/value_avg": -0.00910186767578125, "objective/train/value_loss": 0.0002422734396532178, "objective/train/value_max": -6.109476089477539e-05, "objective/train/value_min": -0.68603515625, "objective/train/value_reward_corr": 0.7157852088972383, "objective/train/value_std": 0.0171661376953125, "objective/train/weight_avg": 1.0010119676589966, "objective/train/weighted_lm_loss": 1.4518685340881348, "objective/train/weights_max": 1.5390300750732422, "objective/train/weights_min": 0.5340648293495178, "theoretical_loss": 3.500873360417746, "tokens_seen": 1731854336 }, { "epoch": 0.05, "learning_rate": 0.00047998074299927785, "loss": 0.0698, "theoretical_loss": 3.500873360417746, "tokens_seen": 1731854336 }, { "epoch": 0.05, "learning_rate": 0.0004799406242477734, "loss": 0.0645, "theoretical_loss": 3.5008508244048366, "tokens_seen": 1731985408 }, { "epoch": 0.05, "learning_rate": 0.00047990050549626893, "loss": 0.0687, "theoretical_loss": 3.500828290574816, "tokens_seen": 1732116480 }, { "epoch": 0.05, "learning_rate": 0.0004798603867447645, "loss": 0.0719, "theoretical_loss": 3.5008057589273083, "tokens_seen": 1732247552 }, { "epoch": 0.05, "learning_rate": 0.00047982026799326007, "loss": 0.075, "theoretical_loss": 3.5007832294619368, "tokens_seen": 1732378624 }, { "epoch": 0.05, "learning_rate": 0.00047978014924175564, "loss": 0.0682, "theoretical_loss": 3.500760702178325, "tokens_seen": 1732509696 }, { "epoch": 0.05, "learning_rate": 0.00047974003049025115, "loss": 0.0685, "theoretical_loss": 3.5007381770760966, "tokens_seen": 1732640768 }, { "epoch": 0.05, "learning_rate": 0.0004796999117387467, "loss": 0.0724, "theoretical_loss": 3.5007156541548756, "tokens_seen": 1732771840 }, { "epoch": 0.05, "learning_rate": 0.0004796597929872423, "loss": 0.0701, "theoretical_loss": 3.500693133414286, "tokens_seen": 1732902912 }, { "epoch": 0.05, "learning_rate": 0.00047961967423573775, "loss": 0.0696, "theoretical_loss": 3.5006706148539513, "tokens_seen": 1733033984 }, { "epoch": 0.05, "learning_rate": 0.0004795795554842333, "loss": 0.0672, "theoretical_loss": 3.500648098473495, "tokens_seen": 1733165056 }, { "epoch": 0.05, "learning_rate": 0.0004795394367327289, "loss": 0.0705, "theoretical_loss": 3.500625584272543, "tokens_seen": 1733296128 }, { "epoch": 0.05, "learning_rate": 0.00047949931798122445, "loss": 0.0677, "theoretical_loss": 3.500603072250718, "tokens_seen": 1733427200 }, { "epoch": 0.05, "learning_rate": 0.00047945919922971997, "loss": 0.0693, "theoretical_loss": 3.5005805624076456, "tokens_seen": 1733558272 }, { "epoch": 0.05, "learning_rate": 0.00047941908047821553, "loss": 0.0697, "theoretical_loss": 3.500558054742949, "tokens_seen": 1733689344 }, { "epoch": 0.05, "learning_rate": 0.0004793789617267111, "loss": 0.0704, "theoretical_loss": 3.500535549256253, "tokens_seen": 1733820416 }, { "epoch": 0.05, "learning_rate": 0.0004793388429752066, "loss": 0.0734, "theoretical_loss": 3.5005130459471827, "tokens_seen": 1733951488 }, { "epoch": 0.05, "learning_rate": 0.0004792987242237022, "loss": 0.0689, "theoretical_loss": 3.5004905448153627, "tokens_seen": 1734082560 }, { "epoch": 0.05, "learning_rate": 0.00047925860547219775, "loss": 0.0736, "theoretical_loss": 3.5004680458604174, "tokens_seen": 1734213632 }, { "epoch": 0.05, "learning_rate": 0.0004792184867206932, "loss": 0.0686, "theoretical_loss": 3.500445549081972, "tokens_seen": 1734344704 }, { "epoch": 0.05, "learning_rate": 0.0004791783679691888, "loss": 0.0679, "theoretical_loss": 3.5004230544796515, "tokens_seen": 1734475776 }, { "epoch": 0.05, "learning_rate": 0.00047913824921768435, "loss": 0.0674, "theoretical_loss": 3.500400562053081, "tokens_seen": 1734606848 }, { "epoch": 0.05, "learning_rate": 0.0004790981304661799, "loss": 0.0695, "theoretical_loss": 3.5003780718018858, "tokens_seen": 1734737920 }, { "epoch": 0.05, "learning_rate": 0.00047905801171467543, "loss": 0.0736, "theoretical_loss": 3.50035558372569, "tokens_seen": 1734868992 }, { "epoch": 0.05, "learning_rate": 0.000479017892963171, "loss": 0.0689, "theoretical_loss": 3.5003330978241207, "tokens_seen": 1735000064 }, { "epoch": 0.05, "objective/train/advantage_avg": -0.0011630074586719275, "objective/train/docs_used": 633815, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3975988626480103, "objective/train/original_loss": 1.3975989818572998, "objective/train/theoretical_loss": 3.5003106140968026, "objective/train/tokens_used": 105655776, "objective/train/value_avg": -0.0081329345703125, "objective/train/value_loss": 0.0005544610321521759, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.7060546875, "objective/train/value_reward_corr": 0.6977885695205902, "objective/train/value_std": 0.021728515625, "objective/train/weight_avg": 0.9990800619125366, "objective/train/weighted_lm_loss": 1.3964039087295532, "objective/train/weights_max": 1.7559118270874023, "objective/train/weights_min": 0.3721534311771393, "theoretical_loss": 3.5003106140968026, "tokens_seen": 1735131136 }, { "epoch": 0.05, "learning_rate": 0.00047897777421166657, "loss": 0.0702, "theoretical_loss": 3.5003106140968026, "tokens_seen": 1735131136 }, { "epoch": 0.05, "learning_rate": 0.0004789376554601621, "loss": 0.0718, "theoretical_loss": 3.5002881325433606, "tokens_seen": 1735262208 }, { "epoch": 0.05, "learning_rate": 0.00047889753670865765, "loss": 0.072, "theoretical_loss": 3.5002656531634213, "tokens_seen": 1735393280 }, { "epoch": 0.05, "learning_rate": 0.0004788574179571532, "loss": 0.0682, "theoretical_loss": 3.50024317595661, "tokens_seen": 1735524352 }, { "epoch": 0.05, "learning_rate": 0.0004788172992056487, "loss": 0.0688, "theoretical_loss": 3.5002207009225526, "tokens_seen": 1735655424 }, { "epoch": 0.05, "learning_rate": 0.00047877718045414425, "loss": 0.0681, "theoretical_loss": 3.500198228060875, "tokens_seen": 1735786496 }, { "epoch": 0.05, "learning_rate": 0.0004787370617026398, "loss": 0.0683, "theoretical_loss": 3.5001757573712036, "tokens_seen": 1735917568 }, { "epoch": 0.05, "learning_rate": 0.0004786969429511354, "loss": 0.0712, "theoretical_loss": 3.5001532888531637, "tokens_seen": 1736048640 }, { "epoch": 0.05, "learning_rate": 0.0004786568241996309, "loss": 0.0766, "theoretical_loss": 3.5001308225063816, "tokens_seen": 1736179712 }, { "epoch": 0.05, "learning_rate": 0.00047861670544812647, "loss": 0.0723, "theoretical_loss": 3.5001083583304844, "tokens_seen": 1736310784 }, { "epoch": 0.05, "learning_rate": 0.00047857658669662204, "loss": 0.0719, "theoretical_loss": 3.500085896325098, "tokens_seen": 1736441856 }, { "epoch": 0.05, "learning_rate": 0.00047853646794511755, "loss": 0.0714, "theoretical_loss": 3.500063436489848, "tokens_seen": 1736572928 }, { "epoch": 0.05, "learning_rate": 0.0004784963491936131, "loss": 0.0712, "theoretical_loss": 3.5000409788243623, "tokens_seen": 1736704000 }, { "epoch": 0.05, "learning_rate": 0.0004784562304421087, "loss": 0.0685, "theoretical_loss": 3.5000185233282677, "tokens_seen": 1736835072 }, { "epoch": 0.05, "learning_rate": 0.00047841611169060415, "loss": 0.0691, "theoretical_loss": 3.4999960700011896, "tokens_seen": 1736966144 }, { "epoch": 0.05, "learning_rate": 0.0004783759929390997, "loss": 0.0686, "theoretical_loss": 3.4999736188427555, "tokens_seen": 1737097216 }, { "epoch": 0.05, "learning_rate": 0.0004783358741875953, "loss": 0.0723, "theoretical_loss": 3.4999511698525927, "tokens_seen": 1737228288 }, { "epoch": 0.05, "learning_rate": 0.00047829575543609085, "loss": 0.0726, "theoretical_loss": 3.4999287230303278, "tokens_seen": 1737359360 }, { "epoch": 0.05, "learning_rate": 0.00047825563668458637, "loss": 0.0722, "theoretical_loss": 3.4999062783755877, "tokens_seen": 1737490432 }, { "epoch": 0.05, "learning_rate": 0.00047821551793308194, "loss": 0.0716, "theoretical_loss": 3.4998838358880002, "tokens_seen": 1737621504 }, { "epoch": 0.05, "learning_rate": 0.0004781753991815775, "loss": 0.0722, "theoretical_loss": 3.4998613955671924, "tokens_seen": 1737752576 }, { "epoch": 0.05, "learning_rate": 0.000478135280430073, "loss": 0.0707, "theoretical_loss": 3.4998389574127913, "tokens_seen": 1737883648 }, { "epoch": 0.05, "learning_rate": 0.0004780951616785686, "loss": 0.0678, "theoretical_loss": 3.4998165214244246, "tokens_seen": 1738014720 }, { "epoch": 0.05, "learning_rate": 0.00047805504292706415, "loss": 0.0681, "theoretical_loss": 3.4997940876017206, "tokens_seen": 1738145792 }, { "epoch": 0.05, "learning_rate": 0.0004780149241755596, "loss": 0.0736, "theoretical_loss": 3.499771655944306, "tokens_seen": 1738276864 }, { "epoch": 0.05, "objective/train/advantage_avg": 0.0012583236675709486, "objective/train/docs_used": 635089, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4876868724822998, "objective/train/original_loss": 1.4876868724822998, "objective/train/theoretical_loss": 3.499749226451809, "objective/train/tokens_used": 108932576, "objective/train/value_avg": -0.00827789306640625, "objective/train/value_loss": 0.00021881554857827723, "objective/train/value_max": -5.7816505432128906e-05, "objective/train/value_min": -0.689453125, "objective/train/value_reward_corr": 0.7919522327231165, "objective/train/value_std": 0.01947021484375, "objective/train/weight_avg": 1.0013540983200073, "objective/train/weighted_lm_loss": 1.4893608093261719, "objective/train/weights_max": 1.2816765308380127, "objective/train/weights_min": 0.3914025127887726, "theoretical_loss": 3.499749226451809, "tokens_seen": 1738407936 }, { "epoch": 0.05, "learning_rate": 0.0004779748054240552, "loss": 0.0703, "theoretical_loss": 3.499749226451809, "tokens_seen": 1738407936 }, { "epoch": 0.05, "learning_rate": 0.00047793468667255075, "loss": 0.0719, "theoretical_loss": 3.4997267991238568, "tokens_seen": 1738539008 }, { "epoch": 0.05, "learning_rate": 0.0004778945679210463, "loss": 0.0683, "theoretical_loss": 3.4997043739600784, "tokens_seen": 1738670080 }, { "epoch": 0.05, "learning_rate": 0.00047785444916954183, "loss": 0.067, "theoretical_loss": 3.4996819509601016, "tokens_seen": 1738801152 }, { "epoch": 0.05, "learning_rate": 0.0004778143304180374, "loss": 0.069, "theoretical_loss": 3.4996595301235542, "tokens_seen": 1738932224 }, { "epoch": 0.05, "learning_rate": 0.00047777421166653297, "loss": 0.0688, "theoretical_loss": 3.499637111450064, "tokens_seen": 1739063296 }, { "epoch": 0.05, "learning_rate": 0.0004777340929150285, "loss": 0.0723, "theoretical_loss": 3.49961469493926, "tokens_seen": 1739194368 }, { "epoch": 0.05, "learning_rate": 0.00047769397416352405, "loss": 0.0688, "theoretical_loss": 3.499592280590771, "tokens_seen": 1739325440 }, { "epoch": 0.05, "learning_rate": 0.0004776538554120196, "loss": 0.0739, "theoretical_loss": 3.4995698684042242, "tokens_seen": 1739456512 }, { "epoch": 0.05, "learning_rate": 0.0004776137366605151, "loss": 0.0708, "theoretical_loss": 3.499547458379249, "tokens_seen": 1739587584 }, { "epoch": 0.05, "learning_rate": 0.00047757361790901065, "loss": 0.0699, "theoretical_loss": 3.499525050515474, "tokens_seen": 1739718656 }, { "epoch": 0.05, "learning_rate": 0.0004775334991575062, "loss": 0.0689, "theoretical_loss": 3.4995026448125284, "tokens_seen": 1739849728 }, { "epoch": 0.05, "learning_rate": 0.0004774933804060018, "loss": 0.07, "theoretical_loss": 3.49948024127004, "tokens_seen": 1739980800 }, { "epoch": 0.05, "learning_rate": 0.0004774532616544973, "loss": 0.0709, "theoretical_loss": 3.499457839887639, "tokens_seen": 1740111872 }, { "epoch": 0.05, "learning_rate": 0.00047741314290299287, "loss": 0.0716, "theoretical_loss": 3.4994354406649535, "tokens_seen": 1740242944 }, { "epoch": 0.05, "learning_rate": 0.00047737302415148844, "loss": 0.0681, "theoretical_loss": 3.4994130436016126, "tokens_seen": 1740374016 }, { "epoch": 0.05, "learning_rate": 0.00047733290539998395, "loss": 0.0666, "theoretical_loss": 3.499390648697246, "tokens_seen": 1740505088 }, { "epoch": 0.05, "learning_rate": 0.0004772927866484795, "loss": 0.0705, "theoretical_loss": 3.4993682559514836, "tokens_seen": 1740636160 }, { "epoch": 0.06, "learning_rate": 0.0004772526678969751, "loss": 0.0717, "theoretical_loss": 3.4993458653639538, "tokens_seen": 1740767232 }, { "epoch": 0.06, "learning_rate": 0.00047721254914547055, "loss": 0.0731, "theoretical_loss": 3.4993234769342862, "tokens_seen": 1740898304 }, { "epoch": 0.06, "learning_rate": 0.0004771724303939661, "loss": 0.0683, "theoretical_loss": 3.4993010906621107, "tokens_seen": 1741029376 }, { "epoch": 0.06, "learning_rate": 0.0004771323116424617, "loss": 0.0691, "theoretical_loss": 3.4992787065470567, "tokens_seen": 1741160448 }, { "epoch": 0.06, "learning_rate": 0.00047709219289095725, "loss": 0.0724, "theoretical_loss": 3.499256324588755, "tokens_seen": 1741291520 }, { "epoch": 0.06, "learning_rate": 0.00047705207413945277, "loss": 0.073, "theoretical_loss": 3.499233944786834, "tokens_seen": 1741422592 }, { "epoch": 0.06, "learning_rate": 0.00047701195538794834, "loss": 0.0677, "theoretical_loss": 3.4992115671409243, "tokens_seen": 1741553664 }, { "epoch": 0.06, "objective/train/advantage_avg": -3.8376856537070125e-05, "objective/train/docs_used": 636395, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.379647970199585, "objective/train/original_loss": 1.379648208618164, "objective/train/theoretical_loss": 3.499189191650656, "objective/train/tokens_used": 112209376, "objective/train/value_avg": -0.0087890625, "objective/train/value_loss": 0.000187960104085505, "objective/train/value_max": -8.481740951538086e-05, "objective/train/value_min": -0.275634765625, "objective/train/value_reward_corr": 0.7500862764656482, "objective/train/value_std": 0.01401519775390625, "objective/train/weight_avg": 1.0000503063201904, "objective/train/weighted_lm_loss": 1.379643440246582, "objective/train/weights_max": 1.1137093305587769, "objective/train/weights_min": 0.36823785305023193, "theoretical_loss": 3.499189191650656, "tokens_seen": 1741684736 }, { "epoch": 0.06, "learning_rate": 0.0004769718366364439, "loss": 0.0702, "theoretical_loss": 3.499189191650656, "tokens_seen": 1741684736 }, { "epoch": 0.06, "learning_rate": 0.0004769317178849394, "loss": 0.0679, "theoretical_loss": 3.4991668183156595, "tokens_seen": 1741815808 }, { "epoch": 0.06, "learning_rate": 0.000476891599133435, "loss": 0.0703, "theoretical_loss": 3.499144447135565, "tokens_seen": 1741946880 }, { "epoch": 0.06, "learning_rate": 0.00047685148038193056, "loss": 0.0728, "theoretical_loss": 3.499122078110002, "tokens_seen": 1742077952 }, { "epoch": 0.06, "learning_rate": 0.00047681136163042607, "loss": 0.0691, "theoretical_loss": 3.499099711238602, "tokens_seen": 1742209024 }, { "epoch": 0.06, "learning_rate": 0.0004767712428789216, "loss": 0.0675, "theoretical_loss": 3.499077346520995, "tokens_seen": 1742340096 }, { "epoch": 0.06, "learning_rate": 0.00047673112412741715, "loss": 0.0722, "theoretical_loss": 3.4990549839568112, "tokens_seen": 1742471168 }, { "epoch": 0.06, "learning_rate": 0.0004766910053759127, "loss": 0.0691, "theoretical_loss": 3.4990326235456823, "tokens_seen": 1742602240 }, { "epoch": 0.06, "learning_rate": 0.00047665088662440824, "loss": 0.0724, "theoretical_loss": 3.4990102652872377, "tokens_seen": 1742733312 }, { "epoch": 0.06, "learning_rate": 0.0004766107678729038, "loss": 0.0682, "theoretical_loss": 3.4989879091811096, "tokens_seen": 1742864384 }, { "epoch": 0.06, "learning_rate": 0.00047657064912139937, "loss": 0.0721, "theoretical_loss": 3.4989655552269285, "tokens_seen": 1742995456 }, { "epoch": 0.06, "learning_rate": 0.0004765305303698949, "loss": 0.068, "theoretical_loss": 3.498943203424326, "tokens_seen": 1743126528 }, { "epoch": 0.06, "learning_rate": 0.00047649041161839045, "loss": 0.0716, "theoretical_loss": 3.4989208537729315, "tokens_seen": 1743257600 }, { "epoch": 0.06, "learning_rate": 0.000476450292866886, "loss": 0.0675, "theoretical_loss": 3.4988985062723783, "tokens_seen": 1743388672 }, { "epoch": 0.06, "learning_rate": 0.00047641017411538154, "loss": 0.0704, "theoretical_loss": 3.498876160922296, "tokens_seen": 1743519744 }, { "epoch": 0.06, "learning_rate": 0.00047637005536387705, "loss": 0.0689, "theoretical_loss": 3.498853817722318, "tokens_seen": 1743650816 }, { "epoch": 0.06, "learning_rate": 0.0004763299366123726, "loss": 0.0716, "theoretical_loss": 3.4988314766720734, "tokens_seen": 1743781888 }, { "epoch": 0.06, "learning_rate": 0.0004762898178608682, "loss": 0.0738, "theoretical_loss": 3.498809137771196, "tokens_seen": 1743912960 }, { "epoch": 0.06, "learning_rate": 0.0004762496991093637, "loss": 0.0731, "theoretical_loss": 3.4987868010193157, "tokens_seen": 1744044032 }, { "epoch": 0.06, "learning_rate": 0.00047620958035785927, "loss": 0.0691, "theoretical_loss": 3.4987644664160658, "tokens_seen": 1744175104 }, { "epoch": 0.06, "learning_rate": 0.00047616946160635484, "loss": 0.0731, "theoretical_loss": 3.4987421339610774, "tokens_seen": 1744306176 }, { "epoch": 0.06, "learning_rate": 0.00047612934285485035, "loss": 0.0758, "theoretical_loss": 3.4987198036539824, "tokens_seen": 1744437248 }, { "epoch": 0.06, "learning_rate": 0.0004760892241033459, "loss": 0.0686, "theoretical_loss": 3.498697475494413, "tokens_seen": 1744568320 }, { "epoch": 0.06, "learning_rate": 0.0004760491053518415, "loss": 0.071, "theoretical_loss": 3.4986751494820014, "tokens_seen": 1744699392 }, { "epoch": 0.06, "learning_rate": 0.00047600898660033706, "loss": 0.0708, "theoretical_loss": 3.49865282561638, "tokens_seen": 1744830464 }, { "epoch": 0.06, "objective/train/advantage_avg": -0.0007827615481801331, "objective/train/docs_used": 637583, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4051017761230469, "objective/train/original_loss": 1.4051015377044678, "objective/train/theoretical_loss": 3.498630503897181, "objective/train/tokens_used": 115486176, "objective/train/value_avg": -0.00916290283203125, "objective/train/value_loss": 0.00040033666300587356, "objective/train/value_max": -3.045797348022461e-05, "objective/train/value_min": -0.95068359375, "objective/train/value_reward_corr": 0.8745166152705092, "objective/train/value_std": 0.0311431884765625, "objective/train/weight_avg": 0.9993942975997925, "objective/train/weighted_lm_loss": 1.4037233591079712, "objective/train/weights_max": 1.336806058883667, "objective/train/weights_min": 0.36893558502197266, "theoretical_loss": 3.498630503897181, "tokens_seen": 1744961536 }, { "epoch": 0.06, "learning_rate": 0.0004759688678488325, "loss": 0.0682, "theoretical_loss": 3.498630503897181, "tokens_seen": 1744961536 }, { "epoch": 0.06, "learning_rate": 0.0004759287490973281, "loss": 0.0673, "theoretical_loss": 3.498608184324037, "tokens_seen": 1745092608 }, { "epoch": 0.06, "learning_rate": 0.00047588863034582366, "loss": 0.0689, "theoretical_loss": 3.4985858668965797, "tokens_seen": 1745223680 }, { "epoch": 0.06, "learning_rate": 0.00047584851159431917, "loss": 0.072, "theoretical_loss": 3.498563551614443, "tokens_seen": 1745354752 }, { "epoch": 0.06, "learning_rate": 0.00047580839284281474, "loss": 0.0693, "theoretical_loss": 3.498541238477258, "tokens_seen": 1745485824 }, { "epoch": 0.06, "learning_rate": 0.0004757682740913103, "loss": 0.0676, "theoretical_loss": 3.4985189274846586, "tokens_seen": 1745616896 }, { "epoch": 0.06, "learning_rate": 0.0004757281553398058, "loss": 0.0711, "theoretical_loss": 3.4984966186362776, "tokens_seen": 1745747968 }, { "epoch": 0.06, "learning_rate": 0.0004756880365883014, "loss": 0.0723, "theoretical_loss": 3.498474311931748, "tokens_seen": 1745879040 }, { "epoch": 0.06, "learning_rate": 0.00047564791783679696, "loss": 0.0684, "theoretical_loss": 3.498452007370702, "tokens_seen": 1746010112 }, { "epoch": 0.06, "learning_rate": 0.0004756077990852925, "loss": 0.0711, "theoretical_loss": 3.4984297049527737, "tokens_seen": 1746141184 }, { "epoch": 0.06, "learning_rate": 0.000475567680333788, "loss": 0.068, "theoretical_loss": 3.498407404677596, "tokens_seen": 1746272256 }, { "epoch": 0.06, "learning_rate": 0.00047552756158228355, "loss": 0.0704, "theoretical_loss": 3.498385106544802, "tokens_seen": 1746403328 }, { "epoch": 0.06, "learning_rate": 0.0004754874428307791, "loss": 0.0689, "theoretical_loss": 3.498362810554026, "tokens_seen": 1746534400 }, { "epoch": 0.06, "learning_rate": 0.00047544732407927464, "loss": 0.0717, "theoretical_loss": 3.4983405167049004, "tokens_seen": 1746665472 }, { "epoch": 0.06, "learning_rate": 0.0004754072053277702, "loss": 0.0692, "theoretical_loss": 3.4983182249970595, "tokens_seen": 1746796544 }, { "epoch": 0.06, "learning_rate": 0.0004753670865762658, "loss": 0.0677, "theoretical_loss": 3.4982959354301366, "tokens_seen": 1746927616 }, { "epoch": 0.06, "learning_rate": 0.0004753269678247613, "loss": 0.0718, "theoretical_loss": 3.4982736480037655, "tokens_seen": 1747058688 }, { "epoch": 0.06, "learning_rate": 0.00047528684907325686, "loss": 0.0703, "theoretical_loss": 3.49825136271758, "tokens_seen": 1747189760 }, { "epoch": 0.06, "learning_rate": 0.0004752467303217524, "loss": 0.0664, "theoretical_loss": 3.4982290795712148, "tokens_seen": 1747320832 }, { "epoch": 0.06, "learning_rate": 0.000475206611570248, "loss": 0.0731, "theoretical_loss": 3.498206798564303, "tokens_seen": 1747451904 }, { "epoch": 0.06, "learning_rate": 0.00047516649281874345, "loss": 0.0724, "theoretical_loss": 3.498184519696479, "tokens_seen": 1747582976 }, { "epoch": 0.06, "learning_rate": 0.000475126374067239, "loss": 0.0724, "theoretical_loss": 3.498162242967377, "tokens_seen": 1747714048 }, { "epoch": 0.06, "learning_rate": 0.0004750862553157346, "loss": 0.0776, "theoretical_loss": 3.498139968376632, "tokens_seen": 1747845120 }, { "epoch": 0.06, "learning_rate": 0.0004750461365642301, "loss": 0.0697, "theoretical_loss": 3.4981176959238773, "tokens_seen": 1747976192 }, { "epoch": 0.06, "learning_rate": 0.00047500601781272567, "loss": 0.0714, "theoretical_loss": 3.4980954256087484, "tokens_seen": 1748107264 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.000656441377941519, "objective/train/docs_used": 638722, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4482041597366333, "objective/train/original_loss": 1.4482040405273438, "objective/train/theoretical_loss": 3.4980731574308788, "objective/train/tokens_used": 118762976, "objective/train/value_avg": -0.006618499755859375, "objective/train/value_loss": 0.0002858877123799175, "objective/train/value_max": -9.459257125854492e-05, "objective/train/value_min": -0.56591796875, "objective/train/value_reward_corr": 0.49852863551481597, "objective/train/value_std": 0.01131439208984375, "objective/train/weight_avg": 1.000779390335083, "objective/train/weighted_lm_loss": 1.4497531652450562, "objective/train/weights_max": 1.1532150506973267, "objective/train/weights_min": 0.3701288402080536, "theoretical_loss": 3.4980731574308788, "tokens_seen": 1748238336 }, { "epoch": 0.06, "learning_rate": 0.00047496589906122124, "loss": 0.0718, "theoretical_loss": 3.4980731574308788, "tokens_seen": 1748238336 }, { "epoch": 0.06, "learning_rate": 0.0004749257803097168, "loss": 0.0736, "theoretical_loss": 3.498050891389904, "tokens_seen": 1748369408 }, { "epoch": 0.06, "learning_rate": 0.0004748856615582123, "loss": 0.0688, "theoretical_loss": 3.498028627485459, "tokens_seen": 1748500480 }, { "epoch": 0.06, "learning_rate": 0.0004748455428067079, "loss": 0.069, "theoretical_loss": 3.4980063657171776, "tokens_seen": 1748631552 }, { "epoch": 0.06, "learning_rate": 0.00047480542405520346, "loss": 0.072, "theoretical_loss": 3.497984106084696, "tokens_seen": 1748762624 }, { "epoch": 0.06, "learning_rate": 0.0004747653053036989, "loss": 0.0794, "theoretical_loss": 3.4979618485876482, "tokens_seen": 1748893696 }, { "epoch": 0.06, "learning_rate": 0.0004747251865521945, "loss": 0.0725, "theoretical_loss": 3.4979395932256696, "tokens_seen": 1749024768 }, { "epoch": 0.06, "learning_rate": 0.00047468506780069006, "loss": 0.0734, "theoretical_loss": 3.497917339998396, "tokens_seen": 1749155840 }, { "epoch": 0.06, "learning_rate": 0.00047464494904918557, "loss": 0.0763, "theoretical_loss": 3.4978950889054614, "tokens_seen": 1749286912 }, { "epoch": 0.06, "learning_rate": 0.00047460483029768114, "loss": 0.0708, "theoretical_loss": 3.4978728399465027, "tokens_seen": 1749417984 }, { "epoch": 0.06, "learning_rate": 0.0004745647115461767, "loss": 0.0688, "theoretical_loss": 3.4978505931211545, "tokens_seen": 1749549056 }, { "epoch": 0.06, "learning_rate": 0.0004745245927946723, "loss": 0.0705, "theoretical_loss": 3.497828348429053, "tokens_seen": 1749680128 }, { "epoch": 0.06, "learning_rate": 0.0004744844740431678, "loss": 0.07, "theoretical_loss": 3.4978061058698326, "tokens_seen": 1749811200 }, { "epoch": 0.06, "learning_rate": 0.00047444435529166336, "loss": 0.0693, "theoretical_loss": 3.49778386544313, "tokens_seen": 1749942272 }, { "epoch": 0.06, "learning_rate": 0.0004744042365401589, "loss": 0.0684, "theoretical_loss": 3.4977616271485816, "tokens_seen": 1750073344 }, { "epoch": 0.06, "learning_rate": 0.0004743641177886544, "loss": 0.0718, "theoretical_loss": 3.497739390985822, "tokens_seen": 1750204416 }, { "epoch": 0.06, "learning_rate": 0.00047432399903714996, "loss": 0.0726, "theoretical_loss": 3.4977171569544883, "tokens_seen": 1750335488 }, { "epoch": 0.06, "learning_rate": 0.0004742838802856455, "loss": 0.0666, "theoretical_loss": 3.497694925054216, "tokens_seen": 1750466560 }, { "epoch": 0.06, "learning_rate": 0.00047424376153414104, "loss": 0.0706, "theoretical_loss": 3.497672695284641, "tokens_seen": 1750597632 }, { "epoch": 0.06, "learning_rate": 0.0004742036427826366, "loss": 0.0727, "theoretical_loss": 3.4976504676454008, "tokens_seen": 1750728704 }, { "epoch": 0.06, "learning_rate": 0.0004741635240311322, "loss": 0.0739, "theoretical_loss": 3.497628242136131, "tokens_seen": 1750859776 }, { "epoch": 0.06, "learning_rate": 0.00047412340527962774, "loss": 0.0673, "theoretical_loss": 3.4976060187564677, "tokens_seen": 1750990848 }, { "epoch": 0.06, "learning_rate": 0.00047408328652812326, "loss": 0.0718, "theoretical_loss": 3.497583797506048, "tokens_seen": 1751121920 }, { "epoch": 0.06, "learning_rate": 0.0004740431677766188, "loss": 0.0699, "theoretical_loss": 3.4975615783845084, "tokens_seen": 1751252992 }, { "epoch": 0.06, "learning_rate": 0.0004740030490251144, "loss": 0.0722, "theoretical_loss": 3.4975393613914854, "tokens_seen": 1751384064 }, { "epoch": 0.06, "objective/train/advantage_avg": -0.0002404685947112739, "objective/train/docs_used": 639884, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2024574279785156, "objective/train/original_loss": 1.2024574279785156, "objective/train/theoretical_loss": 3.497517146526617, "objective/train/tokens_used": 122039776, "objective/train/value_avg": -0.00557708740234375, "objective/train/value_loss": 0.0003040818846784532, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.91796875, "objective/train/value_reward_corr": 0.7155284529413237, "objective/train/value_std": 0.0166168212890625, "objective/train/weight_avg": 0.9998968839645386, "objective/train/weighted_lm_loss": 1.203224778175354, "objective/train/weights_max": 1.585618257522583, "objective/train/weights_min": 0.3976996839046478, "theoretical_loss": 3.497517146526617, "tokens_seen": 1751515136 }, { "epoch": 0.06, "learning_rate": 0.00047396293027360985, "loss": 0.0681, "theoretical_loss": 3.497517146526617, "tokens_seen": 1751515136 }, { "epoch": 0.06, "learning_rate": 0.0004739228115221054, "loss": 0.0691, "theoretical_loss": 3.497494933789538, "tokens_seen": 1751646208 }, { "epoch": 0.06, "learning_rate": 0.000473882692770601, "loss": 0.0735, "theoretical_loss": 3.4974727231798868, "tokens_seen": 1751777280 }, { "epoch": 0.06, "learning_rate": 0.0004738425740190965, "loss": 0.0703, "theoretical_loss": 3.4974505146973005, "tokens_seen": 1751908352 }, { "epoch": 0.06, "learning_rate": 0.0004738024552675921, "loss": 0.0662, "theoretical_loss": 3.497428308341416, "tokens_seen": 1752039424 }, { "epoch": 0.06, "learning_rate": 0.00047376233651608764, "loss": 0.0721, "theoretical_loss": 3.497406104111871, "tokens_seen": 1752170496 }, { "epoch": 0.06, "learning_rate": 0.0004737222177645832, "loss": 0.068, "theoretical_loss": 3.497383902008302, "tokens_seen": 1752301568 }, { "epoch": 0.06, "learning_rate": 0.0004736820990130787, "loss": 0.0683, "theoretical_loss": 3.4973617020303465, "tokens_seen": 1752432640 }, { "epoch": 0.06, "learning_rate": 0.0004736419802615743, "loss": 0.0708, "theoretical_loss": 3.497339504177643, "tokens_seen": 1752563712 }, { "epoch": 0.06, "learning_rate": 0.00047360186151006986, "loss": 0.0759, "theoretical_loss": 3.497317308449828, "tokens_seen": 1752694784 }, { "epoch": 0.06, "learning_rate": 0.0004735617427585653, "loss": 0.0701, "theoretical_loss": 3.4972951148465405, "tokens_seen": 1752825856 }, { "epoch": 0.06, "learning_rate": 0.0004735216240070609, "loss": 0.072, "theoretical_loss": 3.4972729233674174, "tokens_seen": 1752956928 }, { "epoch": 0.06, "learning_rate": 0.00047348150525555646, "loss": 0.0702, "theoretical_loss": 3.4972507340120966, "tokens_seen": 1753088000 }, { "epoch": 0.06, "learning_rate": 0.00047344138650405197, "loss": 0.0748, "theoretical_loss": 3.4972285467802164, "tokens_seen": 1753219072 }, { "epoch": 0.06, "learning_rate": 0.00047340126775254754, "loss": 0.0719, "theoretical_loss": 3.497206361671414, "tokens_seen": 1753350144 }, { "epoch": 0.06, "learning_rate": 0.0004733611490010431, "loss": 0.0706, "theoretical_loss": 3.4971841786853295, "tokens_seen": 1753481216 }, { "epoch": 0.06, "learning_rate": 0.0004733210302495387, "loss": 0.0723, "theoretical_loss": 3.497161997821599, "tokens_seen": 1753612288 }, { "epoch": 0.06, "learning_rate": 0.0004732809114980342, "loss": 0.0694, "theoretical_loss": 3.4971398190798615, "tokens_seen": 1753743360 }, { "epoch": 0.06, "learning_rate": 0.00047324079274652976, "loss": 0.0722, "theoretical_loss": 3.4971176424597563, "tokens_seen": 1753874432 }, { "epoch": 0.06, "learning_rate": 0.00047320067399502533, "loss": 0.0692, "theoretical_loss": 3.4970954679609214, "tokens_seen": 1754005504 }, { "epoch": 0.06, "learning_rate": 0.0004731605552435208, "loss": 0.0724, "theoretical_loss": 3.497073295582995, "tokens_seen": 1754136576 }, { "epoch": 0.06, "learning_rate": 0.00047312043649201636, "loss": 0.0697, "theoretical_loss": 3.4970511253256156, "tokens_seen": 1754267648 }, { "epoch": 0.06, "learning_rate": 0.0004730803177405119, "loss": 0.0684, "theoretical_loss": 3.497028957188423, "tokens_seen": 1754398720 }, { "epoch": 0.06, "learning_rate": 0.00047304019898900744, "loss": 0.0687, "theoretical_loss": 3.4970067911710556, "tokens_seen": 1754529792 }, { "epoch": 0.06, "learning_rate": 0.000473000080237503, "loss": 0.068, "theoretical_loss": 3.496984627273152, "tokens_seen": 1754660864 }, { "epoch": 0.06, "objective/train/advantage_avg": 0.00021981721511110663, "objective/train/docs_used": 641116, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3605735301971436, "objective/train/original_loss": 1.3605732917785645, "objective/train/theoretical_loss": 3.4969624654943514, "objective/train/tokens_used": 125316576, "objective/train/value_avg": -0.00666046142578125, "objective/train/value_loss": 0.00018791349430102855, "objective/train/value_max": -7.31348991394043e-05, "objective/train/value_min": -0.311279296875, "objective/train/value_reward_corr": 0.6737995875411535, "objective/train/value_std": 0.011993408203125, "objective/train/weight_avg": 1.0003079175949097, "objective/train/weighted_lm_loss": 1.359967827796936, "objective/train/weights_max": 1.2298709154129028, "objective/train/weights_min": 0.39149805903434753, "theoretical_loss": 3.4969624654943514, "tokens_seen": 1754791936 }, { "epoch": 0.06, "learning_rate": 0.0004729599614859986, "loss": 0.0672, "theoretical_loss": 3.4969624654943514, "tokens_seen": 1754791936 }, { "epoch": 0.06, "learning_rate": 0.00047291984273449414, "loss": 0.0685, "theoretical_loss": 3.4969403058342925, "tokens_seen": 1754923008 }, { "epoch": 0.06, "learning_rate": 0.00047287972398298966, "loss": 0.0726, "theoretical_loss": 3.496918148292616, "tokens_seen": 1755054080 }, { "epoch": 0.06, "learning_rate": 0.0004728396052314852, "loss": 0.0742, "theoretical_loss": 3.49689599286896, "tokens_seen": 1755185152 }, { "epoch": 0.06, "learning_rate": 0.0004727994864799808, "loss": 0.0719, "theoretical_loss": 3.496873839562964, "tokens_seen": 1755316224 }, { "epoch": 0.06, "learning_rate": 0.00047275936772847625, "loss": 0.0711, "theoretical_loss": 3.4968516883742673, "tokens_seen": 1755447296 }, { "epoch": 0.06, "learning_rate": 0.0004727192489769718, "loss": 0.072, "theoretical_loss": 3.49682953930251, "tokens_seen": 1755578368 }, { "epoch": 0.06, "learning_rate": 0.0004726791302254674, "loss": 0.0732, "theoretical_loss": 3.496807392347332, "tokens_seen": 1755709440 }, { "epoch": 0.06, "learning_rate": 0.0004726390114739629, "loss": 0.0683, "theoretical_loss": 3.4967852475083725, "tokens_seen": 1755840512 }, { "epoch": 0.06, "learning_rate": 0.0004725988927224585, "loss": 0.0697, "theoretical_loss": 3.496763104785271, "tokens_seen": 1755971584 }, { "epoch": 0.06, "learning_rate": 0.00047255877397095404, "loss": 0.071, "theoretical_loss": 3.4967409641776683, "tokens_seen": 1756102656 }, { "epoch": 0.06, "learning_rate": 0.0004725186552194496, "loss": 0.0736, "theoretical_loss": 3.496718825685204, "tokens_seen": 1756233728 }, { "epoch": 0.06, "learning_rate": 0.0004724785364679451, "loss": 0.0695, "theoretical_loss": 3.4966966893075178, "tokens_seen": 1756364800 }, { "epoch": 0.06, "learning_rate": 0.0004724384177164407, "loss": 0.07, "theoretical_loss": 3.4966745550442506, "tokens_seen": 1756495872 }, { "epoch": 0.06, "learning_rate": 0.00047239829896493626, "loss": 0.0723, "theoretical_loss": 3.496652422895042, "tokens_seen": 1756626944 }, { "epoch": 0.06, "learning_rate": 0.0004723581802134317, "loss": 0.069, "theoretical_loss": 3.496630292859533, "tokens_seen": 1756758016 }, { "epoch": 0.06, "learning_rate": 0.0004723180614619273, "loss": 0.0691, "theoretical_loss": 3.496608164937364, "tokens_seen": 1756889088 }, { "epoch": 0.06, "learning_rate": 0.00047227794271042286, "loss": 0.0699, "theoretical_loss": 3.496586039128175, "tokens_seen": 1757020160 }, { "epoch": 0.06, "learning_rate": 0.0004722378239589184, "loss": 0.0722, "theoretical_loss": 3.496563915431607, "tokens_seen": 1757151232 }, { "epoch": 0.07, "learning_rate": 0.00047219770520741394, "loss": 0.0705, "theoretical_loss": 3.4965417938473005, "tokens_seen": 1757282304 }, { "epoch": 0.07, "learning_rate": 0.0004721575864559095, "loss": 0.0729, "theoretical_loss": 3.4965196743748965, "tokens_seen": 1757413376 }, { "epoch": 0.07, "learning_rate": 0.0004721174677044051, "loss": 0.0726, "theoretical_loss": 3.4964975570140364, "tokens_seen": 1757544448 }, { "epoch": 0.07, "learning_rate": 0.0004720773489529006, "loss": 0.0671, "theoretical_loss": 3.49647544176436, "tokens_seen": 1757675520 }, { "epoch": 0.07, "learning_rate": 0.00047203723020139616, "loss": 0.075, "theoretical_loss": 3.4964533286255093, "tokens_seen": 1757806592 }, { "epoch": 0.07, "learning_rate": 0.00047199711144989173, "loss": 0.0714, "theoretical_loss": 3.4964312175971246, "tokens_seen": 1757937664 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.00044419398182071745, "objective/train/docs_used": 642262, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5011606216430664, "objective/train/original_loss": 1.5011606216430664, "objective/train/theoretical_loss": 3.496409108678849, "objective/train/tokens_used": 128593376, "objective/train/value_avg": -0.00750732421875, "objective/train/value_loss": 0.00018912047380581498, "objective/train/value_max": -6.0617923736572266e-05, "objective/train/value_min": -0.36279296875, "objective/train/value_reward_corr": 0.7321576472163964, "objective/train/value_std": 0.0143585205078125, "objective/train/weight_avg": 1.0005301237106323, "objective/train/weighted_lm_loss": 1.5015453100204468, "objective/train/weights_max": 1.2235815525054932, "objective/train/weights_min": 0.3684675693511963, "theoretical_loss": 3.496409108678849, "tokens_seen": 1758068736 }, { "epoch": 0.07, "learning_rate": 0.0004719569926983872, "loss": 0.0745, "theoretical_loss": 3.496409108678849, "tokens_seen": 1758068736 }, { "epoch": 0.07, "learning_rate": 0.00047191687394688276, "loss": 0.0717, "theoretical_loss": 3.496387001870321, "tokens_seen": 1758199808 }, { "epoch": 0.07, "learning_rate": 0.0004718767551953783, "loss": 0.0745, "theoretical_loss": 3.4963648971711843, "tokens_seen": 1758330880 }, { "epoch": 0.07, "learning_rate": 0.0004718366364438739, "loss": 0.0676, "theoretical_loss": 3.49634279458108, "tokens_seen": 1758461952 }, { "epoch": 0.07, "learning_rate": 0.0004717965176923694, "loss": 0.0765, "theoretical_loss": 3.4963206940996487, "tokens_seen": 1758593024 }, { "epoch": 0.07, "learning_rate": 0.000471756398940865, "loss": 0.0707, "theoretical_loss": 3.4962985957265333, "tokens_seen": 1758724096 }, { "epoch": 0.07, "learning_rate": 0.00047171628018936054, "loss": 0.0771, "theoretical_loss": 3.4962764994613744, "tokens_seen": 1758855168 }, { "epoch": 0.07, "learning_rate": 0.00047167616143785606, "loss": 0.0659, "theoretical_loss": 3.496254405303815, "tokens_seen": 1758986240 }, { "epoch": 0.07, "learning_rate": 0.00047163604268635163, "loss": 0.0714, "theoretical_loss": 3.496232313253496, "tokens_seen": 1759117312 }, { "epoch": 0.07, "learning_rate": 0.0004715959239348472, "loss": 0.0733, "theoretical_loss": 3.4962102233100607, "tokens_seen": 1759248384 }, { "epoch": 0.07, "learning_rate": 0.00047155580518334266, "loss": 0.0743, "theoretical_loss": 3.49618813547315, "tokens_seen": 1759379456 }, { "epoch": 0.07, "learning_rate": 0.0004715156864318382, "loss": 0.0732, "theoretical_loss": 3.4961660497424063, "tokens_seen": 1759510528 }, { "epoch": 0.07, "learning_rate": 0.0004714755676803338, "loss": 0.0705, "theoretical_loss": 3.4961439661174727, "tokens_seen": 1759641600 }, { "epoch": 0.07, "learning_rate": 0.00047143544892882936, "loss": 0.0716, "theoretical_loss": 3.496121884597991, "tokens_seen": 1759772672 }, { "epoch": 0.07, "learning_rate": 0.0004713953301773249, "loss": 0.0722, "theoretical_loss": 3.4960998051836034, "tokens_seen": 1759903744 }, { "epoch": 0.07, "learning_rate": 0.00047135521142582044, "loss": 0.0718, "theoretical_loss": 3.4960777278739528, "tokens_seen": 1760034816 }, { "epoch": 0.07, "learning_rate": 0.000471315092674316, "loss": 0.0715, "theoretical_loss": 3.496055652668682, "tokens_seen": 1760165888 }, { "epoch": 0.07, "learning_rate": 0.0004712749739228115, "loss": 0.0719, "theoretical_loss": 3.4960335795674338, "tokens_seen": 1760296960 }, { "epoch": 0.07, "learning_rate": 0.0004712348551713071, "loss": 0.0681, "theoretical_loss": 3.4960115085698504, "tokens_seen": 1760428032 }, { "epoch": 0.07, "learning_rate": 0.00047119473641980266, "loss": 0.0712, "theoretical_loss": 3.495989439675575, "tokens_seen": 1760559104 }, { "epoch": 0.07, "learning_rate": 0.0004711546176682981, "loss": 0.072, "theoretical_loss": 3.495967372884251, "tokens_seen": 1760690176 }, { "epoch": 0.07, "learning_rate": 0.0004711144989167937, "loss": 0.0701, "theoretical_loss": 3.4959453081955205, "tokens_seen": 1760821248 }, { "epoch": 0.07, "learning_rate": 0.00047107438016528926, "loss": 0.0698, "theoretical_loss": 3.4959232456090277, "tokens_seen": 1760952320 }, { "epoch": 0.07, "learning_rate": 0.00047103426141378483, "loss": 0.0704, "theoretical_loss": 3.495901185124416, "tokens_seen": 1761083392 }, { "epoch": 0.07, "learning_rate": 0.00047099414266228034, "loss": 0.0715, "theoretical_loss": 3.4958791267413276, "tokens_seen": 1761214464 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0003957933222409338, "objective/train/docs_used": 643454, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.416270136833191, "objective/train/original_loss": 1.416270136833191, "objective/train/theoretical_loss": 3.4958570704594067, "objective/train/tokens_used": 131870176, "objective/train/value_avg": -0.005306243896484375, "objective/train/value_loss": 0.00012620001507457346, "objective/train/value_max": -3.916025161743164e-05, "objective/train/value_min": -0.1695556640625, "objective/train/value_reward_corr": 0.5879378479550992, "objective/train/value_std": 0.007595062255859375, "objective/train/weight_avg": 1.0004514455795288, "objective/train/weighted_lm_loss": 1.4175769090652466, "objective/train/weights_max": 1.0816538333892822, "objective/train/weights_min": 0.3683270514011383, "theoretical_loss": 3.4958570704594067, "tokens_seen": 1761345536 }, { "epoch": 0.07, "learning_rate": 0.0004709540239107759, "loss": 0.0712, "theoretical_loss": 3.4958570704594067, "tokens_seen": 1761345536 }, { "epoch": 0.07, "learning_rate": 0.0004709139051592715, "loss": 0.0738, "theoretical_loss": 3.4958350162782965, "tokens_seen": 1761476608 }, { "epoch": 0.07, "learning_rate": 0.000470873786407767, "loss": 0.068, "theoretical_loss": 3.495812964197641, "tokens_seen": 1761607680 }, { "epoch": 0.07, "learning_rate": 0.00047083366765626256, "loss": 0.0758, "theoretical_loss": 3.4957909142170838, "tokens_seen": 1761738752 }, { "epoch": 0.07, "learning_rate": 0.00047079354890475813, "loss": 0.0698, "theoretical_loss": 3.4957688663362685, "tokens_seen": 1761869824 }, { "epoch": 0.07, "learning_rate": 0.0004707534301532536, "loss": 0.0707, "theoretical_loss": 3.4957468205548388, "tokens_seen": 1762000896 }, { "epoch": 0.07, "learning_rate": 0.00047071331140174916, "loss": 0.0771, "theoretical_loss": 3.495724776872439, "tokens_seen": 1762131968 }, { "epoch": 0.07, "learning_rate": 0.0004706731926502447, "loss": 0.0718, "theoretical_loss": 3.495702735288713, "tokens_seen": 1762263040 }, { "epoch": 0.07, "learning_rate": 0.0004706330738987403, "loss": 0.0697, "theoretical_loss": 3.4956806958033044, "tokens_seen": 1762394112 }, { "epoch": 0.07, "learning_rate": 0.0004705929551472358, "loss": 0.0684, "theoretical_loss": 3.4956586584158584, "tokens_seen": 1762525184 }, { "epoch": 0.07, "learning_rate": 0.0004705528363957314, "loss": 0.0671, "theoretical_loss": 3.4956366231260185, "tokens_seen": 1762656256 }, { "epoch": 0.07, "learning_rate": 0.00047051271764422695, "loss": 0.0708, "theoretical_loss": 3.49561458993343, "tokens_seen": 1762787328 }, { "epoch": 0.07, "learning_rate": 0.00047047259889272246, "loss": 0.0702, "theoretical_loss": 3.4955925588377363, "tokens_seen": 1762918400 }, { "epoch": 0.07, "learning_rate": 0.00047043248014121803, "loss": 0.0732, "theoretical_loss": 3.4955705298385826, "tokens_seen": 1763049472 }, { "epoch": 0.07, "learning_rate": 0.0004703923613897136, "loss": 0.0726, "theoretical_loss": 3.4955485029356135, "tokens_seen": 1763180544 }, { "epoch": 0.07, "learning_rate": 0.00047035224263820906, "loss": 0.0703, "theoretical_loss": 3.4955264781284727, "tokens_seen": 1763311616 }, { "epoch": 0.07, "learning_rate": 0.0004703121238867046, "loss": 0.0697, "theoretical_loss": 3.495504455416807, "tokens_seen": 1763442688 }, { "epoch": 0.07, "learning_rate": 0.0004702720051352002, "loss": 0.0698, "theoretical_loss": 3.4954824348002598, "tokens_seen": 1763573760 }, { "epoch": 0.07, "learning_rate": 0.00047023188638369576, "loss": 0.0677, "theoretical_loss": 3.495460416278477, "tokens_seen": 1763704832 }, { "epoch": 0.07, "learning_rate": 0.0004701917676321913, "loss": 0.0766, "theoretical_loss": 3.4954383998511025, "tokens_seen": 1763835904 }, { "epoch": 0.07, "learning_rate": 0.00047015164888068684, "loss": 0.073, "theoretical_loss": 3.4954163855177827, "tokens_seen": 1763966976 }, { "epoch": 0.07, "learning_rate": 0.0004701115301291824, "loss": 0.0696, "theoretical_loss": 3.495394373278162, "tokens_seen": 1764098048 }, { "epoch": 0.07, "learning_rate": 0.00047007141137767793, "loss": 0.0708, "theoretical_loss": 3.495372363131886, "tokens_seen": 1764229120 }, { "epoch": 0.07, "learning_rate": 0.0004700312926261735, "loss": 0.0683, "theoretical_loss": 3.4953503550786005, "tokens_seen": 1764360192 }, { "epoch": 0.07, "learning_rate": 0.00046999117387466906, "loss": 0.0708, "theoretical_loss": 3.4953283491179503, "tokens_seen": 1764491264 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.0007020364864729345, "objective/train/docs_used": 644554, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.188506841659546, "objective/train/original_loss": 1.188506841659546, "objective/train/theoretical_loss": 3.495306345249581, "objective/train/tokens_used": 135146976, "objective/train/value_avg": -0.006389617919921875, "objective/train/value_loss": 0.00016776847769506276, "objective/train/value_max": -8.094310760498047e-05, "objective/train/value_min": -0.6123046875, "objective/train/value_reward_corr": 0.7046381739443187, "objective/train/value_std": 0.01302337646484375, "objective/train/weight_avg": 1.0007799863815308, "objective/train/weighted_lm_loss": 1.1894233226776123, "objective/train/weights_max": 1.2994511127471924, "objective/train/weights_min": 0.5192472338676453, "theoretical_loss": 3.495306345249581, "tokens_seen": 1764622336 }, { "epoch": 0.07, "learning_rate": 0.0004699510551231645, "loss": 0.0704, "theoretical_loss": 3.495306345249581, "tokens_seen": 1764622336 }, { "epoch": 0.07, "learning_rate": 0.0004699109363716601, "loss": 0.0712, "theoretical_loss": 3.4952843434731395, "tokens_seen": 1764753408 }, { "epoch": 0.07, "learning_rate": 0.00046987081762015566, "loss": 0.0699, "theoretical_loss": 3.4952623437882706, "tokens_seen": 1764884480 }, { "epoch": 0.07, "learning_rate": 0.00046983069886865123, "loss": 0.0714, "theoretical_loss": 3.49524034619462, "tokens_seen": 1765015552 }, { "epoch": 0.07, "learning_rate": 0.00046979058011714674, "loss": 0.0689, "theoretical_loss": 3.495218350691834, "tokens_seen": 1765146624 }, { "epoch": 0.07, "learning_rate": 0.0004697504613656423, "loss": 0.0696, "theoretical_loss": 3.495196357279559, "tokens_seen": 1765277696 }, { "epoch": 0.07, "learning_rate": 0.0004697103426141379, "loss": 0.0712, "theoretical_loss": 3.49517436595744, "tokens_seen": 1765408768 }, { "epoch": 0.07, "learning_rate": 0.0004696702238626334, "loss": 0.0701, "theoretical_loss": 3.495152376725124, "tokens_seen": 1765539840 }, { "epoch": 0.07, "learning_rate": 0.00046963010511112896, "loss": 0.0683, "theoretical_loss": 3.4951303895822576, "tokens_seen": 1765670912 }, { "epoch": 0.07, "learning_rate": 0.00046958998635962453, "loss": 0.0708, "theoretical_loss": 3.4951084045284864, "tokens_seen": 1765801984 }, { "epoch": 0.07, "learning_rate": 0.00046954986760812005, "loss": 0.0719, "theoretical_loss": 3.4950864215634567, "tokens_seen": 1765933056 }, { "epoch": 0.07, "learning_rate": 0.00046950974885661556, "loss": 0.0693, "theoretical_loss": 3.495064440686816, "tokens_seen": 1766064128 }, { "epoch": 0.07, "learning_rate": 0.00046946963010511113, "loss": 0.0703, "theoretical_loss": 3.495042461898211, "tokens_seen": 1766195200 }, { "epoch": 0.07, "learning_rate": 0.0004694295113536067, "loss": 0.075, "theoretical_loss": 3.495020485197287, "tokens_seen": 1766326272 }, { "epoch": 0.07, "learning_rate": 0.0004693893926021022, "loss": 0.0712, "theoretical_loss": 3.4949985105836925, "tokens_seen": 1766457344 }, { "epoch": 0.07, "learning_rate": 0.0004693492738505978, "loss": 0.0688, "theoretical_loss": 3.494976538057073, "tokens_seen": 1766588416 }, { "epoch": 0.07, "learning_rate": 0.00046930915509909335, "loss": 0.0744, "theoretical_loss": 3.494954567617076, "tokens_seen": 1766719488 }, { "epoch": 0.07, "learning_rate": 0.00046926903634758886, "loss": 0.0675, "theoretical_loss": 3.4949325992633486, "tokens_seen": 1766850560 }, { "epoch": 0.07, "learning_rate": 0.00046922891759608443, "loss": 0.0728, "theoretical_loss": 3.4949106329955386, "tokens_seen": 1766981632 }, { "epoch": 0.07, "learning_rate": 0.00046918879884458, "loss": 0.0735, "theoretical_loss": 3.4948886688132923, "tokens_seen": 1767112704 }, { "epoch": 0.07, "learning_rate": 0.0004691486800930755, "loss": 0.0707, "theoretical_loss": 3.494866706716257, "tokens_seen": 1767243776 }, { "epoch": 0.07, "learning_rate": 0.000469108561341571, "loss": 0.072, "theoretical_loss": 3.4948447467040804, "tokens_seen": 1767374848 }, { "epoch": 0.07, "learning_rate": 0.0004690684425900666, "loss": 0.0684, "theoretical_loss": 3.49482278877641, "tokens_seen": 1767505920 }, { "epoch": 0.07, "learning_rate": 0.00046902832383856216, "loss": 0.071, "theoretical_loss": 3.494800832932894, "tokens_seen": 1767636992 }, { "epoch": 0.07, "learning_rate": 0.0004689882050870577, "loss": 0.0755, "theoretical_loss": 3.494778879173179, "tokens_seen": 1767768064 }, { "epoch": 0.07, "objective/train/advantage_avg": 0.00012373436766210943, "objective/train/docs_used": 645687, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4371914863586426, "objective/train/original_loss": 1.4371914863586426, "objective/train/theoretical_loss": 3.494756927496913, "objective/train/tokens_used": 138423776, "objective/train/value_avg": -0.00583648681640625, "objective/train/value_loss": 0.0001618804526515305, "objective/train/value_max": -6.711483001708984e-05, "objective/train/value_min": -0.2454833984375, "objective/train/value_reward_corr": 0.6888835632642651, "objective/train/value_std": 0.01141357421875, "objective/train/weight_avg": 1.0001959800720215, "objective/train/weighted_lm_loss": 1.437523365020752, "objective/train/weights_max": 1.1046044826507568, "objective/train/weights_min": 0.3696744740009308, "theoretical_loss": 3.494756927496913, "tokens_seen": 1767899136 }, { "epoch": 0.07, "learning_rate": 0.00046894808633555325, "loss": 0.0717, "theoretical_loss": 3.494756927496913, "tokens_seen": 1767899136 }, { "epoch": 0.07, "learning_rate": 0.0004689079675840488, "loss": 0.0713, "theoretical_loss": 3.494734977903744, "tokens_seen": 1768030208 }, { "epoch": 0.07, "learning_rate": 0.00046886784883254433, "loss": 0.0714, "theoretical_loss": 3.49471303039332, "tokens_seen": 1768161280 }, { "epoch": 0.07, "learning_rate": 0.0004688277300810399, "loss": 0.0639, "theoretical_loss": 3.4946910849652886, "tokens_seen": 1768292352 }, { "epoch": 0.07, "learning_rate": 0.00046878761132953547, "loss": 0.0716, "theoretical_loss": 3.4946691416192985, "tokens_seen": 1768423424 }, { "epoch": 0.07, "learning_rate": 0.000468747492578031, "loss": 0.0704, "theoretical_loss": 3.494647200354998, "tokens_seen": 1768554496 }, { "epoch": 0.07, "learning_rate": 0.0004687073738265265, "loss": 0.0679, "theoretical_loss": 3.4946252611720348, "tokens_seen": 1768685568 }, { "epoch": 0.07, "learning_rate": 0.00046866725507502206, "loss": 0.0684, "theoretical_loss": 3.494603324070057, "tokens_seen": 1768816640 }, { "epoch": 0.07, "learning_rate": 0.00046862713632351763, "loss": 0.068, "theoretical_loss": 3.494581389048714, "tokens_seen": 1768947712 }, { "epoch": 0.07, "learning_rate": 0.00046858701757201314, "loss": 0.0701, "theoretical_loss": 3.494559456107653, "tokens_seen": 1769078784 }, { "epoch": 0.07, "learning_rate": 0.0004685468988205087, "loss": 0.066, "theoretical_loss": 3.494537525246524, "tokens_seen": 1769209856 }, { "epoch": 0.07, "learning_rate": 0.0004685067800690043, "loss": 0.0733, "theoretical_loss": 3.4945155964649746, "tokens_seen": 1769340928 }, { "epoch": 0.07, "learning_rate": 0.0004684666613174998, "loss": 0.0711, "theoretical_loss": 3.4944936697626545, "tokens_seen": 1769472000 }, { "epoch": 0.07, "learning_rate": 0.00046842654256599536, "loss": 0.0714, "theoretical_loss": 3.4944717451392115, "tokens_seen": 1769603072 }, { "epoch": 0.07, "learning_rate": 0.00046838642381449093, "loss": 0.0701, "theoretical_loss": 3.4944498225942953, "tokens_seen": 1769734144 }, { "epoch": 0.07, "learning_rate": 0.00046834630506298645, "loss": 0.0692, "theoretical_loss": 3.494427902127555, "tokens_seen": 1769865216 }, { "epoch": 0.07, "learning_rate": 0.00046830618631148196, "loss": 0.0705, "theoretical_loss": 3.4944059837386394, "tokens_seen": 1769996288 }, { "epoch": 0.07, "learning_rate": 0.00046826606755997753, "loss": 0.0715, "theoretical_loss": 3.4943840674271973, "tokens_seen": 1770127360 }, { "epoch": 0.07, "learning_rate": 0.0004682259488084731, "loss": 0.0678, "theoretical_loss": 3.494362153192879, "tokens_seen": 1770258432 }, { "epoch": 0.07, "learning_rate": 0.0004681858300569686, "loss": 0.0677, "theoretical_loss": 3.4943402410353332, "tokens_seen": 1770389504 }, { "epoch": 0.07, "learning_rate": 0.0004681457113054642, "loss": 0.0669, "theoretical_loss": 3.494318330954209, "tokens_seen": 1770520576 }, { "epoch": 0.07, "learning_rate": 0.00046810559255395975, "loss": 0.0722, "theoretical_loss": 3.494296422949157, "tokens_seen": 1770651648 }, { "epoch": 0.07, "learning_rate": 0.00046806547380245526, "loss": 0.0681, "theoretical_loss": 3.494274517019826, "tokens_seen": 1770782720 }, { "epoch": 0.07, "learning_rate": 0.00046802535505095083, "loss": 0.0715, "theoretical_loss": 3.494252613165866, "tokens_seen": 1770913792 }, { "epoch": 0.07, "learning_rate": 0.0004679852362994464, "loss": 0.0729, "theoretical_loss": 3.494230711386926, "tokens_seen": 1771044864 }, { "epoch": 0.07, "objective/train/advantage_avg": 3.506660505081527e-05, "objective/train/docs_used": 646815, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3753331899642944, "objective/train/original_loss": 1.3753330707550049, "objective/train/theoretical_loss": 3.4942088116826575, "objective/train/tokens_used": 141700576, "objective/train/value_avg": -0.007373809814453125, "objective/train/value_loss": 0.00048726150998845696, "objective/train/value_max": -8.153915405273438e-05, "objective/train/value_min": -0.90283203125, "objective/train/value_reward_corr": 0.8425105851209318, "objective/train/value_std": 0.025482177734375, "objective/train/weight_avg": 1.0002514123916626, "objective/train/weighted_lm_loss": 1.3751147985458374, "objective/train/weights_max": 1.9233063459396362, "objective/train/weights_min": 0.3883684575557709, "theoretical_loss": 3.4942088116826575, "tokens_seen": 1771175936 }, { "epoch": 0.07, "learning_rate": 0.00046794511754794197, "loss": 0.0699, "theoretical_loss": 3.4942088116826575, "tokens_seen": 1771175936 }, { "epoch": 0.07, "learning_rate": 0.00046790499879643743, "loss": 0.07, "theoretical_loss": 3.4941869140527095, "tokens_seen": 1771307008 }, { "epoch": 0.07, "learning_rate": 0.000467864880044933, "loss": 0.0686, "theoretical_loss": 3.494165018496732, "tokens_seen": 1771438080 }, { "epoch": 0.07, "learning_rate": 0.00046782476129342856, "loss": 0.0675, "theoretical_loss": 3.494143125014375, "tokens_seen": 1771569152 }, { "epoch": 0.07, "learning_rate": 0.0004677846425419241, "loss": 0.0712, "theoretical_loss": 3.4941212336052896, "tokens_seen": 1771700224 }, { "epoch": 0.07, "learning_rate": 0.00046774452379041965, "loss": 0.067, "theoretical_loss": 3.4940993442691246, "tokens_seen": 1771831296 }, { "epoch": 0.07, "learning_rate": 0.0004677044050389152, "loss": 0.0692, "theoretical_loss": 3.494077457005532, "tokens_seen": 1771962368 }, { "epoch": 0.07, "learning_rate": 0.00046766428628741073, "loss": 0.0682, "theoretical_loss": 3.4940555718141613, "tokens_seen": 1772093440 }, { "epoch": 0.07, "learning_rate": 0.0004676241675359063, "loss": 0.0728, "theoretical_loss": 3.494033688694663, "tokens_seen": 1772224512 }, { "epoch": 0.07, "learning_rate": 0.00046758404878440187, "loss": 0.0692, "theoretical_loss": 3.4940118076466886, "tokens_seen": 1772355584 }, { "epoch": 0.07, "learning_rate": 0.00046754393003289743, "loss": 0.0685, "theoretical_loss": 3.4939899286698877, "tokens_seen": 1772486656 }, { "epoch": 0.07, "learning_rate": 0.0004675038112813929, "loss": 0.0703, "theoretical_loss": 3.493968051763912, "tokens_seen": 1772617728 }, { "epoch": 0.07, "learning_rate": 0.00046746369252988846, "loss": 0.0697, "theoretical_loss": 3.493946176928412, "tokens_seen": 1772748800 }, { "epoch": 0.07, "learning_rate": 0.00046742357377838403, "loss": 0.0706, "theoretical_loss": 3.4939243041630395, "tokens_seen": 1772879872 }, { "epoch": 0.07, "learning_rate": 0.00046738345502687955, "loss": 0.0705, "theoretical_loss": 3.493902433467444, "tokens_seen": 1773010944 }, { "epoch": 0.07, "learning_rate": 0.0004673433362753751, "loss": 0.0718, "theoretical_loss": 3.4938805648412776, "tokens_seen": 1773142016 }, { "epoch": 0.07, "learning_rate": 0.0004673032175238707, "loss": 0.0689, "theoretical_loss": 3.4938586982841917, "tokens_seen": 1773273088 }, { "epoch": 0.07, "learning_rate": 0.00046726309877236625, "loss": 0.0688, "theoretical_loss": 3.4938368337958368, "tokens_seen": 1773404160 }, { "epoch": 0.07, "learning_rate": 0.00046722298002086176, "loss": 0.0702, "theoretical_loss": 3.4938149713758655, "tokens_seen": 1773535232 }, { "epoch": 0.07, "learning_rate": 0.00046718286126935733, "loss": 0.0695, "theoretical_loss": 3.493793111023928, "tokens_seen": 1773666304 }, { "epoch": 0.08, "learning_rate": 0.0004671427425178529, "loss": 0.0693, "theoretical_loss": 3.493771252739677, "tokens_seen": 1773797376 }, { "epoch": 0.08, "learning_rate": 0.00046710262376634836, "loss": 0.0679, "theoretical_loss": 3.4937493965227633, "tokens_seen": 1773928448 }, { "epoch": 0.08, "learning_rate": 0.00046706250501484393, "loss": 0.0645, "theoretical_loss": 3.49372754237284, "tokens_seen": 1774059520 }, { "epoch": 0.08, "learning_rate": 0.0004670223862633395, "loss": 0.0696, "theoretical_loss": 3.4937056902895565, "tokens_seen": 1774190592 }, { "epoch": 0.08, "learning_rate": 0.000466982267511835, "loss": 0.0701, "theoretical_loss": 3.493683840272567, "tokens_seen": 1774321664 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0007779787993058562, "objective/train/docs_used": 647978, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.344138503074646, "objective/train/original_loss": 1.3441386222839355, "objective/train/theoretical_loss": 3.4936619923215226, "objective/train/tokens_used": 144977376, "objective/train/value_avg": -0.00945281982421875, "objective/train/value_loss": 0.00019143747340422124, "objective/train/value_max": -6.401538848876953e-05, "objective/train/value_min": -0.32763671875, "objective/train/value_reward_corr": 0.7616445872830053, "objective/train/value_std": 0.0162506103515625, "objective/train/weight_avg": 1.000868558883667, "objective/train/weighted_lm_loss": 1.344605803489685, "objective/train/weights_max": 1.2056413888931274, "objective/train/weights_min": 0.36912351846694946, "theoretical_loss": 3.4936619923215226, "tokens_seen": 1774452736 }, { "epoch": 0.08, "learning_rate": 0.0004669421487603306, "loss": 0.0682, "theoretical_loss": 3.4936619923215226, "tokens_seen": 1774452736 }, { "epoch": 0.08, "learning_rate": 0.00046690203000882615, "loss": 0.0748, "theoretical_loss": 3.493640146436076, "tokens_seen": 1774583808 }, { "epoch": 0.08, "learning_rate": 0.0004668619112573217, "loss": 0.0645, "theoretical_loss": 3.493618302615878, "tokens_seen": 1774714880 }, { "epoch": 0.08, "learning_rate": 0.00046682179250581723, "loss": 0.072, "theoretical_loss": 3.4935964608605814, "tokens_seen": 1774845952 }, { "epoch": 0.08, "learning_rate": 0.0004667816737543128, "loss": 0.0683, "theoretical_loss": 3.4935746211698393, "tokens_seen": 1774977024 }, { "epoch": 0.08, "learning_rate": 0.00046674155500280837, "loss": 0.0706, "theoretical_loss": 3.4935527835433033, "tokens_seen": 1775108096 }, { "epoch": 0.08, "learning_rate": 0.00046670143625130383, "loss": 0.0698, "theoretical_loss": 3.4935309479806262, "tokens_seen": 1775239168 }, { "epoch": 0.08, "learning_rate": 0.0004666613174997994, "loss": 0.0686, "theoretical_loss": 3.493509114481461, "tokens_seen": 1775370240 }, { "epoch": 0.08, "learning_rate": 0.00046662119874829497, "loss": 0.0709, "theoretical_loss": 3.49348728304546, "tokens_seen": 1775501312 }, { "epoch": 0.08, "learning_rate": 0.0004665810799967905, "loss": 0.0717, "theoretical_loss": 3.4934654536722753, "tokens_seen": 1775632384 }, { "epoch": 0.08, "learning_rate": 0.00046654096124528605, "loss": 0.068, "theoretical_loss": 3.493443626361561, "tokens_seen": 1775763456 }, { "epoch": 0.08, "learning_rate": 0.0004665008424937816, "loss": 0.0701, "theoretical_loss": 3.493421801112969, "tokens_seen": 1775894528 }, { "epoch": 0.08, "learning_rate": 0.0004664607237422772, "loss": 0.0723, "theoretical_loss": 3.4933999779261526, "tokens_seen": 1776025600 }, { "epoch": 0.08, "learning_rate": 0.0004664206049907727, "loss": 0.0684, "theoretical_loss": 3.493378156800765, "tokens_seen": 1776156672 }, { "epoch": 0.08, "learning_rate": 0.00046638048623926827, "loss": 0.0726, "theoretical_loss": 3.4933563377364596, "tokens_seen": 1776287744 }, { "epoch": 0.08, "learning_rate": 0.00046634036748776384, "loss": 0.0686, "theoretical_loss": 3.4933345207328896, "tokens_seen": 1776418816 }, { "epoch": 0.08, "learning_rate": 0.0004663002487362593, "loss": 0.0698, "theoretical_loss": 3.493312705789708, "tokens_seen": 1776549888 }, { "epoch": 0.08, "learning_rate": 0.00046626012998475486, "loss": 0.07, "theoretical_loss": 3.493290892906568, "tokens_seen": 1776680960 }, { "epoch": 0.08, "learning_rate": 0.00046622001123325043, "loss": 0.0718, "theoretical_loss": 3.493269082083123, "tokens_seen": 1776812032 }, { "epoch": 0.08, "learning_rate": 0.00046617989248174595, "loss": 0.0717, "theoretical_loss": 3.493247273319028, "tokens_seen": 1776943104 }, { "epoch": 0.08, "learning_rate": 0.0004661397737302415, "loss": 0.0686, "theoretical_loss": 3.4932254666139357, "tokens_seen": 1777074176 }, { "epoch": 0.08, "learning_rate": 0.0004660996549787371, "loss": 0.067, "theoretical_loss": 3.4932036619674998, "tokens_seen": 1777205248 }, { "epoch": 0.08, "learning_rate": 0.00046605953622723265, "loss": 0.07, "theoretical_loss": 3.493181859379374, "tokens_seen": 1777336320 }, { "epoch": 0.08, "learning_rate": 0.00046601941747572817, "loss": 0.0684, "theoretical_loss": 3.4931600588492127, "tokens_seen": 1777467392 }, { "epoch": 0.08, "learning_rate": 0.00046597929872422373, "loss": 0.0689, "theoretical_loss": 3.4931382603766696, "tokens_seen": 1777598464 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0016351552912965417, "objective/train/docs_used": 649180, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4352236986160278, "objective/train/original_loss": 1.4352235794067383, "objective/train/theoretical_loss": 3.493116463961399, "objective/train/tokens_used": 148254176, "objective/train/value_avg": -0.0085601806640625, "objective/train/value_loss": 0.0003497599973343313, "objective/train/value_max": -6.973743438720703e-05, "objective/train/value_min": -0.88916015625, "objective/train/value_reward_corr": 0.6685859123837232, "objective/train/value_std": 0.017608642578125, "objective/train/weight_avg": 1.0017820596694946, "objective/train/weighted_lm_loss": 1.4370558261871338, "objective/train/weights_max": 1.397544503211975, "objective/train/weights_min": 0.260134220123291, "theoretical_loss": 3.493116463961399, "tokens_seen": 1777729536 }, { "epoch": 0.08, "learning_rate": 0.0004659391799727193, "loss": 0.0718, "theoretical_loss": 3.493116463961399, "tokens_seen": 1777729536 }, { "epoch": 0.08, "learning_rate": 0.00046589906122121476, "loss": 0.0736, "theoretical_loss": 3.493094669603055, "tokens_seen": 1777860608 }, { "epoch": 0.08, "learning_rate": 0.00046585894246971033, "loss": 0.0689, "theoretical_loss": 3.493072877301292, "tokens_seen": 1777991680 }, { "epoch": 0.08, "learning_rate": 0.0004658188237182059, "loss": 0.0689, "theoretical_loss": 3.493051087055764, "tokens_seen": 1778122752 }, { "epoch": 0.08, "learning_rate": 0.0004657787049667014, "loss": 0.0707, "theoretical_loss": 3.4930292988661256, "tokens_seen": 1778253824 }, { "epoch": 0.08, "learning_rate": 0.000465738586215197, "loss": 0.0723, "theoretical_loss": 3.493007512732031, "tokens_seen": 1778384896 }, { "epoch": 0.08, "learning_rate": 0.00046569846746369255, "loss": 0.0659, "theoretical_loss": 3.492985728653135, "tokens_seen": 1778515968 }, { "epoch": 0.08, "learning_rate": 0.0004656583487121881, "loss": 0.0711, "theoretical_loss": 3.4929639466290934, "tokens_seen": 1778647040 }, { "epoch": 0.08, "learning_rate": 0.00046561822996068363, "loss": 0.0699, "theoretical_loss": 3.4929421666595593, "tokens_seen": 1778778112 }, { "epoch": 0.08, "learning_rate": 0.0004655781112091792, "loss": 0.0683, "theoretical_loss": 3.492920388744188, "tokens_seen": 1778909184 }, { "epoch": 0.08, "learning_rate": 0.00046553799245767477, "loss": 0.0677, "theoretical_loss": 3.492898612882635, "tokens_seen": 1779040256 }, { "epoch": 0.08, "learning_rate": 0.00046549787370617023, "loss": 0.0699, "theoretical_loss": 3.4928768390745555, "tokens_seen": 1779171328 }, { "epoch": 0.08, "learning_rate": 0.0004654577549546658, "loss": 0.0706, "theoretical_loss": 3.4928550673196033, "tokens_seen": 1779302400 }, { "epoch": 0.08, "learning_rate": 0.00046541763620316137, "loss": 0.0654, "theoretical_loss": 3.4928332976174348, "tokens_seen": 1779433472 }, { "epoch": 0.08, "learning_rate": 0.0004653775174516569, "loss": 0.0711, "theoretical_loss": 3.492811529967704, "tokens_seen": 1779564544 }, { "epoch": 0.08, "learning_rate": 0.00046533739870015245, "loss": 0.0658, "theoretical_loss": 3.492789764370068, "tokens_seen": 1779695616 }, { "epoch": 0.08, "learning_rate": 0.000465297279948648, "loss": 0.0677, "theoretical_loss": 3.4927680008241806, "tokens_seen": 1779826688 }, { "epoch": 0.08, "learning_rate": 0.0004652571611971436, "loss": 0.0682, "theoretical_loss": 3.4927462393296986, "tokens_seen": 1779957760 }, { "epoch": 0.08, "learning_rate": 0.0004652170424456391, "loss": 0.0693, "theoretical_loss": 3.4927244798862764, "tokens_seen": 1780088832 }, { "epoch": 0.08, "learning_rate": 0.00046517692369413467, "loss": 0.0682, "theoretical_loss": 3.492702722493571, "tokens_seen": 1780219904 }, { "epoch": 0.08, "learning_rate": 0.00046513680494263024, "loss": 0.0743, "theoretical_loss": 3.492680967151237, "tokens_seen": 1780350976 }, { "epoch": 0.08, "learning_rate": 0.0004650966861911257, "loss": 0.0702, "theoretical_loss": 3.4926592138589307, "tokens_seen": 1780482048 }, { "epoch": 0.08, "learning_rate": 0.00046505656743962127, "loss": 0.0733, "theoretical_loss": 3.492637462616308, "tokens_seen": 1780613120 }, { "epoch": 0.08, "learning_rate": 0.00046501644868811683, "loss": 0.0709, "theoretical_loss": 3.4926157134230253, "tokens_seen": 1780744192 }, { "epoch": 0.08, "learning_rate": 0.00046497632993661235, "loss": 0.0692, "theoretical_loss": 3.4925939662787377, "tokens_seen": 1780875264 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0009555669967085123, "objective/train/docs_used": 650314, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2654515504837036, "objective/train/original_loss": 1.265451431274414, "objective/train/theoretical_loss": 3.4925722211831025, "objective/train/tokens_used": 151530976, "objective/train/value_avg": -0.006496429443359375, "objective/train/value_loss": 0.0001401577319484204, "objective/train/value_max": -5.346536636352539e-05, "objective/train/value_min": -0.277587890625, "objective/train/value_reward_corr": 0.6381607810471721, "objective/train/value_std": 0.01044464111328125, "objective/train/weight_avg": 1.0010206699371338, "objective/train/weighted_lm_loss": 1.2663586139678955, "objective/train/weights_max": 1.125909686088562, "objective/train/weights_min": 0.3690946400165558, "theoretical_loss": 3.4925722211831025, "tokens_seen": 1781006336 }, { "epoch": 0.08, "learning_rate": 0.0004649362111851079, "loss": 0.0708, "theoretical_loss": 3.4925722211831025, "tokens_seen": 1781006336 }, { "epoch": 0.08, "learning_rate": 0.0004648960924336035, "loss": 0.0683, "theoretical_loss": 3.4925504781357755, "tokens_seen": 1781137408 }, { "epoch": 0.08, "learning_rate": 0.00046485597368209905, "loss": 0.072, "theoretical_loss": 3.4925287371364124, "tokens_seen": 1781268480 }, { "epoch": 0.08, "learning_rate": 0.00046481585493059457, "loss": 0.0719, "theoretical_loss": 3.4925069981846715, "tokens_seen": 1781399552 }, { "epoch": 0.08, "learning_rate": 0.00046477573617909014, "loss": 0.0674, "theoretical_loss": 3.4924852612802066, "tokens_seen": 1781530624 }, { "epoch": 0.08, "learning_rate": 0.0004647356174275857, "loss": 0.0679, "theoretical_loss": 3.492463526422677, "tokens_seen": 1781661696 }, { "epoch": 0.08, "learning_rate": 0.00046469549867608116, "loss": 0.0665, "theoretical_loss": 3.4924417936117376, "tokens_seen": 1781792768 }, { "epoch": 0.08, "learning_rate": 0.00046465537992457673, "loss": 0.0708, "theoretical_loss": 3.492420062847045, "tokens_seen": 1781923840 }, { "epoch": 0.08, "learning_rate": 0.0004646152611730723, "loss": 0.0677, "theoretical_loss": 3.492398334128258, "tokens_seen": 1782054912 }, { "epoch": 0.08, "learning_rate": 0.00046457514242156787, "loss": 0.0657, "theoretical_loss": 3.4923766074550313, "tokens_seen": 1782185984 }, { "epoch": 0.08, "learning_rate": 0.0004645350236700634, "loss": 0.0702, "theoretical_loss": 3.492354882827023, "tokens_seen": 1782317056 }, { "epoch": 0.08, "learning_rate": 0.00046449490491855895, "loss": 0.0746, "theoretical_loss": 3.4923331602438905, "tokens_seen": 1782448128 }, { "epoch": 0.08, "learning_rate": 0.0004644547861670545, "loss": 0.0691, "theoretical_loss": 3.4923114397052903, "tokens_seen": 1782579200 }, { "epoch": 0.08, "learning_rate": 0.00046441466741555003, "loss": 0.0645, "theoretical_loss": 3.49228972121088, "tokens_seen": 1782710272 }, { "epoch": 0.08, "learning_rate": 0.0004643745486640456, "loss": 0.0686, "theoretical_loss": 3.4922680047603167, "tokens_seen": 1782841344 }, { "epoch": 0.08, "learning_rate": 0.00046433442991254117, "loss": 0.0692, "theoretical_loss": 3.492246290353258, "tokens_seen": 1782972416 }, { "epoch": 0.08, "learning_rate": 0.00046429431116103663, "loss": 0.0691, "theoretical_loss": 3.4922245779893615, "tokens_seen": 1783103488 }, { "epoch": 0.08, "learning_rate": 0.0004642541924095322, "loss": 0.0711, "theoretical_loss": 3.4922028676682846, "tokens_seen": 1783234560 }, { "epoch": 0.08, "learning_rate": 0.00046421407365802777, "loss": 0.0712, "theoretical_loss": 3.492181159389685, "tokens_seen": 1783365632 }, { "epoch": 0.08, "learning_rate": 0.00046417395490652334, "loss": 0.0716, "theoretical_loss": 3.49215945315322, "tokens_seen": 1783496704 }, { "epoch": 0.08, "learning_rate": 0.00046413383615501885, "loss": 0.0702, "theoretical_loss": 3.4921377489585486, "tokens_seen": 1783627776 }, { "epoch": 0.08, "learning_rate": 0.0004640937174035144, "loss": 0.0741, "theoretical_loss": 3.492116046805328, "tokens_seen": 1783758848 }, { "epoch": 0.08, "learning_rate": 0.00046405359865201, "loss": 0.0727, "theoretical_loss": 3.4920943466932153, "tokens_seen": 1783889920 }, { "epoch": 0.08, "learning_rate": 0.0004640134799005055, "loss": 0.068, "theoretical_loss": 3.4920726486218703, "tokens_seen": 1784020992 }, { "epoch": 0.08, "learning_rate": 0.00046397336114900107, "loss": 0.0734, "theoretical_loss": 3.49205095259095, "tokens_seen": 1784152064 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0009771446930244565, "objective/train/docs_used": 651558, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4218194484710693, "objective/train/original_loss": 1.4218192100524902, "objective/train/theoretical_loss": 3.4920292586001134, "objective/train/tokens_used": 154807776, "objective/train/value_avg": -0.0101165771484375, "objective/train/value_loss": 0.00039580746670253575, "objective/train/value_max": -6.502866744995117e-05, "objective/train/value_min": -0.87646484375, "objective/train/value_reward_corr": 0.6520309264605484, "objective/train/value_std": 0.0176239013671875, "objective/train/weight_avg": 1.001145362854004, "objective/train/weighted_lm_loss": 1.4232776165008545, "objective/train/weights_max": 1.2624222040176392, "objective/train/weights_min": 0.37720733880996704, "theoretical_loss": 3.4920292586001134, "tokens_seen": 1784283136 }, { "epoch": 0.08, "learning_rate": 0.00046393324239749664, "loss": 0.0721, "theoretical_loss": 3.4920292586001134, "tokens_seen": 1784283136 }, { "epoch": 0.08, "learning_rate": 0.0004638931236459921, "loss": 0.0695, "theoretical_loss": 3.492007566649018, "tokens_seen": 1784414208 }, { "epoch": 0.08, "learning_rate": 0.00046385300489448767, "loss": 0.0709, "theoretical_loss": 3.491985876737323, "tokens_seen": 1784545280 }, { "epoch": 0.08, "learning_rate": 0.00046381288614298323, "loss": 0.0684, "theoretical_loss": 3.491964188864686, "tokens_seen": 1784676352 }, { "epoch": 0.08, "learning_rate": 0.0004637727673914788, "loss": 0.073, "theoretical_loss": 3.4919425030307667, "tokens_seen": 1784807424 }, { "epoch": 0.08, "learning_rate": 0.0004637326486399743, "loss": 0.0753, "theoretical_loss": 3.491920819235223, "tokens_seen": 1784938496 }, { "epoch": 0.08, "learning_rate": 0.0004636925298884699, "loss": 0.0712, "theoretical_loss": 3.4918991374777137, "tokens_seen": 1785069568 }, { "epoch": 0.08, "learning_rate": 0.00046365241113696545, "loss": 0.0696, "theoretical_loss": 3.491877457757898, "tokens_seen": 1785200640 }, { "epoch": 0.08, "learning_rate": 0.00046361229238546097, "loss": 0.0715, "theoretical_loss": 3.491855780075434, "tokens_seen": 1785331712 }, { "epoch": 0.08, "learning_rate": 0.00046357217363395654, "loss": 0.0734, "theoretical_loss": 3.491834104429982, "tokens_seen": 1785462784 }, { "epoch": 0.08, "learning_rate": 0.0004635320548824521, "loss": 0.0689, "theoretical_loss": 3.4918124308211995, "tokens_seen": 1785593856 }, { "epoch": 0.08, "learning_rate": 0.00046349193613094757, "loss": 0.0712, "theoretical_loss": 3.491790759248747, "tokens_seen": 1785724928 }, { "epoch": 0.08, "learning_rate": 0.00046345181737944313, "loss": 0.07, "theoretical_loss": 3.4917690897122826, "tokens_seen": 1785856000 }, { "epoch": 0.08, "learning_rate": 0.0004634116986279387, "loss": 0.0695, "theoretical_loss": 3.491747422211467, "tokens_seen": 1785987072 }, { "epoch": 0.08, "learning_rate": 0.00046337157987643427, "loss": 0.07, "theoretical_loss": 3.491725756745958, "tokens_seen": 1786118144 }, { "epoch": 0.08, "learning_rate": 0.0004633314611249298, "loss": 0.0671, "theoretical_loss": 3.491704093315416, "tokens_seen": 1786249216 }, { "epoch": 0.08, "learning_rate": 0.00046329134237342535, "loss": 0.0727, "theoretical_loss": 3.491682431919501, "tokens_seen": 1786380288 }, { "epoch": 0.08, "learning_rate": 0.0004632512236219209, "loss": 0.0712, "theoretical_loss": 3.4916607725578714, "tokens_seen": 1786511360 }, { "epoch": 0.08, "learning_rate": 0.00046321110487041644, "loss": 0.0717, "theoretical_loss": 3.491639115230188, "tokens_seen": 1786642432 }, { "epoch": 0.08, "learning_rate": 0.000463170986118912, "loss": 0.0748, "theoretical_loss": 3.4916174599361103, "tokens_seen": 1786773504 }, { "epoch": 0.08, "learning_rate": 0.00046313086736740757, "loss": 0.0687, "theoretical_loss": 3.4915958066752983, "tokens_seen": 1786904576 }, { "epoch": 0.08, "learning_rate": 0.00046309074861590303, "loss": 0.0689, "theoretical_loss": 3.491574155447411, "tokens_seen": 1787035648 }, { "epoch": 0.08, "learning_rate": 0.0004630506298643986, "loss": 0.0715, "theoretical_loss": 3.4915525062521096, "tokens_seen": 1787166720 }, { "epoch": 0.08, "learning_rate": 0.00046301051111289417, "loss": 0.0705, "theoretical_loss": 3.491530859089054, "tokens_seen": 1787297792 }, { "epoch": 0.08, "learning_rate": 0.00046297039236138974, "loss": 0.0709, "theoretical_loss": 3.4915092139579036, "tokens_seen": 1787428864 }, { "epoch": 0.08, "objective/train/advantage_avg": 0.0011911545880138874, "objective/train/docs_used": 652772, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5059617757797241, "objective/train/original_loss": 1.5059616565704346, "objective/train/theoretical_loss": 3.49148757085832, "objective/train/tokens_used": 158084576, "objective/train/value_avg": -0.006046295166015625, "objective/train/value_loss": 0.00015016645193099976, "objective/train/value_max": -5.918741226196289e-05, "objective/train/value_min": -0.61083984375, "objective/train/value_reward_corr": 0.6818254053952469, "objective/train/value_std": 0.01305389404296875, "objective/train/weight_avg": 1.0012645721435547, "objective/train/weighted_lm_loss": 1.507313847541809, "objective/train/weights_max": 1.7382326126098633, "objective/train/weights_min": 0.6081292629241943, "theoretical_loss": 3.49148757085832, "tokens_seen": 1787559936 }, { "epoch": 0.08, "learning_rate": 0.00046293027360988525, "loss": 0.0681, "theoretical_loss": 3.49148757085832, "tokens_seen": 1787559936 }, { "epoch": 0.08, "learning_rate": 0.0004628901548583808, "loss": 0.0694, "theoretical_loss": 3.491465929789963, "tokens_seen": 1787691008 }, { "epoch": 0.08, "learning_rate": 0.0004628500361068764, "loss": 0.0716, "theoretical_loss": 3.4914442907524927, "tokens_seen": 1787822080 }, { "epoch": 0.08, "learning_rate": 0.0004628099173553719, "loss": 0.0715, "theoretical_loss": 3.4914226537455697, "tokens_seen": 1787953152 }, { "epoch": 0.08, "learning_rate": 0.00046276979860386747, "loss": 0.0695, "theoretical_loss": 3.4914010187688556, "tokens_seen": 1788084224 }, { "epoch": 0.08, "learning_rate": 0.00046272967985236304, "loss": 0.0693, "theoretical_loss": 3.49137938582201, "tokens_seen": 1788215296 }, { "epoch": 0.08, "learning_rate": 0.0004626895611008585, "loss": 0.0695, "theoretical_loss": 3.4913577549046937, "tokens_seen": 1788346368 }, { "epoch": 0.08, "learning_rate": 0.00046264944234935407, "loss": 0.0713, "theoretical_loss": 3.4913361260165683, "tokens_seen": 1788477440 }, { "epoch": 0.08, "learning_rate": 0.00046260932359784964, "loss": 0.0683, "theoretical_loss": 3.491314499157294, "tokens_seen": 1788608512 }, { "epoch": 0.08, "learning_rate": 0.0004625692048463452, "loss": 0.0708, "theoretical_loss": 3.4912928743265326, "tokens_seen": 1788739584 }, { "epoch": 0.08, "learning_rate": 0.0004625290860948407, "loss": 0.0711, "theoretical_loss": 3.491271251523945, "tokens_seen": 1788870656 }, { "epoch": 0.08, "learning_rate": 0.0004624889673433363, "loss": 0.0725, "theoretical_loss": 3.491249630749192, "tokens_seen": 1789001728 }, { "epoch": 0.08, "learning_rate": 0.00046244884859183186, "loss": 0.07, "theoretical_loss": 3.491228012001935, "tokens_seen": 1789132800 }, { "epoch": 0.08, "learning_rate": 0.00046240872984032737, "loss": 0.0729, "theoretical_loss": 3.4912063952818353, "tokens_seen": 1789263872 }, { "epoch": 0.08, "learning_rate": 0.00046236861108882294, "loss": 0.0713, "theoretical_loss": 3.4911847805885547, "tokens_seen": 1789394944 }, { "epoch": 0.08, "learning_rate": 0.0004623284923373185, "loss": 0.0692, "theoretical_loss": 3.4911631679217545, "tokens_seen": 1789526016 }, { "epoch": 0.08, "learning_rate": 0.00046228837358581397, "loss": 0.0746, "theoretical_loss": 3.491141557281096, "tokens_seen": 1789657088 }, { "epoch": 0.08, "learning_rate": 0.00046224825483430953, "loss": 0.07, "theoretical_loss": 3.491119948666242, "tokens_seen": 1789788160 }, { "epoch": 0.08, "learning_rate": 0.0004622081360828051, "loss": 0.0661, "theoretical_loss": 3.491098342076852, "tokens_seen": 1789919232 }, { "epoch": 0.08, "learning_rate": 0.00046216801733130067, "loss": 0.0684, "theoretical_loss": 3.4910767375125906, "tokens_seen": 1790050304 }, { "epoch": 0.08, "learning_rate": 0.0004621278985797962, "loss": 0.0725, "theoretical_loss": 3.4910551349731183, "tokens_seen": 1790181376 }, { "epoch": 0.09, "learning_rate": 0.00046208777982829175, "loss": 0.0697, "theoretical_loss": 3.4910335344580967, "tokens_seen": 1790312448 }, { "epoch": 0.09, "learning_rate": 0.0004620476610767873, "loss": 0.0701, "theoretical_loss": 3.4910119359671885, "tokens_seen": 1790443520 }, { "epoch": 0.09, "learning_rate": 0.00046200754232528284, "loss": 0.0675, "theoretical_loss": 3.490990339500056, "tokens_seen": 1790574592 }, { "epoch": 0.09, "learning_rate": 0.0004619674235737784, "loss": 0.0702, "theoretical_loss": 3.490968745056361, "tokens_seen": 1790705664 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.0004972352180629969, "objective/train/docs_used": 653832, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4243414402008057, "objective/train/original_loss": 1.4243414402008057, "objective/train/theoretical_loss": 3.4909471526357665, "objective/train/tokens_used": 161361376, "objective/train/value_avg": -0.00708770751953125, "objective/train/value_loss": 0.0002697940217331052, "objective/train/value_max": -6.866455078125e-05, "objective/train/value_min": -0.66015625, "objective/train/value_reward_corr": 0.6426313427235176, "objective/train/value_std": 0.0159759521484375, "objective/train/weight_avg": 1.0006217956542969, "objective/train/weighted_lm_loss": 1.424590826034546, "objective/train/weights_max": 1.751630187034607, "objective/train/weights_min": 0.3687625229358673, "theoretical_loss": 3.4909471526357665, "tokens_seen": 1790836736 }, { "epoch": 0.09, "learning_rate": 0.000461927304822274, "loss": 0.068, "theoretical_loss": 3.4909471526357665, "tokens_seen": 1790836736 }, { "epoch": 0.09, "learning_rate": 0.0004618871860707695, "loss": 0.067, "theoretical_loss": 3.4909255622379343, "tokens_seen": 1790967808 }, { "epoch": 0.09, "learning_rate": 0.000461847067319265, "loss": 0.07, "theoretical_loss": 3.490903973862527, "tokens_seen": 1791098880 }, { "epoch": 0.09, "learning_rate": 0.00046180694856776057, "loss": 0.0694, "theoretical_loss": 3.490882387509207, "tokens_seen": 1791229952 }, { "epoch": 0.09, "learning_rate": 0.00046176682981625614, "loss": 0.0674, "theoretical_loss": 3.490860803177638, "tokens_seen": 1791361024 }, { "epoch": 0.09, "learning_rate": 0.00046172671106475165, "loss": 0.0725, "theoretical_loss": 3.490839220867481, "tokens_seen": 1791492096 }, { "epoch": 0.09, "learning_rate": 0.0004616865923132472, "loss": 0.072, "theoretical_loss": 3.4908176405784004, "tokens_seen": 1791623168 }, { "epoch": 0.09, "learning_rate": 0.0004616464735617428, "loss": 0.0737, "theoretical_loss": 3.490796062310058, "tokens_seen": 1791754240 }, { "epoch": 0.09, "learning_rate": 0.0004616063548102383, "loss": 0.0713, "theoretical_loss": 3.4907744860621177, "tokens_seen": 1791885312 }, { "epoch": 0.09, "learning_rate": 0.00046156623605873387, "loss": 0.0666, "theoretical_loss": 3.4907529118342415, "tokens_seen": 1792016384 }, { "epoch": 0.09, "learning_rate": 0.00046152611730722944, "loss": 0.0712, "theoretical_loss": 3.4907313396260937, "tokens_seen": 1792147456 }, { "epoch": 0.09, "learning_rate": 0.00046148599855572495, "loss": 0.0659, "theoretical_loss": 3.490709769437337, "tokens_seen": 1792278528 }, { "epoch": 0.09, "learning_rate": 0.00046144587980422047, "loss": 0.0715, "theoretical_loss": 3.4906882012676346, "tokens_seen": 1792409600 }, { "epoch": 0.09, "learning_rate": 0.00046140576105271604, "loss": 0.0717, "theoretical_loss": 3.49066663511665, "tokens_seen": 1792540672 }, { "epoch": 0.09, "learning_rate": 0.0004613656423012116, "loss": 0.0689, "theoretical_loss": 3.490645070984046, "tokens_seen": 1792671744 }, { "epoch": 0.09, "learning_rate": 0.0004613255235497071, "loss": 0.069, "theoretical_loss": 3.4906235088694872, "tokens_seen": 1792802816 }, { "epoch": 0.09, "learning_rate": 0.0004612854047982027, "loss": 0.0703, "theoretical_loss": 3.490601948772637, "tokens_seen": 1792933888 }, { "epoch": 0.09, "learning_rate": 0.00046124528604669826, "loss": 0.064, "theoretical_loss": 3.4905803906931587, "tokens_seen": 1793064960 }, { "epoch": 0.09, "learning_rate": 0.00046120516729519377, "loss": 0.0667, "theoretical_loss": 3.490558834630716, "tokens_seen": 1793196032 }, { "epoch": 0.09, "learning_rate": 0.00046116504854368934, "loss": 0.0681, "theoretical_loss": 3.4905372805849737, "tokens_seen": 1793327104 }, { "epoch": 0.09, "learning_rate": 0.0004611249297921849, "loss": 0.0679, "theoretical_loss": 3.4905157285555948, "tokens_seen": 1793458176 }, { "epoch": 0.09, "learning_rate": 0.0004610848110406804, "loss": 0.0687, "theoretical_loss": 3.490494178542243, "tokens_seen": 1793589248 }, { "epoch": 0.09, "learning_rate": 0.00046104469228917594, "loss": 0.0671, "theoretical_loss": 3.490472630544584, "tokens_seen": 1793720320 }, { "epoch": 0.09, "learning_rate": 0.0004610045735376715, "loss": 0.0717, "theoretical_loss": 3.4904510845622805, "tokens_seen": 1793851392 }, { "epoch": 0.09, "learning_rate": 0.00046096445478616707, "loss": 0.069, "theoretical_loss": 3.490429540594997, "tokens_seen": 1793982464 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.0004245015443302691, "objective/train/docs_used": 655001, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.330993890762329, "objective/train/original_loss": 1.330993890762329, "objective/train/theoretical_loss": 3.490407998642399, "objective/train/tokens_used": 164638176, "objective/train/value_avg": -0.00807952880859375, "objective/train/value_loss": 0.0001550638407934457, "objective/train/value_max": -4.00543212890625e-05, "objective/train/value_min": -0.275390625, "objective/train/value_reward_corr": 0.754388356748086, "objective/train/value_std": 0.014129638671875, "objective/train/weight_avg": 1.000496745109558, "objective/train/weighted_lm_loss": 1.3315045833587646, "objective/train/weights_max": 1.217918872833252, "objective/train/weights_min": 0.3683597147464752, "theoretical_loss": 3.490407998642399, "tokens_seen": 1794113536 }, { "epoch": 0.09, "learning_rate": 0.0004609243360346626, "loss": 0.0652, "theoretical_loss": 3.490407998642399, "tokens_seen": 1794113536 }, { "epoch": 0.09, "learning_rate": 0.00046088421728315816, "loss": 0.0649, "theoretical_loss": 3.4903864587041493, "tokens_seen": 1794244608 }, { "epoch": 0.09, "learning_rate": 0.0004608440985316537, "loss": 0.0772, "theoretical_loss": 3.4903649207799137, "tokens_seen": 1794375680 }, { "epoch": 0.09, "learning_rate": 0.00046080397978014924, "loss": 0.0748, "theoretical_loss": 3.490343384869356, "tokens_seen": 1794506752 }, { "epoch": 0.09, "learning_rate": 0.0004607638610286448, "loss": 0.0691, "theoretical_loss": 3.4903218509721414, "tokens_seen": 1794637824 }, { "epoch": 0.09, "learning_rate": 0.0004607237422771404, "loss": 0.0694, "theoretical_loss": 3.4903003190879343, "tokens_seen": 1794768896 }, { "epoch": 0.09, "learning_rate": 0.0004606836235256359, "loss": 0.0665, "theoretical_loss": 3.4902787892163993, "tokens_seen": 1794899968 }, { "epoch": 0.09, "learning_rate": 0.0004606435047741314, "loss": 0.0757, "theoretical_loss": 3.490257261357202, "tokens_seen": 1795031040 }, { "epoch": 0.09, "learning_rate": 0.00046060338602262697, "loss": 0.0666, "theoretical_loss": 3.490235735510007, "tokens_seen": 1795162112 }, { "epoch": 0.09, "learning_rate": 0.00046056326727112254, "loss": 0.0678, "theoretical_loss": 3.490214211674479, "tokens_seen": 1795293184 }, { "epoch": 0.09, "learning_rate": 0.00046052314851961805, "loss": 0.0649, "theoretical_loss": 3.490192689850284, "tokens_seen": 1795424256 }, { "epoch": 0.09, "learning_rate": 0.0004604830297681136, "loss": 0.0653, "theoretical_loss": 3.4901711700370868, "tokens_seen": 1795555328 }, { "epoch": 0.09, "learning_rate": 0.0004604429110166092, "loss": 0.0667, "theoretical_loss": 3.4901496522345528, "tokens_seen": 1795686400 }, { "epoch": 0.09, "learning_rate": 0.0004604027922651047, "loss": 0.0696, "theoretical_loss": 3.4901281364423475, "tokens_seen": 1795817472 }, { "epoch": 0.09, "learning_rate": 0.0004603626735136003, "loss": 0.075, "theoretical_loss": 3.490106622660136, "tokens_seen": 1795948544 }, { "epoch": 0.09, "learning_rate": 0.00046032255476209584, "loss": 0.0702, "theoretical_loss": 3.490085110887583, "tokens_seen": 1796079616 }, { "epoch": 0.09, "learning_rate": 0.0004602824360105914, "loss": 0.0691, "theoretical_loss": 3.4900636011243567, "tokens_seen": 1796210688 }, { "epoch": 0.09, "learning_rate": 0.00046024231725908687, "loss": 0.0694, "theoretical_loss": 3.4900420933701204, "tokens_seen": 1796341760 }, { "epoch": 0.09, "learning_rate": 0.00046020219850758244, "loss": 0.078, "theoretical_loss": 3.4900205876245414, "tokens_seen": 1796472832 }, { "epoch": 0.09, "learning_rate": 0.000460162079756078, "loss": 0.0669, "theoretical_loss": 3.4899990838872847, "tokens_seen": 1796603904 }, { "epoch": 0.09, "learning_rate": 0.0004601219610045735, "loss": 0.0712, "theoretical_loss": 3.4899775821580166, "tokens_seen": 1796734976 }, { "epoch": 0.09, "learning_rate": 0.0004600818422530691, "loss": 0.0705, "theoretical_loss": 3.489956082436403, "tokens_seen": 1796866048 }, { "epoch": 0.09, "learning_rate": 0.00046004172350156466, "loss": 0.0688, "theoretical_loss": 3.4899345847221097, "tokens_seen": 1796997120 }, { "epoch": 0.09, "learning_rate": 0.00046000160475006017, "loss": 0.0701, "theoretical_loss": 3.4899130890148036, "tokens_seen": 1797128192 }, { "epoch": 0.09, "learning_rate": 0.00045996148599855574, "loss": 0.0679, "theoretical_loss": 3.4898915953141505, "tokens_seen": 1797259264 }, { "epoch": 0.09, "objective/train/advantage_avg": -0.0006764016579836607, "objective/train/docs_used": 656301, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3115298748016357, "objective/train/original_loss": 1.3115298748016357, "objective/train/theoretical_loss": 3.489870103619817, "objective/train/tokens_used": 167914976, "objective/train/value_avg": -0.0099945068359375, "objective/train/value_loss": 0.00025322820874862373, "objective/train/value_max": -1.8477439880371094e-05, "objective/train/value_min": -0.30126953125, "objective/train/value_reward_corr": 0.7030536265576671, "objective/train/value_std": 0.0171051025390625, "objective/train/weight_avg": 0.9994475245475769, "objective/train/weighted_lm_loss": 1.3099721670150757, "objective/train/weights_max": 1.208766222000122, "objective/train/weights_min": 0.5680235028266907, "theoretical_loss": 3.489870103619817, "tokens_seen": 1797390336 }, { "epoch": 0.09, "learning_rate": 0.0004599213672470513, "loss": 0.0706, "theoretical_loss": 3.489870103619817, "tokens_seen": 1797390336 }, { "epoch": 0.09, "learning_rate": 0.0004598812484955469, "loss": 0.0687, "theoretical_loss": 3.4898486139314695, "tokens_seen": 1797521408 }, { "epoch": 0.09, "learning_rate": 0.00045984112974404234, "loss": 0.0685, "theoretical_loss": 3.489827126248774, "tokens_seen": 1797652480 }, { "epoch": 0.09, "learning_rate": 0.0004598010109925379, "loss": 0.066, "theoretical_loss": 3.489805640571398, "tokens_seen": 1797783552 }, { "epoch": 0.09, "learning_rate": 0.0004597608922410335, "loss": 0.0721, "theoretical_loss": 3.489784156899008, "tokens_seen": 1797914624 }, { "epoch": 0.09, "learning_rate": 0.000459720773489529, "loss": 0.0702, "theoretical_loss": 3.48976267523127, "tokens_seen": 1798045696 }, { "epoch": 0.09, "learning_rate": 0.00045968065473802456, "loss": 0.0741, "theoretical_loss": 3.4897411955678512, "tokens_seen": 1798176768 }, { "epoch": 0.09, "learning_rate": 0.0004596405359865201, "loss": 0.0691, "theoretical_loss": 3.4897197179084185, "tokens_seen": 1798307840 }, { "epoch": 0.09, "learning_rate": 0.0004596004172350157, "loss": 0.0712, "theoretical_loss": 3.489698242252639, "tokens_seen": 1798438912 }, { "epoch": 0.09, "learning_rate": 0.0004595602984835112, "loss": 0.0698, "theoretical_loss": 3.48967676860018, "tokens_seen": 1798569984 }, { "epoch": 0.09, "learning_rate": 0.0004595201797320068, "loss": 0.0681, "theoretical_loss": 3.4896552969507084, "tokens_seen": 1798701056 }, { "epoch": 0.09, "learning_rate": 0.00045948006098050234, "loss": 0.0695, "theoretical_loss": 3.4896338273038916, "tokens_seen": 1798832128 }, { "epoch": 0.09, "learning_rate": 0.0004594399422289978, "loss": 0.0685, "theoretical_loss": 3.4896123596593966, "tokens_seen": 1798963200 }, { "epoch": 0.09, "learning_rate": 0.00045939982347749337, "loss": 0.0717, "theoretical_loss": 3.4895908940168905, "tokens_seen": 1799094272 }, { "epoch": 0.09, "learning_rate": 0.00045935970472598894, "loss": 0.0702, "theoretical_loss": 3.489569430376042, "tokens_seen": 1799225344 }, { "epoch": 0.09, "learning_rate": 0.00045931958597448445, "loss": 0.0681, "theoretical_loss": 3.489547968736517, "tokens_seen": 1799356416 }, { "epoch": 0.09, "learning_rate": 0.00045927946722298, "loss": 0.0714, "theoretical_loss": 3.489526509097985, "tokens_seen": 1799487488 }, { "epoch": 0.09, "learning_rate": 0.0004592393484714756, "loss": 0.0687, "theoretical_loss": 3.4895050514601116, "tokens_seen": 1799618560 }, { "epoch": 0.09, "learning_rate": 0.00045919922971997116, "loss": 0.0719, "theoretical_loss": 3.4894835958225663, "tokens_seen": 1799749632 }, { "epoch": 0.09, "learning_rate": 0.0004591591109684667, "loss": 0.0669, "theoretical_loss": 3.4894621421850163, "tokens_seen": 1799880704 }, { "epoch": 0.09, "learning_rate": 0.00045911899221696224, "loss": 0.0713, "theoretical_loss": 3.48944069054713, "tokens_seen": 1800011776 }, { "epoch": 0.09, "learning_rate": 0.0004590788734654578, "loss": 0.0698, "theoretical_loss": 3.489419240908574, "tokens_seen": 1800142848 }, { "epoch": 0.09, "learning_rate": 0.00045903875471395327, "loss": 0.073, "theoretical_loss": 3.489397793269018, "tokens_seen": 1800273920 }, { "epoch": 0.09, "learning_rate": 0.00045899863596244884, "loss": 0.072, "theoretical_loss": 3.48937634762813, "tokens_seen": 1800404992 }, { "epoch": 0.09, "learning_rate": 0.0004589585172109444, "loss": 0.0704, "theoretical_loss": 3.489354903985577, "tokens_seen": 1800536064 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.0019156986381858587, "objective/train/docs_used": 657521, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4225683212280273, "objective/train/original_loss": 1.4225683212280273, "objective/train/theoretical_loss": 3.489333462341029, "objective/train/tokens_used": 171191776, "objective/train/value_avg": -0.0103759765625, "objective/train/value_loss": 0.0002948559995274991, "objective/train/value_max": -0.00012934207916259766, "objective/train/value_min": -0.86962890625, "objective/train/value_reward_corr": 0.7419255094073872, "objective/train/value_std": 0.019683837890625, "objective/train/weight_avg": 1.0020540952682495, "objective/train/weighted_lm_loss": 1.4249180555343628, "objective/train/weights_max": 1.389040470123291, "objective/train/weights_min": 0.3843126595020294, "theoretical_loss": 3.489333462341029, "tokens_seen": 1800667136 }, { "epoch": 0.09, "learning_rate": 0.0004589183984594399, "loss": 0.0712, "theoretical_loss": 3.489333462341029, "tokens_seen": 1800667136 }, { "epoch": 0.09, "learning_rate": 0.0004588782797079355, "loss": 0.0701, "theoretical_loss": 3.489312022694153, "tokens_seen": 1800798208 }, { "epoch": 0.09, "learning_rate": 0.00045883816095643106, "loss": 0.0688, "theoretical_loss": 3.4892905850446185, "tokens_seen": 1800929280 }, { "epoch": 0.09, "learning_rate": 0.0004587980422049266, "loss": 0.0723, "theoretical_loss": 3.4892691493920935, "tokens_seen": 1801060352 }, { "epoch": 0.09, "learning_rate": 0.00045875792345342214, "loss": 0.071, "theoretical_loss": 3.489247715736247, "tokens_seen": 1801191424 }, { "epoch": 0.09, "learning_rate": 0.0004587178047019177, "loss": 0.068, "theoretical_loss": 3.4892262840767474, "tokens_seen": 1801322496 }, { "epoch": 0.09, "learning_rate": 0.0004586776859504133, "loss": 0.0663, "theoretical_loss": 3.489204854413264, "tokens_seen": 1801453568 }, { "epoch": 0.09, "learning_rate": 0.00045863756719890874, "loss": 0.0756, "theoretical_loss": 3.4891834267454653, "tokens_seen": 1801584640 }, { "epoch": 0.09, "learning_rate": 0.0004585974484474043, "loss": 0.0735, "theoretical_loss": 3.48916200107302, "tokens_seen": 1801715712 }, { "epoch": 0.09, "learning_rate": 0.0004585573296958999, "loss": 0.0709, "theoretical_loss": 3.4891405773955984, "tokens_seen": 1801846784 }, { "epoch": 0.09, "learning_rate": 0.0004585172109443954, "loss": 0.0727, "theoretical_loss": 3.489119155712868, "tokens_seen": 1801977856 }, { "epoch": 0.09, "learning_rate": 0.00045847709219289096, "loss": 0.0681, "theoretical_loss": 3.4890977360244992, "tokens_seen": 1802108928 }, { "epoch": 0.09, "learning_rate": 0.0004584369734413865, "loss": 0.071, "theoretical_loss": 3.4890763183301607, "tokens_seen": 1802240000 }, { "epoch": 0.09, "learning_rate": 0.0004583968546898821, "loss": 0.0733, "theoretical_loss": 3.489054902629522, "tokens_seen": 1802371072 }, { "epoch": 0.09, "learning_rate": 0.0004583567359383776, "loss": 0.0701, "theoretical_loss": 3.489033488922253, "tokens_seen": 1802502144 }, { "epoch": 0.09, "learning_rate": 0.0004583166171868732, "loss": 0.0699, "theoretical_loss": 3.489012077208022, "tokens_seen": 1802633216 }, { "epoch": 0.09, "learning_rate": 0.00045827649843536874, "loss": 0.0713, "theoretical_loss": 3.4889906674865, "tokens_seen": 1802764288 }, { "epoch": 0.09, "learning_rate": 0.0004582363796838642, "loss": 0.0671, "theoretical_loss": 3.488969259757356, "tokens_seen": 1802895360 }, { "epoch": 0.09, "learning_rate": 0.0004581962609323598, "loss": 0.068, "theoretical_loss": 3.48894785402026, "tokens_seen": 1803026432 }, { "epoch": 0.09, "learning_rate": 0.00045815614218085534, "loss": 0.0717, "theoretical_loss": 3.4889264502748816, "tokens_seen": 1803157504 }, { "epoch": 0.09, "learning_rate": 0.00045811602342935086, "loss": 0.0659, "theoretical_loss": 3.4889050485208912, "tokens_seen": 1803288576 }, { "epoch": 0.09, "learning_rate": 0.0004580759046778464, "loss": 0.0673, "theoretical_loss": 3.488883648757958, "tokens_seen": 1803419648 }, { "epoch": 0.09, "learning_rate": 0.000458035785926342, "loss": 0.0741, "theoretical_loss": 3.4888622509857523, "tokens_seen": 1803550720 }, { "epoch": 0.09, "learning_rate": 0.00045799566717483756, "loss": 0.0637, "theoretical_loss": 3.4888408552039447, "tokens_seen": 1803681792 }, { "epoch": 0.09, "learning_rate": 0.0004579555484233331, "loss": 0.0668, "theoretical_loss": 3.488819461412205, "tokens_seen": 1803812864 }, { "epoch": 0.09, "objective/train/advantage_avg": 0.00039605589699931443, "objective/train/docs_used": 658208, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4333267211914062, "objective/train/original_loss": 1.4333264827728271, "objective/train/theoretical_loss": 3.488798069610204, "objective/train/tokens_used": 174468576, "objective/train/value_avg": -0.0076751708984375, "objective/train/value_loss": 0.00017523010319564492, "objective/train/value_max": -5.918741226196289e-05, "objective/train/value_min": -0.6318359375, "objective/train/value_reward_corr": 0.6531567094953252, "objective/train/value_std": 0.01354217529296875, "objective/train/weight_avg": 1.0004781484603882, "objective/train/weighted_lm_loss": 1.4334138631820679, "objective/train/weights_max": 1.4077882766723633, "objective/train/weights_min": 0.3684026002883911, "theoretical_loss": 3.488798069610204, "tokens_seen": 1803943936 }, { "epoch": 0.09, "learning_rate": 0.00045791542967182864, "loss": 0.069, "theoretical_loss": 3.488798069610204, "tokens_seen": 1803943936 }, { "epoch": 0.09, "learning_rate": 0.0004578753109203242, "loss": 0.0727, "theoretical_loss": 3.4887766797976116, "tokens_seen": 1804075008 }, { "epoch": 0.09, "learning_rate": 0.00045783519216881967, "loss": 0.0687, "theoretical_loss": 3.4887552919740985, "tokens_seen": 1804206080 }, { "epoch": 0.09, "learning_rate": 0.00045779507341731524, "loss": 0.0671, "theoretical_loss": 3.488733906139336, "tokens_seen": 1804337152 }, { "epoch": 0.09, "learning_rate": 0.0004577549546658108, "loss": 0.0677, "theoretical_loss": 3.488712522292993, "tokens_seen": 1804468224 }, { "epoch": 0.09, "learning_rate": 0.0004577148359143063, "loss": 0.0679, "theoretical_loss": 3.4886911404347414, "tokens_seen": 1804599296 }, { "epoch": 0.09, "learning_rate": 0.0004576747171628019, "loss": 0.0712, "theoretical_loss": 3.4886697605642514, "tokens_seen": 1804730368 }, { "epoch": 0.09, "learning_rate": 0.00045763459841129746, "loss": 0.0693, "theoretical_loss": 3.4886483826811947, "tokens_seen": 1804861440 }, { "epoch": 0.09, "learning_rate": 0.00045759447965979303, "loss": 0.0732, "theoretical_loss": 3.488627006785242, "tokens_seen": 1804992512 }, { "epoch": 0.09, "learning_rate": 0.00045755436090828854, "loss": 0.069, "theoretical_loss": 3.4886056328760633, "tokens_seen": 1805123584 }, { "epoch": 0.09, "learning_rate": 0.0004575142421567841, "loss": 0.0682, "theoretical_loss": 3.488584260953331, "tokens_seen": 1805254656 }, { "epoch": 0.09, "learning_rate": 0.0004574741234052797, "loss": 0.0698, "theoretical_loss": 3.4885628910167155, "tokens_seen": 1805385728 }, { "epoch": 0.09, "learning_rate": 0.00045743400465377514, "loss": 0.0695, "theoretical_loss": 3.4885415230658885, "tokens_seen": 1805516800 }, { "epoch": 0.09, "learning_rate": 0.0004573938859022707, "loss": 0.0705, "theoretical_loss": 3.488520157100521, "tokens_seen": 1805647872 }, { "epoch": 0.09, "learning_rate": 0.0004573537671507663, "loss": 0.075, "theoretical_loss": 3.4884987931202844, "tokens_seen": 1805778944 }, { "epoch": 0.09, "learning_rate": 0.0004573136483992618, "loss": 0.0691, "theoretical_loss": 3.4884774311248505, "tokens_seen": 1805910016 }, { "epoch": 0.09, "learning_rate": 0.00045727352964775736, "loss": 0.0701, "theoretical_loss": 3.488456071113891, "tokens_seen": 1806041088 }, { "epoch": 0.09, "learning_rate": 0.0004572334108962529, "loss": 0.0695, "theoretical_loss": 3.4884347130870768, "tokens_seen": 1806172160 }, { "epoch": 0.09, "learning_rate": 0.0004571932921447485, "loss": 0.0695, "theoretical_loss": 3.48841335704408, "tokens_seen": 1806303232 }, { "epoch": 0.09, "learning_rate": 0.000457153173393244, "loss": 0.0694, "theoretical_loss": 3.4883920029845727, "tokens_seen": 1806434304 }, { "epoch": 0.09, "learning_rate": 0.0004571130546417396, "loss": 0.0695, "theoretical_loss": 3.4883706509082266, "tokens_seen": 1806565376 }, { "epoch": 0.1, "learning_rate": 0.00045707293589023515, "loss": 0.0678, "theoretical_loss": 3.488349300814713, "tokens_seen": 1806696448 }, { "epoch": 0.1, "learning_rate": 0.0004570328171387306, "loss": 0.0757, "theoretical_loss": 3.488327952703705, "tokens_seen": 1806827520 }, { "epoch": 0.1, "learning_rate": 0.0004569926983872262, "loss": 0.0675, "theoretical_loss": 3.4883066065748745, "tokens_seen": 1806958592 }, { "epoch": 0.1, "learning_rate": 0.00045695257963572174, "loss": 0.0705, "theoretical_loss": 3.4882852624278935, "tokens_seen": 1807089664 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0006213452434167266, "objective/train/docs_used": 659298, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4757124185562134, "objective/train/original_loss": 1.4757122993469238, "objective/train/theoretical_loss": 3.488263920262434, "objective/train/tokens_used": 177745376, "objective/train/value_avg": -0.00957489013671875, "objective/train/value_loss": 0.00021776762150693685, "objective/train/value_max": -5.3882598876953125e-05, "objective/train/value_min": -0.295166015625, "objective/train/value_reward_corr": 0.7275385511592731, "objective/train/value_std": 0.0167694091796875, "objective/train/weight_avg": 1.000723123550415, "objective/train/weighted_lm_loss": 1.4764741659164429, "objective/train/weights_max": 1.1238499879837036, "objective/train/weights_min": 0.3681618273258209, "theoretical_loss": 3.488263920262434, "tokens_seen": 1807220736 }, { "epoch": 0.1, "learning_rate": 0.0004569124608842173, "loss": 0.0659, "theoretical_loss": 3.488263920262434, "tokens_seen": 1807220736 }, { "epoch": 0.1, "learning_rate": 0.0004568723421327128, "loss": 0.0729, "theoretical_loss": 3.4882425800781687, "tokens_seen": 1807351808 }, { "epoch": 0.1, "learning_rate": 0.0004568322233812084, "loss": 0.0699, "theoretical_loss": 3.4882212418747693, "tokens_seen": 1807482880 }, { "epoch": 0.1, "learning_rate": 0.00045679210462970396, "loss": 0.07, "theoretical_loss": 3.4881999056519097, "tokens_seen": 1807613952 }, { "epoch": 0.1, "learning_rate": 0.0004567519858781995, "loss": 0.067, "theoretical_loss": 3.4881785714092617, "tokens_seen": 1807745024 }, { "epoch": 0.1, "learning_rate": 0.00045671186712669504, "loss": 0.0709, "theoretical_loss": 3.4881572391464974, "tokens_seen": 1807876096 }, { "epoch": 0.1, "learning_rate": 0.0004566717483751906, "loss": 0.0683, "theoretical_loss": 3.48813590886329, "tokens_seen": 1808007168 }, { "epoch": 0.1, "learning_rate": 0.0004566316296236861, "loss": 0.0695, "theoretical_loss": 3.488114580559313, "tokens_seen": 1808138240 }, { "epoch": 0.1, "learning_rate": 0.00045659151087218164, "loss": 0.0666, "theoretical_loss": 3.488093254234238, "tokens_seen": 1808269312 }, { "epoch": 0.1, "learning_rate": 0.0004565513921206772, "loss": 0.0715, "theoretical_loss": 3.488071929887739, "tokens_seen": 1808400384 }, { "epoch": 0.1, "learning_rate": 0.0004565112733691728, "loss": 0.0722, "theoretical_loss": 3.488050607519489, "tokens_seen": 1808531456 }, { "epoch": 0.1, "learning_rate": 0.0004564711546176683, "loss": 0.0665, "theoretical_loss": 3.4880292871291605, "tokens_seen": 1808662528 }, { "epoch": 0.1, "learning_rate": 0.00045643103586616386, "loss": 0.0725, "theoretical_loss": 3.4880079687164267, "tokens_seen": 1808793600 }, { "epoch": 0.1, "learning_rate": 0.00045639091711465943, "loss": 0.069, "theoretical_loss": 3.487986652280962, "tokens_seen": 1808924672 }, { "epoch": 0.1, "learning_rate": 0.00045635079836315494, "loss": 0.0709, "theoretical_loss": 3.4879653378224384, "tokens_seen": 1809055744 }, { "epoch": 0.1, "learning_rate": 0.0004563106796116505, "loss": 0.0701, "theoretical_loss": 3.4879440253405303, "tokens_seen": 1809186816 }, { "epoch": 0.1, "learning_rate": 0.0004562705608601461, "loss": 0.069, "theoretical_loss": 3.4879227148349106, "tokens_seen": 1809317888 }, { "epoch": 0.1, "learning_rate": 0.00045623044210864154, "loss": 0.0669, "theoretical_loss": 3.4879014063052534, "tokens_seen": 1809448960 }, { "epoch": 0.1, "learning_rate": 0.0004561903233571371, "loss": 0.0694, "theoretical_loss": 3.487880099751232, "tokens_seen": 1809580032 }, { "epoch": 0.1, "learning_rate": 0.0004561502046056327, "loss": 0.0727, "theoretical_loss": 3.48785879517252, "tokens_seen": 1809711104 }, { "epoch": 0.1, "learning_rate": 0.00045611008585412825, "loss": 0.0713, "theoretical_loss": 3.487837492568792, "tokens_seen": 1809842176 }, { "epoch": 0.1, "learning_rate": 0.00045606996710262376, "loss": 0.0685, "theoretical_loss": 3.4878161919397206, "tokens_seen": 1809973248 }, { "epoch": 0.1, "learning_rate": 0.00045602984835111933, "loss": 0.0711, "theoretical_loss": 3.487794893284981, "tokens_seen": 1810104320 }, { "epoch": 0.1, "learning_rate": 0.0004559897295996149, "loss": 0.0705, "theoretical_loss": 3.4877735966042467, "tokens_seen": 1810235392 }, { "epoch": 0.1, "learning_rate": 0.0004559496108481104, "loss": 0.0706, "theoretical_loss": 3.487752301897192, "tokens_seen": 1810366464 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.0014958747196942568, "objective/train/docs_used": 660449, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5550202131271362, "objective/train/original_loss": 1.5550203323364258, "objective/train/theoretical_loss": 3.487731009163491, "objective/train/tokens_used": 181022176, "objective/train/value_avg": -0.007114410400390625, "objective/train/value_loss": 0.000299563049338758, "objective/train/value_max": -3.737211227416992e-05, "objective/train/value_min": -0.50146484375, "objective/train/value_reward_corr": 0.5884558212379387, "objective/train/value_std": 0.0137481689453125, "objective/train/weight_avg": 1.001632571220398, "objective/train/weighted_lm_loss": 1.5582845211029053, "objective/train/weights_max": 1.5664833784103394, "objective/train/weights_min": 0.38498178124427795, "theoretical_loss": 3.487731009163491, "tokens_seen": 1810497536 }, { "epoch": 0.1, "learning_rate": 0.000455909492096606, "loss": 0.072, "theoretical_loss": 3.487731009163491, "tokens_seen": 1810497536 }, { "epoch": 0.1, "learning_rate": 0.00045586937334510155, "loss": 0.0704, "theoretical_loss": 3.487709718402818, "tokens_seen": 1810628608 }, { "epoch": 0.1, "learning_rate": 0.000455829254593597, "loss": 0.0729, "theoretical_loss": 3.4876884296148476, "tokens_seen": 1810759680 }, { "epoch": 0.1, "learning_rate": 0.0004557891358420926, "loss": 0.0684, "theoretical_loss": 3.4876671427992543, "tokens_seen": 1810890752 }, { "epoch": 0.1, "learning_rate": 0.00045574901709058814, "loss": 0.0692, "theoretical_loss": 3.4876458579557115, "tokens_seen": 1811021824 }, { "epoch": 0.1, "learning_rate": 0.0004557088983390837, "loss": 0.0663, "theoretical_loss": 3.4876245750838955, "tokens_seen": 1811152896 }, { "epoch": 0.1, "learning_rate": 0.0004556687795875792, "loss": 0.0706, "theoretical_loss": 3.48760329418348, "tokens_seen": 1811283968 }, { "epoch": 0.1, "learning_rate": 0.0004556286608360748, "loss": 0.0667, "theoretical_loss": 3.48758201525414, "tokens_seen": 1811415040 }, { "epoch": 0.1, "learning_rate": 0.00045558854208457036, "loss": 0.0717, "theoretical_loss": 3.4875607382955494, "tokens_seen": 1811546112 }, { "epoch": 0.1, "learning_rate": 0.0004555484233330659, "loss": 0.0744, "theoretical_loss": 3.487539463307385, "tokens_seen": 1811677184 }, { "epoch": 0.1, "learning_rate": 0.00045550830458156145, "loss": 0.0683, "theoretical_loss": 3.48751819028932, "tokens_seen": 1811808256 }, { "epoch": 0.1, "learning_rate": 0.000455468185830057, "loss": 0.0692, "theoretical_loss": 3.4874969192410306, "tokens_seen": 1811939328 }, { "epoch": 0.1, "learning_rate": 0.0004554280670785525, "loss": 0.0669, "theoretical_loss": 3.4874756501621915, "tokens_seen": 1812070400 }, { "epoch": 0.1, "learning_rate": 0.00045538794832704804, "loss": 0.0688, "theoretical_loss": 3.4874543830524782, "tokens_seen": 1812201472 }, { "epoch": 0.1, "learning_rate": 0.0004553478295755436, "loss": 0.0693, "theoretical_loss": 3.487433117911565, "tokens_seen": 1812332544 }, { "epoch": 0.1, "learning_rate": 0.0004553077108240392, "loss": 0.0699, "theoretical_loss": 3.487411854739128, "tokens_seen": 1812463616 }, { "epoch": 0.1, "learning_rate": 0.0004552675920725347, "loss": 0.0685, "theoretical_loss": 3.4873905935348435, "tokens_seen": 1812594688 }, { "epoch": 0.1, "learning_rate": 0.00045522747332103026, "loss": 0.0681, "theoretical_loss": 3.487369334298386, "tokens_seen": 1812725760 }, { "epoch": 0.1, "learning_rate": 0.00045518735456952583, "loss": 0.0704, "theoretical_loss": 3.4873480770294307, "tokens_seen": 1812856832 }, { "epoch": 0.1, "learning_rate": 0.00045514723581802134, "loss": 0.0727, "theoretical_loss": 3.4873268217276543, "tokens_seen": 1812987904 }, { "epoch": 0.1, "learning_rate": 0.0004551071170665169, "loss": 0.0724, "theoretical_loss": 3.487305568392732, "tokens_seen": 1813118976 }, { "epoch": 0.1, "learning_rate": 0.0004550669983150125, "loss": 0.0726, "theoretical_loss": 3.48728431702434, "tokens_seen": 1813250048 }, { "epoch": 0.1, "learning_rate": 0.00045502687956350794, "loss": 0.0686, "theoretical_loss": 3.4872630676221537, "tokens_seen": 1813381120 }, { "epoch": 0.1, "learning_rate": 0.0004549867608120035, "loss": 0.0685, "theoretical_loss": 3.4872418201858495, "tokens_seen": 1813512192 }, { "epoch": 0.1, "learning_rate": 0.0004549466420604991, "loss": 0.0738, "theoretical_loss": 3.487220574715103, "tokens_seen": 1813643264 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.000771121762227267, "objective/train/docs_used": 661678, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.427276849746704, "objective/train/original_loss": 1.427276849746704, "objective/train/theoretical_loss": 3.487199331209591, "objective/train/tokens_used": 184298976, "objective/train/value_avg": -0.01129913330078125, "objective/train/value_loss": 0.000233200189541094, "objective/train/value_max": -4.947185516357422e-05, "objective/train/value_min": -0.415283203125, "objective/train/value_reward_corr": 0.8203111321541661, "objective/train/value_std": 0.0224761962890625, "objective/train/weight_avg": 1.0008854866027832, "objective/train/weighted_lm_loss": 1.427241325378418, "objective/train/weights_max": 1.2746552228927612, "objective/train/weights_min": 0.6502756476402283, "theoretical_loss": 3.487199331209591, "tokens_seen": 1813774336 }, { "epoch": 0.1, "learning_rate": 0.00045490652330899465, "loss": 0.0723, "theoretical_loss": 3.487199331209591, "tokens_seen": 1813774336 }, { "epoch": 0.1, "learning_rate": 0.00045486640455749016, "loss": 0.0693, "theoretical_loss": 3.487178089668989, "tokens_seen": 1813905408 }, { "epoch": 0.1, "learning_rate": 0.00045482628580598573, "loss": 0.0717, "theoretical_loss": 3.487156850092974, "tokens_seen": 1814036480 }, { "epoch": 0.1, "learning_rate": 0.0004547861670544813, "loss": 0.0681, "theoretical_loss": 3.4871356124812216, "tokens_seen": 1814167552 }, { "epoch": 0.1, "learning_rate": 0.0004547460483029768, "loss": 0.0696, "theoretical_loss": 3.487114376833409, "tokens_seen": 1814298624 }, { "epoch": 0.1, "learning_rate": 0.0004547059295514724, "loss": 0.0736, "theoretical_loss": 3.4870931431492123, "tokens_seen": 1814429696 }, { "epoch": 0.1, "learning_rate": 0.00045466581079996795, "loss": 0.0666, "theoretical_loss": 3.487071911428308, "tokens_seen": 1814560768 }, { "epoch": 0.1, "learning_rate": 0.0004546256920484634, "loss": 0.0721, "theoretical_loss": 3.4870506816703735, "tokens_seen": 1814691840 }, { "epoch": 0.1, "learning_rate": 0.000454585573296959, "loss": 0.0735, "theoretical_loss": 3.487029453875085, "tokens_seen": 1814822912 }, { "epoch": 0.1, "learning_rate": 0.00045454545454545455, "loss": 0.0711, "theoretical_loss": 3.487008228042119, "tokens_seen": 1814953984 }, { "epoch": 0.1, "learning_rate": 0.0004545053357939501, "loss": 0.0696, "theoretical_loss": 3.4869870041711524, "tokens_seen": 1815085056 }, { "epoch": 0.1, "learning_rate": 0.00045446521704244563, "loss": 0.0722, "theoretical_loss": 3.486965782261863, "tokens_seen": 1815216128 }, { "epoch": 0.1, "learning_rate": 0.0004544250982909412, "loss": 0.0704, "theoretical_loss": 3.4869445623139272, "tokens_seen": 1815347200 }, { "epoch": 0.1, "learning_rate": 0.00045438497953943676, "loss": 0.0688, "theoretical_loss": 3.4869233443270224, "tokens_seen": 1815478272 }, { "epoch": 0.1, "learning_rate": 0.0004543448607879323, "loss": 0.0664, "theoretical_loss": 3.4869021283008257, "tokens_seen": 1815609344 }, { "epoch": 0.1, "learning_rate": 0.00045430474203642785, "loss": 0.0704, "theoretical_loss": 3.4868809142350146, "tokens_seen": 1815740416 }, { "epoch": 0.1, "learning_rate": 0.0004542646232849234, "loss": 0.0698, "theoretical_loss": 3.4868597021292658, "tokens_seen": 1815871488 }, { "epoch": 0.1, "learning_rate": 0.00045422450453341893, "loss": 0.0661, "theoretical_loss": 3.486838491983258, "tokens_seen": 1816002560 }, { "epoch": 0.1, "learning_rate": 0.00045418438578191444, "loss": 0.0711, "theoretical_loss": 3.4868172837966673, "tokens_seen": 1816133632 }, { "epoch": 0.1, "learning_rate": 0.00045414426703041, "loss": 0.0685, "theoretical_loss": 3.4867960775691724, "tokens_seen": 1816264704 }, { "epoch": 0.1, "learning_rate": 0.0004541041482789056, "loss": 0.0672, "theoretical_loss": 3.48677487330045, "tokens_seen": 1816395776 }, { "epoch": 0.1, "learning_rate": 0.0004540640295274011, "loss": 0.0693, "theoretical_loss": 3.4867536709901787, "tokens_seen": 1816526848 }, { "epoch": 0.1, "learning_rate": 0.00045402391077589666, "loss": 0.072, "theoretical_loss": 3.486732470638036, "tokens_seen": 1816657920 }, { "epoch": 0.1, "learning_rate": 0.00045398379202439223, "loss": 0.0676, "theoretical_loss": 3.4867112722436997, "tokens_seen": 1816788992 }, { "epoch": 0.1, "learning_rate": 0.00045394367327288775, "loss": 0.0699, "theoretical_loss": 3.4866900758068478, "tokens_seen": 1816920064 }, { "epoch": 0.1, "objective/train/advantage_avg": 6.047619172022678e-05, "objective/train/docs_used": 662711, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3563425540924072, "objective/train/original_loss": 1.3563426733016968, "objective/train/theoretical_loss": 3.4866688813271587, "objective/train/tokens_used": 187575776, "objective/train/value_avg": -0.00732421875, "objective/train/value_loss": 0.000267098774202168, "objective/train/value_max": -6.812810897827148e-05, "objective/train/value_min": -0.263671875, "objective/train/value_reward_corr": 0.7247055847026068, "objective/train/value_std": 0.0151824951171875, "objective/train/weight_avg": 1.000180959701538, "objective/train/weighted_lm_loss": 1.356947422027588, "objective/train/weights_max": 1.163423776626587, "objective/train/weights_min": 0.36970973014831543, "theoretical_loss": 3.4866688813271587, "tokens_seen": 1817051136 }, { "epoch": 0.1, "learning_rate": 0.0004539035545213833, "loss": 0.0686, "theoretical_loss": 3.4866688813271587, "tokens_seen": 1817051136 }, { "epoch": 0.1, "learning_rate": 0.0004538634357698789, "loss": 0.0684, "theoretical_loss": 3.4866476888043096, "tokens_seen": 1817182208 }, { "epoch": 0.1, "learning_rate": 0.0004538233170183744, "loss": 0.0715, "theoretical_loss": 3.48662649823798, "tokens_seen": 1817313280 }, { "epoch": 0.1, "learning_rate": 0.0004537831982668699, "loss": 0.0672, "theoretical_loss": 3.486605309627847, "tokens_seen": 1817444352 }, { "epoch": 0.1, "learning_rate": 0.0004537430795153655, "loss": 0.0671, "theoretical_loss": 3.4865841229735897, "tokens_seen": 1817575424 }, { "epoch": 0.1, "learning_rate": 0.00045370296076386105, "loss": 0.0715, "theoretical_loss": 3.4865629382748864, "tokens_seen": 1817706496 }, { "epoch": 0.1, "learning_rate": 0.00045366284201235656, "loss": 0.0702, "theoretical_loss": 3.486541755531415, "tokens_seen": 1817837568 }, { "epoch": 0.1, "learning_rate": 0.00045362272326085213, "loss": 0.0696, "theoretical_loss": 3.486520574742855, "tokens_seen": 1817968640 }, { "epoch": 0.1, "learning_rate": 0.0004535826045093477, "loss": 0.0664, "theoretical_loss": 3.4864993959088846, "tokens_seen": 1818099712 }, { "epoch": 0.1, "learning_rate": 0.0004535424857578432, "loss": 0.0723, "theoretical_loss": 3.486478219029183, "tokens_seen": 1818230784 }, { "epoch": 0.1, "learning_rate": 0.0004535023670063388, "loss": 0.0682, "theoretical_loss": 3.486457044103428, "tokens_seen": 1818361856 }, { "epoch": 0.1, "learning_rate": 0.00045346224825483435, "loss": 0.0679, "theoretical_loss": 3.4864358711312997, "tokens_seen": 1818492928 }, { "epoch": 0.1, "learning_rate": 0.00045342212950332986, "loss": 0.0707, "theoretical_loss": 3.486414700112476, "tokens_seen": 1818624000 }, { "epoch": 0.1, "learning_rate": 0.0004533820107518254, "loss": 0.0659, "theoretical_loss": 3.4863935310466365, "tokens_seen": 1818755072 }, { "epoch": 0.1, "learning_rate": 0.00045334189200032095, "loss": 0.0699, "theoretical_loss": 3.4863723639334596, "tokens_seen": 1818886144 }, { "epoch": 0.1, "learning_rate": 0.0004533017732488165, "loss": 0.074, "theoretical_loss": 3.486351198772626, "tokens_seen": 1819017216 }, { "epoch": 0.1, "learning_rate": 0.00045326165449731203, "loss": 0.0691, "theoretical_loss": 3.486330035563814, "tokens_seen": 1819148288 }, { "epoch": 0.1, "learning_rate": 0.0004532215357458076, "loss": 0.0722, "theoretical_loss": 3.4863088743067023, "tokens_seen": 1819279360 }, { "epoch": 0.1, "learning_rate": 0.00045318141699430317, "loss": 0.0688, "theoretical_loss": 3.4862877150009712, "tokens_seen": 1819410432 }, { "epoch": 0.1, "learning_rate": 0.0004531412982427987, "loss": 0.0732, "theoretical_loss": 3.4862665576463003, "tokens_seen": 1819541504 }, { "epoch": 0.1, "learning_rate": 0.00045310117949129425, "loss": 0.0719, "theoretical_loss": 3.4862454022423686, "tokens_seen": 1819672576 }, { "epoch": 0.1, "learning_rate": 0.0004530610607397898, "loss": 0.0653, "theoretical_loss": 3.4862242487888566, "tokens_seen": 1819803648 }, { "epoch": 0.1, "learning_rate": 0.00045302094198828533, "loss": 0.0722, "theoretical_loss": 3.486203097285443, "tokens_seen": 1819934720 }, { "epoch": 0.1, "learning_rate": 0.00045298082323678084, "loss": 0.0669, "theoretical_loss": 3.486181947731808, "tokens_seen": 1820065792 }, { "epoch": 0.1, "learning_rate": 0.0004529407044852764, "loss": 0.0721, "theoretical_loss": 3.4861608001276316, "tokens_seen": 1820196864 }, { "epoch": 0.1, "objective/train/advantage_avg": 0.00040559584158472717, "objective/train/docs_used": 663849, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2499654293060303, "objective/train/original_loss": 1.2499654293060303, "objective/train/theoretical_loss": 3.486139654472594, "objective/train/tokens_used": 190852576, "objective/train/value_avg": -0.00846099853515625, "objective/train/value_loss": 0.00024075692635960877, "objective/train/value_max": -4.363059997558594e-05, "objective/train/value_min": -0.4990234375, "objective/train/value_reward_corr": 0.6729218011699016, "objective/train/value_std": 0.0157928466796875, "objective/train/weight_avg": 1.0005170106887817, "objective/train/weighted_lm_loss": 1.249758243560791, "objective/train/weights_max": 1.2498384714126587, "objective/train/weights_min": 0.370623379945755, "theoretical_loss": 3.486139654472594, "tokens_seen": 1820327936 }, { "epoch": 0.1, "learning_rate": 0.000452900585733772, "loss": 0.0682, "theoretical_loss": 3.486139654472594, "tokens_seen": 1820327936 }, { "epoch": 0.1, "learning_rate": 0.0004528604669822675, "loss": 0.0669, "theoretical_loss": 3.4861185107663744, "tokens_seen": 1820459008 }, { "epoch": 0.1, "learning_rate": 0.00045282034823076306, "loss": 0.0693, "theoretical_loss": 3.486097369008654, "tokens_seen": 1820590080 }, { "epoch": 0.1, "learning_rate": 0.00045278022947925863, "loss": 0.0666, "theoretical_loss": 3.486076229199112, "tokens_seen": 1820721152 }, { "epoch": 0.1, "learning_rate": 0.00045274011072775415, "loss": 0.0718, "theoretical_loss": 3.4860550913374286, "tokens_seen": 1820852224 }, { "epoch": 0.1, "learning_rate": 0.0004526999919762497, "loss": 0.0682, "theoretical_loss": 3.4860339554232853, "tokens_seen": 1820983296 }, { "epoch": 0.1, "learning_rate": 0.0004526598732247453, "loss": 0.0727, "theoretical_loss": 3.4860128214563613, "tokens_seen": 1821114368 }, { "epoch": 0.1, "learning_rate": 0.0004526197544732408, "loss": 0.0712, "theoretical_loss": 3.4859916894363376, "tokens_seen": 1821245440 }, { "epoch": 0.1, "learning_rate": 0.0004525796357217363, "loss": 0.0669, "theoretical_loss": 3.4859705593628947, "tokens_seen": 1821376512 }, { "epoch": 0.1, "learning_rate": 0.0004525395169702319, "loss": 0.0713, "theoretical_loss": 3.485949431235713, "tokens_seen": 1821507584 }, { "epoch": 0.1, "learning_rate": 0.00045249939821872745, "loss": 0.0701, "theoretical_loss": 3.4859283050544736, "tokens_seen": 1821638656 }, { "epoch": 0.1, "learning_rate": 0.00045245927946722296, "loss": 0.0706, "theoretical_loss": 3.485907180818857, "tokens_seen": 1821769728 }, { "epoch": 0.1, "learning_rate": 0.00045241916071571853, "loss": 0.0705, "theoretical_loss": 3.4858860585285445, "tokens_seen": 1821900800 }, { "epoch": 0.1, "learning_rate": 0.0004523790419642141, "loss": 0.0683, "theoretical_loss": 3.4858649381832163, "tokens_seen": 1822031872 }, { "epoch": 0.1, "learning_rate": 0.00045233892321270967, "loss": 0.0715, "theoretical_loss": 3.485843819782554, "tokens_seen": 1822162944 }, { "epoch": 0.1, "learning_rate": 0.0004522988044612052, "loss": 0.0716, "theoretical_loss": 3.485822703326238, "tokens_seen": 1822294016 }, { "epoch": 0.1, "learning_rate": 0.00045225868570970075, "loss": 0.0702, "theoretical_loss": 3.48580158881395, "tokens_seen": 1822425088 }, { "epoch": 0.1, "learning_rate": 0.0004522185669581963, "loss": 0.0698, "theoretical_loss": 3.4857804762453712, "tokens_seen": 1822556160 }, { "epoch": 0.1, "learning_rate": 0.0004521784482066918, "loss": 0.0686, "theoretical_loss": 3.4857593656201833, "tokens_seen": 1822687232 }, { "epoch": 0.1, "learning_rate": 0.00045213832945518735, "loss": 0.0713, "theoretical_loss": 3.485738256938067, "tokens_seen": 1822818304 }, { "epoch": 0.1, "learning_rate": 0.0004520982107036829, "loss": 0.0699, "theoretical_loss": 3.4857171501987034, "tokens_seen": 1822949376 }, { "epoch": 0.1, "learning_rate": 0.00045205809195217843, "loss": 0.0738, "theoretical_loss": 3.4856960454017756, "tokens_seen": 1823080448 }, { "epoch": 0.11, "learning_rate": 0.000452017973200674, "loss": 0.0695, "theoretical_loss": 3.4856749425469635, "tokens_seen": 1823211520 }, { "epoch": 0.11, "learning_rate": 0.00045197785444916957, "loss": 0.065, "theoretical_loss": 3.4856538416339493, "tokens_seen": 1823342592 }, { "epoch": 0.11, "learning_rate": 0.00045193773569766514, "loss": 0.07, "theoretical_loss": 3.485632742662416, "tokens_seen": 1823473664 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0009314012131653726, "objective/train/docs_used": 664954, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4637941122055054, "objective/train/original_loss": 1.4637939929962158, "objective/train/theoretical_loss": 3.4856116456320434, "objective/train/tokens_used": 194129376, "objective/train/value_avg": -0.00765228271484375, "objective/train/value_loss": 0.0005327301914803684, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.74951171875, "objective/train/value_reward_corr": 0.6373387025451401, "objective/train/value_std": 0.0185089111328125, "objective/train/weight_avg": 1.0011636018753052, "objective/train/weighted_lm_loss": 1.4662120342254639, "objective/train/weights_max": 1.6294406652450562, "objective/train/weights_min": 0.37706345319747925, "theoretical_loss": 3.4856116456320434, "tokens_seen": 1823604736 }, { "epoch": 0.11, "learning_rate": 0.00045189761694616065, "loss": 0.0706, "theoretical_loss": 3.4856116456320434, "tokens_seen": 1823604736 }, { "epoch": 0.11, "learning_rate": 0.0004518574981946562, "loss": 0.0738, "theoretical_loss": 3.4855905505425144, "tokens_seen": 1823735808 }, { "epoch": 0.11, "learning_rate": 0.0004518173794431518, "loss": 0.0745, "theoretical_loss": 3.4855694573935114, "tokens_seen": 1823866880 }, { "epoch": 0.11, "learning_rate": 0.00045177726069164725, "loss": 0.0691, "theoretical_loss": 3.485548366184716, "tokens_seen": 1823997952 }, { "epoch": 0.11, "learning_rate": 0.0004517371419401428, "loss": 0.0681, "theoretical_loss": 3.4855272769158097, "tokens_seen": 1824129024 }, { "epoch": 0.11, "learning_rate": 0.0004516970231886384, "loss": 0.0701, "theoretical_loss": 3.4855061895864763, "tokens_seen": 1824260096 }, { "epoch": 0.11, "learning_rate": 0.0004516569044371339, "loss": 0.0707, "theoretical_loss": 3.485485104196396, "tokens_seen": 1824391168 }, { "epoch": 0.11, "learning_rate": 0.00045161678568562947, "loss": 0.0672, "theoretical_loss": 3.485464020745253, "tokens_seen": 1824522240 }, { "epoch": 0.11, "learning_rate": 0.00045157666693412503, "loss": 0.0716, "theoretical_loss": 3.4854429392327293, "tokens_seen": 1824653312 }, { "epoch": 0.11, "learning_rate": 0.0004515365481826206, "loss": 0.0679, "theoretical_loss": 3.4854218596585067, "tokens_seen": 1824784384 }, { "epoch": 0.11, "learning_rate": 0.0004514964294311161, "loss": 0.0682, "theoretical_loss": 3.4854007820222686, "tokens_seen": 1824915456 }, { "epoch": 0.11, "learning_rate": 0.0004514563106796117, "loss": 0.067, "theoretical_loss": 3.485379706323697, "tokens_seen": 1825046528 }, { "epoch": 0.11, "learning_rate": 0.00045141619192810725, "loss": 0.0693, "theoretical_loss": 3.4853586325624746, "tokens_seen": 1825177600 }, { "epoch": 0.11, "learning_rate": 0.0004513760731766027, "loss": 0.0686, "theoretical_loss": 3.4853375607382846, "tokens_seen": 1825308672 }, { "epoch": 0.11, "learning_rate": 0.0004513359544250983, "loss": 0.0685, "theoretical_loss": 3.48531649085081, "tokens_seen": 1825439744 }, { "epoch": 0.11, "learning_rate": 0.00045129583567359385, "loss": 0.065, "theoretical_loss": 3.4852954228997337, "tokens_seen": 1825570816 }, { "epoch": 0.11, "learning_rate": 0.00045125571692208936, "loss": 0.0667, "theoretical_loss": 3.485274356884738, "tokens_seen": 1825701888 }, { "epoch": 0.11, "learning_rate": 0.00045121559817058493, "loss": 0.0685, "theoretical_loss": 3.485253292805507, "tokens_seen": 1825832960 }, { "epoch": 0.11, "learning_rate": 0.0004511754794190805, "loss": 0.0719, "theoretical_loss": 3.4852322306617234, "tokens_seen": 1825964032 }, { "epoch": 0.11, "learning_rate": 0.00045113536066757607, "loss": 0.0704, "theoretical_loss": 3.48521117045307, "tokens_seen": 1826095104 }, { "epoch": 0.11, "learning_rate": 0.0004510952419160716, "loss": 0.0744, "theoretical_loss": 3.4851901121792315, "tokens_seen": 1826226176 }, { "epoch": 0.11, "learning_rate": 0.00045105512316456715, "loss": 0.0705, "theoretical_loss": 3.4851690558398896, "tokens_seen": 1826357248 }, { "epoch": 0.11, "learning_rate": 0.0004510150044130627, "loss": 0.0667, "theoretical_loss": 3.485148001434729, "tokens_seen": 1826488320 }, { "epoch": 0.11, "learning_rate": 0.0004509748856615582, "loss": 0.0659, "theoretical_loss": 3.4851269489634324, "tokens_seen": 1826619392 }, { "epoch": 0.11, "learning_rate": 0.00045093476691005375, "loss": 0.0684, "theoretical_loss": 3.4851058984256844, "tokens_seen": 1826750464 }, { "epoch": 0.11, "objective/train/advantage_avg": -0.00014127422764431685, "objective/train/docs_used": 666029, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.377249002456665, "objective/train/original_loss": 1.377248764038086, "objective/train/theoretical_loss": 3.4850848498211677, "objective/train/tokens_used": 197406176, "objective/train/value_avg": -0.0073699951171875, "objective/train/value_loss": 0.00016467536624986678, "objective/train/value_max": -3.427267074584961e-05, "objective/train/value_min": -0.276611328125, "objective/train/value_reward_corr": 0.7656492545804316, "objective/train/value_std": 0.01450347900390625, "objective/train/weight_avg": 0.9999366402626038, "objective/train/weighted_lm_loss": 1.377786636352539, "objective/train/weights_max": 1.224328637123108, "objective/train/weights_min": 0.40050098299980164, "theoretical_loss": 3.4850848498211677, "tokens_seen": 1826881536 }, { "epoch": 0.11, "learning_rate": 0.0004508946481585493, "loss": 0.0711, "theoretical_loss": 3.4850848498211677, "tokens_seen": 1826881536 }, { "epoch": 0.11, "learning_rate": 0.00045085452940704483, "loss": 0.0661, "theoretical_loss": 3.485063803149567, "tokens_seen": 1827012608 }, { "epoch": 0.11, "learning_rate": 0.0004508144106555404, "loss": 0.0671, "theoretical_loss": 3.4850427584105654, "tokens_seen": 1827143680 }, { "epoch": 0.11, "learning_rate": 0.00045077429190403597, "loss": 0.0707, "theoretical_loss": 3.4850217156038474, "tokens_seen": 1827274752 }, { "epoch": 0.11, "learning_rate": 0.00045073417315253154, "loss": 0.0719, "theoretical_loss": 3.4850006747290965, "tokens_seen": 1827405824 }, { "epoch": 0.11, "learning_rate": 0.00045069405440102705, "loss": 0.0663, "theoretical_loss": 3.484979635785997, "tokens_seen": 1827536896 }, { "epoch": 0.11, "learning_rate": 0.0004506539356495226, "loss": 0.069, "theoretical_loss": 3.4849585987742326, "tokens_seen": 1827667968 }, { "epoch": 0.11, "learning_rate": 0.0004506138168980182, "loss": 0.0648, "theoretical_loss": 3.4849375636934887, "tokens_seen": 1827799040 }, { "epoch": 0.11, "learning_rate": 0.00045057369814651365, "loss": 0.0708, "theoretical_loss": 3.4849165305434484, "tokens_seen": 1827930112 }, { "epoch": 0.11, "learning_rate": 0.0004505335793950092, "loss": 0.0671, "theoretical_loss": 3.4848954993237964, "tokens_seen": 1828061184 }, { "epoch": 0.11, "learning_rate": 0.0004504934606435048, "loss": 0.0649, "theoretical_loss": 3.4848744700342174, "tokens_seen": 1828192256 }, { "epoch": 0.11, "learning_rate": 0.0004504533418920003, "loss": 0.0682, "theoretical_loss": 3.484853442674396, "tokens_seen": 1828323328 }, { "epoch": 0.11, "learning_rate": 0.00045041322314049587, "loss": 0.0679, "theoretical_loss": 3.484832417244016, "tokens_seen": 1828454400 }, { "epoch": 0.11, "learning_rate": 0.00045037310438899143, "loss": 0.0705, "theoretical_loss": 3.484811393742763, "tokens_seen": 1828585472 }, { "epoch": 0.11, "learning_rate": 0.000450332985637487, "loss": 0.0678, "theoretical_loss": 3.484790372170321, "tokens_seen": 1828716544 }, { "epoch": 0.11, "learning_rate": 0.0004502928668859825, "loss": 0.0727, "theoretical_loss": 3.4847693525263757, "tokens_seen": 1828847616 }, { "epoch": 0.11, "learning_rate": 0.0004502527481344781, "loss": 0.07, "theoretical_loss": 3.484748334810611, "tokens_seen": 1828978688 }, { "epoch": 0.11, "learning_rate": 0.00045021262938297365, "loss": 0.0693, "theoretical_loss": 3.4847273190227126, "tokens_seen": 1829109760 }, { "epoch": 0.11, "learning_rate": 0.0004501725106314691, "loss": 0.0727, "theoretical_loss": 3.484706305162365, "tokens_seen": 1829240832 }, { "epoch": 0.11, "learning_rate": 0.0004501323918799647, "loss": 0.0706, "theoretical_loss": 3.484685293229254, "tokens_seen": 1829371904 }, { "epoch": 0.11, "learning_rate": 0.00045009227312846025, "loss": 0.0684, "theoretical_loss": 3.484664283223064, "tokens_seen": 1829502976 }, { "epoch": 0.11, "learning_rate": 0.00045005215437695577, "loss": 0.0698, "theoretical_loss": 3.4846432751434806, "tokens_seen": 1829634048 }, { "epoch": 0.11, "learning_rate": 0.00045001203562545133, "loss": 0.0707, "theoretical_loss": 3.484622268990189, "tokens_seen": 1829765120 }, { "epoch": 0.11, "learning_rate": 0.0004499719168739469, "loss": 0.0639, "theoretical_loss": 3.4846012647628752, "tokens_seen": 1829896192 }, { "epoch": 0.11, "learning_rate": 0.00044993179812244247, "loss": 0.0712, "theoretical_loss": 3.4845802624612237, "tokens_seen": 1830027264 }, { "epoch": 0.11, "objective/train/advantage_avg": -0.0008778413757681847, "objective/train/docs_used": 667256, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4548566341400146, "objective/train/original_loss": 1.4548566341400146, "objective/train/theoretical_loss": 3.484559262084921, "objective/train/tokens_used": 200682976, "objective/train/value_avg": -0.00989532470703125, "objective/train/value_loss": 0.00033685576636344194, "objective/train/value_max": -3.045797348022461e-05, "objective/train/value_min": -0.267578125, "objective/train/value_reward_corr": 0.6460258687417859, "objective/train/value_std": 0.0149078369140625, "objective/train/weight_avg": 0.9992766380310059, "objective/train/weighted_lm_loss": 1.4535452127456665, "objective/train/weights_max": 1.3067957162857056, "objective/train/weights_min": 0.3875633478164673, "theoretical_loss": 3.484559262084921, "tokens_seen": 1830158336 }, { "epoch": 0.11, "learning_rate": 0.000449891679370938, "loss": 0.0717, "theoretical_loss": 3.484559262084921, "tokens_seen": 1830158336 }, { "epoch": 0.11, "learning_rate": 0.00044985156061943355, "loss": 0.0697, "theoretical_loss": 3.484538263633652, "tokens_seen": 1830289408 }, { "epoch": 0.11, "learning_rate": 0.0004498114418679291, "loss": 0.0695, "theoretical_loss": 3.4845172671071025, "tokens_seen": 1830420480 }, { "epoch": 0.11, "learning_rate": 0.0004497713231164246, "loss": 0.0697, "theoretical_loss": 3.484496272504959, "tokens_seen": 1830551552 }, { "epoch": 0.11, "learning_rate": 0.00044973120436492015, "loss": 0.0674, "theoretical_loss": 3.4844752798269063, "tokens_seen": 1830682624 }, { "epoch": 0.11, "learning_rate": 0.0004496910856134157, "loss": 0.0712, "theoretical_loss": 3.484454289072631, "tokens_seen": 1830813696 }, { "epoch": 0.11, "learning_rate": 0.0004496509668619113, "loss": 0.0719, "theoretical_loss": 3.484433300241819, "tokens_seen": 1830944768 }, { "epoch": 0.11, "learning_rate": 0.0004496108481104068, "loss": 0.0715, "theoretical_loss": 3.4844123133341567, "tokens_seen": 1831075840 }, { "epoch": 0.11, "learning_rate": 0.00044957072935890237, "loss": 0.0682, "theoretical_loss": 3.4843913283493295, "tokens_seen": 1831206912 }, { "epoch": 0.11, "learning_rate": 0.00044953061060739794, "loss": 0.0678, "theoretical_loss": 3.484370345287024, "tokens_seen": 1831337984 }, { "epoch": 0.11, "learning_rate": 0.00044949049185589345, "loss": 0.0703, "theoretical_loss": 3.4843493641469263, "tokens_seen": 1831469056 }, { "epoch": 0.11, "learning_rate": 0.000449450373104389, "loss": 0.069, "theoretical_loss": 3.484328384928723, "tokens_seen": 1831600128 }, { "epoch": 0.11, "learning_rate": 0.0004494102543528846, "loss": 0.0688, "theoretical_loss": 3.484307407632101, "tokens_seen": 1831731200 }, { "epoch": 0.11, "learning_rate": 0.00044937013560138005, "loss": 0.07, "theoretical_loss": 3.484286432256745, "tokens_seen": 1831862272 }, { "epoch": 0.11, "learning_rate": 0.0004493300168498756, "loss": 0.0691, "theoretical_loss": 3.484265458802344, "tokens_seen": 1831993344 }, { "epoch": 0.11, "learning_rate": 0.0004492898980983712, "loss": 0.0707, "theoretical_loss": 3.484244487268583, "tokens_seen": 1832124416 }, { "epoch": 0.11, "learning_rate": 0.00044924977934686675, "loss": 0.0687, "theoretical_loss": 3.4842235176551495, "tokens_seen": 1832255488 }, { "epoch": 0.11, "learning_rate": 0.00044920966059536227, "loss": 0.0711, "theoretical_loss": 3.4842025499617297, "tokens_seen": 1832386560 }, { "epoch": 0.11, "learning_rate": 0.00044916954184385784, "loss": 0.0713, "theoretical_loss": 3.4841815841880113, "tokens_seen": 1832517632 }, { "epoch": 0.11, "learning_rate": 0.0004491294230923534, "loss": 0.0706, "theoretical_loss": 3.4841606203336806, "tokens_seen": 1832648704 }, { "epoch": 0.11, "learning_rate": 0.0004490893043408489, "loss": 0.0703, "theoretical_loss": 3.4841396583984245, "tokens_seen": 1832779776 }, { "epoch": 0.11, "learning_rate": 0.0004490491855893445, "loss": 0.0718, "theoretical_loss": 3.4841186983819306, "tokens_seen": 1832910848 }, { "epoch": 0.11, "learning_rate": 0.00044900906683784006, "loss": 0.073, "theoretical_loss": 3.484097740283886, "tokens_seen": 1833041920 }, { "epoch": 0.11, "learning_rate": 0.0004489689480863355, "loss": 0.0708, "theoretical_loss": 3.4840767841039777, "tokens_seen": 1833172992 }, { "epoch": 0.11, "learning_rate": 0.0004489288293348311, "loss": 0.0705, "theoretical_loss": 3.4840558298418935, "tokens_seen": 1833304064 }, { "epoch": 0.11, "objective/train/advantage_avg": 0.0002795106265693903, "objective/train/docs_used": 668449, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3471722602844238, "objective/train/original_loss": 1.3471722602844238, "objective/train/theoretical_loss": 3.48403487749732, "objective/train/tokens_used": 203959776, "objective/train/value_avg": -0.0097503662109375, "objective/train/value_loss": 0.00032171307248063385, "objective/train/value_max": -5.3048133850097656e-05, "objective/train/value_min": -0.73876953125, "objective/train/value_reward_corr": 0.7435515600066553, "objective/train/value_std": 0.0213165283203125, "objective/train/weight_avg": 1.0004255771636963, "objective/train/weighted_lm_loss": 1.3468366861343384, "objective/train/weights_max": 1.4862264394760132, "objective/train/weights_min": 0.3753184378147125, "theoretical_loss": 3.48403487749732, "tokens_seen": 1833435136 }, { "epoch": 0.11, "learning_rate": 0.00044888871058332665, "loss": 0.0721, "theoretical_loss": 3.48403487749732, "tokens_seen": 1833435136 }, { "epoch": 0.11, "learning_rate": 0.0004488485918318222, "loss": 0.0686, "theoretical_loss": 3.4840139270699453, "tokens_seen": 1833566208 }, { "epoch": 0.11, "learning_rate": 0.00044880847308031773, "loss": 0.0715, "theoretical_loss": 3.4839929785594563, "tokens_seen": 1833697280 }, { "epoch": 0.11, "learning_rate": 0.0004487683543288133, "loss": 0.068, "theoretical_loss": 3.483972031965542, "tokens_seen": 1833828352 }, { "epoch": 0.11, "learning_rate": 0.00044872823557730887, "loss": 0.0711, "theoretical_loss": 3.4839510872878883, "tokens_seen": 1833959424 }, { "epoch": 0.11, "learning_rate": 0.0004486881168258044, "loss": 0.0713, "theoretical_loss": 3.483930144526184, "tokens_seen": 1834090496 }, { "epoch": 0.11, "learning_rate": 0.00044864799807429995, "loss": 0.0727, "theoretical_loss": 3.483909203680117, "tokens_seen": 1834221568 }, { "epoch": 0.11, "learning_rate": 0.0004486078793227955, "loss": 0.0699, "theoretical_loss": 3.4838882647493747, "tokens_seen": 1834352640 }, { "epoch": 0.11, "learning_rate": 0.000448567760571291, "loss": 0.0677, "theoretical_loss": 3.483867327733645, "tokens_seen": 1834483712 }, { "epoch": 0.11, "learning_rate": 0.00044852764181978655, "loss": 0.0694, "theoretical_loss": 3.483846392632617, "tokens_seen": 1834614784 }, { "epoch": 0.11, "learning_rate": 0.0004484875230682821, "loss": 0.0685, "theoretical_loss": 3.4838254594459777, "tokens_seen": 1834745856 }, { "epoch": 0.11, "learning_rate": 0.0004484474043167777, "loss": 0.0711, "theoretical_loss": 3.4838045281734153, "tokens_seen": 1834876928 }, { "epoch": 0.11, "learning_rate": 0.0004484072855652732, "loss": 0.073, "theoretical_loss": 3.483783598814619, "tokens_seen": 1835008000 }, { "epoch": 0.11, "learning_rate": 0.00044836716681376877, "loss": 0.0671, "theoretical_loss": 3.4837626713692766, "tokens_seen": 1835139072 }, { "epoch": 0.11, "learning_rate": 0.00044832704806226434, "loss": 0.0708, "theoretical_loss": 3.4837417458370767, "tokens_seen": 1835270144 }, { "epoch": 0.11, "learning_rate": 0.00044828692931075985, "loss": 0.0754, "theoretical_loss": 3.483720822217707, "tokens_seen": 1835401216 }, { "epoch": 0.11, "learning_rate": 0.0004482468105592554, "loss": 0.072, "theoretical_loss": 3.483699900510857, "tokens_seen": 1835532288 }, { "epoch": 0.11, "learning_rate": 0.000448206691807751, "loss": 0.0705, "theoretical_loss": 3.483678980716215, "tokens_seen": 1835663360 }, { "epoch": 0.11, "learning_rate": 0.00044816657305624645, "loss": 0.0678, "theoretical_loss": 3.4836580628334697, "tokens_seen": 1835794432 }, { "epoch": 0.11, "learning_rate": 0.000448126454304742, "loss": 0.071, "theoretical_loss": 3.4836371468623097, "tokens_seen": 1835925504 }, { "epoch": 0.11, "learning_rate": 0.0004480863355532376, "loss": 0.0703, "theoretical_loss": 3.4836162328024245, "tokens_seen": 1836056576 }, { "epoch": 0.11, "learning_rate": 0.00044804621680173315, "loss": 0.0698, "theoretical_loss": 3.483595320653502, "tokens_seen": 1836187648 }, { "epoch": 0.11, "learning_rate": 0.00044800609805022867, "loss": 0.0677, "theoretical_loss": 3.4835744104152324, "tokens_seen": 1836318720 }, { "epoch": 0.11, "learning_rate": 0.00044796597929872424, "loss": 0.0666, "theoretical_loss": 3.483553502087304, "tokens_seen": 1836449792 }, { "epoch": 0.11, "learning_rate": 0.0004479258605472198, "loss": 0.0699, "theoretical_loss": 3.483532595669406, "tokens_seen": 1836580864 }, { "epoch": 0.11, "objective/train/advantage_avg": -0.0005379770300351083, "objective/train/docs_used": 669661, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3358169794082642, "objective/train/original_loss": 1.3358172178268433, "objective/train/theoretical_loss": 3.483511691161228, "objective/train/tokens_used": 207236576, "objective/train/value_avg": -0.007568359375, "objective/train/value_loss": 0.00023999014229048043, "objective/train/value_max": -4.5418739318847656e-05, "objective/train/value_min": -0.2333984375, "objective/train/value_reward_corr": 0.6807983235301688, "objective/train/value_std": 0.01238250732421875, "objective/train/weight_avg": 0.9995737671852112, "objective/train/weighted_lm_loss": 1.3349759578704834, "objective/train/weights_max": 1.2628846168518066, "objective/train/weights_min": 0.3838789761066437, "theoretical_loss": 3.483511691161228, "tokens_seen": 1836711936 }, { "epoch": 0.11, "learning_rate": 0.0004478857417957153, "loss": 0.0652, "theoretical_loss": 3.483511691161228, "tokens_seen": 1836711936 }, { "epoch": 0.11, "learning_rate": 0.0004478456230442109, "loss": 0.069, "theoretical_loss": 3.4834907885624586, "tokens_seen": 1836843008 }, { "epoch": 0.11, "learning_rate": 0.00044780550429270646, "loss": 0.0724, "theoretical_loss": 3.483469887872787, "tokens_seen": 1836974080 }, { "epoch": 0.11, "learning_rate": 0.0004477653855412019, "loss": 0.0693, "theoretical_loss": 3.4834489890919045, "tokens_seen": 1837105152 }, { "epoch": 0.11, "learning_rate": 0.0004477252667896975, "loss": 0.0691, "theoretical_loss": 3.4834280922194987, "tokens_seen": 1837236224 }, { "epoch": 0.11, "learning_rate": 0.00044768514803819305, "loss": 0.0723, "theoretical_loss": 3.48340719725526, "tokens_seen": 1837367296 }, { "epoch": 0.11, "learning_rate": 0.0004476450292866886, "loss": 0.0705, "theoretical_loss": 3.4833863041988775, "tokens_seen": 1837498368 }, { "epoch": 0.11, "learning_rate": 0.00044760491053518414, "loss": 0.0677, "theoretical_loss": 3.4833654130500413, "tokens_seen": 1837629440 }, { "epoch": 0.11, "learning_rate": 0.0004475647917836797, "loss": 0.0773, "theoretical_loss": 3.4833445238084417, "tokens_seen": 1837760512 }, { "epoch": 0.11, "learning_rate": 0.00044752467303217527, "loss": 0.0731, "theoretical_loss": 3.4833236364737674, "tokens_seen": 1837891584 }, { "epoch": 0.11, "learning_rate": 0.0004474845542806708, "loss": 0.0708, "theoretical_loss": 3.4833027510457093, "tokens_seen": 1838022656 }, { "epoch": 0.11, "learning_rate": 0.00044744443552916635, "loss": 0.0678, "theoretical_loss": 3.4832818675239574, "tokens_seen": 1838153728 }, { "epoch": 0.11, "learning_rate": 0.0004474043167776619, "loss": 0.0707, "theoretical_loss": 3.483260985908201, "tokens_seen": 1838284800 }, { "epoch": 0.11, "learning_rate": 0.0004473641980261574, "loss": 0.0686, "theoretical_loss": 3.483240106198131, "tokens_seen": 1838415872 }, { "epoch": 0.11, "learning_rate": 0.00044732407927465295, "loss": 0.0685, "theoretical_loss": 3.4832192283934376, "tokens_seen": 1838546944 }, { "epoch": 0.11, "learning_rate": 0.0004472839605231485, "loss": 0.0674, "theoretical_loss": 3.483198352493811, "tokens_seen": 1838678016 }, { "epoch": 0.11, "learning_rate": 0.0004472438417716441, "loss": 0.0719, "theoretical_loss": 3.483177478498941, "tokens_seen": 1838809088 }, { "epoch": 0.11, "learning_rate": 0.0004472037230201396, "loss": 0.0693, "theoretical_loss": 3.4831566064085187, "tokens_seen": 1838940160 }, { "epoch": 0.11, "learning_rate": 0.00044716360426863517, "loss": 0.0712, "theoretical_loss": 3.483135736222234, "tokens_seen": 1839071232 }, { "epoch": 0.11, "learning_rate": 0.00044712348551713074, "loss": 0.0731, "theoretical_loss": 3.483114867939779, "tokens_seen": 1839202304 }, { "epoch": 0.11, "learning_rate": 0.00044708336676562625, "loss": 0.0704, "theoretical_loss": 3.4830940015608425, "tokens_seen": 1839333376 }, { "epoch": 0.11, "learning_rate": 0.0004470432480141218, "loss": 0.0719, "theoretical_loss": 3.4830731370851167, "tokens_seen": 1839464448 }, { "epoch": 0.11, "learning_rate": 0.0004470031292626174, "loss": 0.073, "theoretical_loss": 3.483052274512292, "tokens_seen": 1839595520 }, { "epoch": 0.12, "learning_rate": 0.00044696301051111285, "loss": 0.069, "theoretical_loss": 3.483031413842058, "tokens_seen": 1839726592 }, { "epoch": 0.12, "learning_rate": 0.0004469228917596084, "loss": 0.0668, "theoretical_loss": 3.4830105550741077, "tokens_seen": 1839857664 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0008268663077615201, "objective/train/docs_used": 670853, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5233300924301147, "objective/train/original_loss": 1.5233298540115356, "objective/train/theoretical_loss": 3.4829896982081303, "objective/train/tokens_used": 210513376, "objective/train/value_avg": -0.006313323974609375, "objective/train/value_loss": 0.0002828619035426527, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.299072265625, "objective/train/value_reward_corr": 0.5748297050694082, "objective/train/value_std": 0.0108184814453125, "objective/train/weight_avg": 1.0009452104568481, "objective/train/weighted_lm_loss": 1.5251843929290771, "objective/train/weights_max": 1.2810508012771606, "objective/train/weights_min": 0.39277857542037964, "theoretical_loss": 3.4829896982081303, "tokens_seen": 1839988736 }, { "epoch": 0.12, "learning_rate": 0.000446882773008104, "loss": 0.0717, "theoretical_loss": 3.4829896982081303, "tokens_seen": 1839988736 }, { "epoch": 0.12, "learning_rate": 0.00044684265425659956, "loss": 0.0755, "theoretical_loss": 3.4829688432438184, "tokens_seen": 1840119808 }, { "epoch": 0.12, "learning_rate": 0.00044680253550509507, "loss": 0.0715, "theoretical_loss": 3.4829479901808624, "tokens_seen": 1840250880 }, { "epoch": 0.12, "learning_rate": 0.00044676241675359064, "loss": 0.0676, "theoretical_loss": 3.482927139018954, "tokens_seen": 1840381952 }, { "epoch": 0.12, "learning_rate": 0.0004467222980020862, "loss": 0.069, "theoretical_loss": 3.482906289757784, "tokens_seen": 1840513024 }, { "epoch": 0.12, "learning_rate": 0.0004466821792505817, "loss": 0.0695, "theoretical_loss": 3.4828854423970435, "tokens_seen": 1840644096 }, { "epoch": 0.12, "learning_rate": 0.0004466420604990773, "loss": 0.0718, "theoretical_loss": 3.482864596936425, "tokens_seen": 1840775168 }, { "epoch": 0.12, "learning_rate": 0.00044660194174757286, "loss": 0.0686, "theoretical_loss": 3.482843753375619, "tokens_seen": 1840906240 }, { "epoch": 0.12, "learning_rate": 0.00044656182299606837, "loss": 0.0665, "theoretical_loss": 3.482822911714318, "tokens_seen": 1841037312 }, { "epoch": 0.12, "learning_rate": 0.0004465217042445639, "loss": 0.0696, "theoretical_loss": 3.4828020719522135, "tokens_seen": 1841168384 }, { "epoch": 0.12, "learning_rate": 0.00044648158549305945, "loss": 0.0679, "theoretical_loss": 3.4827812340889963, "tokens_seen": 1841299456 }, { "epoch": 0.12, "learning_rate": 0.000446441466741555, "loss": 0.0678, "theoretical_loss": 3.4827603981243596, "tokens_seen": 1841430528 }, { "epoch": 0.12, "learning_rate": 0.00044640134799005054, "loss": 0.0744, "theoretical_loss": 3.4827395640579946, "tokens_seen": 1841561600 }, { "epoch": 0.12, "learning_rate": 0.0004463612292385461, "loss": 0.073, "theoretical_loss": 3.482718731889593, "tokens_seen": 1841692672 }, { "epoch": 0.12, "learning_rate": 0.0004463211104870417, "loss": 0.0697, "theoretical_loss": 3.4826979016188475, "tokens_seen": 1841823744 }, { "epoch": 0.12, "learning_rate": 0.0004462809917355372, "loss": 0.0656, "theoretical_loss": 3.4826770732454495, "tokens_seen": 1841954816 }, { "epoch": 0.12, "learning_rate": 0.00044624087298403276, "loss": 0.0682, "theoretical_loss": 3.482656246769092, "tokens_seen": 1842085888 }, { "epoch": 0.12, "learning_rate": 0.0004462007542325283, "loss": 0.0719, "theoretical_loss": 3.4826354221894666, "tokens_seen": 1842216960 }, { "epoch": 0.12, "learning_rate": 0.00044616063548102384, "loss": 0.0714, "theoretical_loss": 3.482614599506266, "tokens_seen": 1842348032 }, { "epoch": 0.12, "learning_rate": 0.00044612051672951935, "loss": 0.0743, "theoretical_loss": 3.482593778719182, "tokens_seen": 1842479104 }, { "epoch": 0.12, "learning_rate": 0.0004460803979780149, "loss": 0.0689, "theoretical_loss": 3.482572959827908, "tokens_seen": 1842610176 }, { "epoch": 0.12, "learning_rate": 0.0004460402792265105, "loss": 0.0703, "theoretical_loss": 3.4825521428321355, "tokens_seen": 1842741248 }, { "epoch": 0.12, "learning_rate": 0.000446000160475006, "loss": 0.0696, "theoretical_loss": 3.482531327731558, "tokens_seen": 1842872320 }, { "epoch": 0.12, "learning_rate": 0.00044596004172350157, "loss": 0.065, "theoretical_loss": 3.4825105145258677, "tokens_seen": 1843003392 }, { "epoch": 0.12, "learning_rate": 0.00044591992297199714, "loss": 0.0688, "theoretical_loss": 3.4824897032147577, "tokens_seen": 1843134464 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0005049621104262769, "objective/train/docs_used": 672182, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3525536060333252, "objective/train/original_loss": 1.3525536060333252, "objective/train/theoretical_loss": 3.4824688937979205, "objective/train/tokens_used": 213790176, "objective/train/value_avg": -0.00382232666015625, "objective/train/value_loss": 0.00013760769797954708, "objective/train/value_max": -8.285045623779297e-05, "objective/train/value_min": -0.275634765625, "objective/train/value_reward_corr": 0.5471323347337931, "objective/train/value_std": 0.00682830810546875, "objective/train/weight_avg": 1.000564694404602, "objective/train/weighted_lm_loss": 1.3533270359039307, "objective/train/weights_max": 1.1742674112319946, "objective/train/weights_min": 0.3705498278141022, "theoretical_loss": 3.4824688937979205, "tokens_seen": 1843265536 }, { "epoch": 0.12, "learning_rate": 0.00044587980422049265, "loss": 0.0681, "theoretical_loss": 3.4824688937979205, "tokens_seen": 1843265536 }, { "epoch": 0.12, "learning_rate": 0.0004458396854689882, "loss": 0.0708, "theoretical_loss": 3.482448086275049, "tokens_seen": 1843396608 }, { "epoch": 0.12, "learning_rate": 0.0004457995667174838, "loss": 0.0711, "theoretical_loss": 3.4824272806458367, "tokens_seen": 1843527680 }, { "epoch": 0.12, "learning_rate": 0.0004457594479659793, "loss": 0.0701, "theoretical_loss": 3.4824064769099756, "tokens_seen": 1843658752 }, { "epoch": 0.12, "learning_rate": 0.0004457193292144748, "loss": 0.0712, "theoretical_loss": 3.4823856750671602, "tokens_seen": 1843789824 }, { "epoch": 0.12, "learning_rate": 0.0004456792104629704, "loss": 0.0686, "theoretical_loss": 3.4823648751170824, "tokens_seen": 1843920896 }, { "epoch": 0.12, "learning_rate": 0.00044563909171146596, "loss": 0.0709, "theoretical_loss": 3.4823440770594365, "tokens_seen": 1844051968 }, { "epoch": 0.12, "learning_rate": 0.00044559897295996147, "loss": 0.0724, "theoretical_loss": 3.482323280893915, "tokens_seen": 1844183040 }, { "epoch": 0.12, "learning_rate": 0.00044555885420845704, "loss": 0.0688, "theoretical_loss": 3.4823024866202115, "tokens_seen": 1844314112 }, { "epoch": 0.12, "learning_rate": 0.0004455187354569526, "loss": 0.0701, "theoretical_loss": 3.4822816942380195, "tokens_seen": 1844445184 }, { "epoch": 0.12, "learning_rate": 0.0004454786167054481, "loss": 0.0713, "theoretical_loss": 3.482260903747033, "tokens_seen": 1844576256 }, { "epoch": 0.12, "learning_rate": 0.0004454384979539437, "loss": 0.0703, "theoretical_loss": 3.4822401151469453, "tokens_seen": 1844707328 }, { "epoch": 0.12, "learning_rate": 0.00044539837920243926, "loss": 0.0675, "theoretical_loss": 3.4822193284374503, "tokens_seen": 1844838400 }, { "epoch": 0.12, "learning_rate": 0.00044535826045093477, "loss": 0.0694, "theoretical_loss": 3.4821985436182405, "tokens_seen": 1844969472 }, { "epoch": 0.12, "learning_rate": 0.0004453181416994303, "loss": 0.0686, "theoretical_loss": 3.4821777606890114, "tokens_seen": 1845100544 }, { "epoch": 0.12, "learning_rate": 0.00044527802294792586, "loss": 0.0698, "theoretical_loss": 3.4821569796494565, "tokens_seen": 1845231616 }, { "epoch": 0.12, "learning_rate": 0.0004452379041964214, "loss": 0.0729, "theoretical_loss": 3.4821362004992693, "tokens_seen": 1845362688 }, { "epoch": 0.12, "learning_rate": 0.00044519778544491694, "loss": 0.0664, "theoretical_loss": 3.482115423238144, "tokens_seen": 1845493760 }, { "epoch": 0.12, "learning_rate": 0.0004451576666934125, "loss": 0.0682, "theoretical_loss": 3.4820946478657744, "tokens_seen": 1845624832 }, { "epoch": 0.12, "learning_rate": 0.0004451175479419081, "loss": 0.0707, "theoretical_loss": 3.4820738743818556, "tokens_seen": 1845755904 }, { "epoch": 0.12, "learning_rate": 0.0004450774291904036, "loss": 0.069, "theoretical_loss": 3.4820531027860815, "tokens_seen": 1845886976 }, { "epoch": 0.12, "learning_rate": 0.00044503731043889916, "loss": 0.0717, "theoretical_loss": 3.4820323330781457, "tokens_seen": 1846018048 }, { "epoch": 0.12, "learning_rate": 0.0004449971916873947, "loss": 0.0699, "theoretical_loss": 3.482011565257743, "tokens_seen": 1846149120 }, { "epoch": 0.12, "learning_rate": 0.00044495707293589024, "loss": 0.0685, "theoretical_loss": 3.481990799324568, "tokens_seen": 1846280192 }, { "epoch": 0.12, "learning_rate": 0.00044491695418438575, "loss": 0.0664, "theoretical_loss": 3.481970035278316, "tokens_seen": 1846411264 }, { "epoch": 0.12, "objective/train/advantage_avg": -0.000250944314757362, "objective/train/docs_used": 673286, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4174323081970215, "objective/train/original_loss": 1.4174323081970215, "objective/train/theoretical_loss": 3.4819492731186807, "objective/train/tokens_used": 217066976, "objective/train/value_avg": -0.00988006591796875, "objective/train/value_loss": 0.00022567587438970804, "objective/train/value_max": -6.157159805297852e-05, "objective/train/value_min": -0.65087890625, "objective/train/value_reward_corr": 0.7708974795052128, "objective/train/value_std": 0.0169830322265625, "objective/train/weight_avg": 0.9998578429222107, "objective/train/weighted_lm_loss": 1.4170056581497192, "objective/train/weights_max": 1.162855863571167, "objective/train/weights_min": 0.5545605421066284, "theoretical_loss": 3.4819492731186807, "tokens_seen": 1846542336 }, { "epoch": 0.12, "learning_rate": 0.0004448768354328813, "loss": 0.0705, "theoretical_loss": 3.4819492731186807, "tokens_seen": 1846542336 }, { "epoch": 0.12, "learning_rate": 0.0004448367166813769, "loss": 0.0727, "theoretical_loss": 3.4819285128453563, "tokens_seen": 1846673408 }, { "epoch": 0.12, "learning_rate": 0.0004447965979298724, "loss": 0.0727, "theoretical_loss": 3.481907754458039, "tokens_seen": 1846804480 }, { "epoch": 0.12, "learning_rate": 0.000444756479178368, "loss": 0.0667, "theoretical_loss": 3.4818869979564218, "tokens_seen": 1846935552 }, { "epoch": 0.12, "learning_rate": 0.00044471636042686354, "loss": 0.0708, "theoretical_loss": 3.4818662433402014, "tokens_seen": 1847066624 }, { "epoch": 0.12, "learning_rate": 0.0004446762416753591, "loss": 0.0708, "theoretical_loss": 3.481845490609072, "tokens_seen": 1847197696 }, { "epoch": 0.12, "learning_rate": 0.0004446361229238546, "loss": 0.0708, "theoretical_loss": 3.4818247397627284, "tokens_seen": 1847328768 }, { "epoch": 0.12, "learning_rate": 0.0004445960041723502, "loss": 0.0676, "theoretical_loss": 3.481803990800866, "tokens_seen": 1847459840 }, { "epoch": 0.12, "learning_rate": 0.0004445558854208457, "loss": 0.0695, "theoretical_loss": 3.48178324372318, "tokens_seen": 1847590912 }, { "epoch": 0.12, "learning_rate": 0.0004445157666693412, "loss": 0.0693, "theoretical_loss": 3.481762498529365, "tokens_seen": 1847721984 }, { "epoch": 0.12, "learning_rate": 0.0004444756479178368, "loss": 0.0734, "theoretical_loss": 3.481741755219118, "tokens_seen": 1847853056 }, { "epoch": 0.12, "learning_rate": 0.00044443552916633236, "loss": 0.0697, "theoretical_loss": 3.4817210137921326, "tokens_seen": 1847984128 }, { "epoch": 0.12, "learning_rate": 0.00044439541041482787, "loss": 0.0704, "theoretical_loss": 3.481700274248105, "tokens_seen": 1848115200 }, { "epoch": 0.12, "learning_rate": 0.00044435529166332344, "loss": 0.0682, "theoretical_loss": 3.481679536586731, "tokens_seen": 1848246272 }, { "epoch": 0.12, "learning_rate": 0.000444315172911819, "loss": 0.0718, "theoretical_loss": 3.481658800807706, "tokens_seen": 1848377344 }, { "epoch": 0.12, "learning_rate": 0.0004442750541603146, "loss": 0.0732, "theoretical_loss": 3.4816380669107247, "tokens_seen": 1848508416 }, { "epoch": 0.12, "learning_rate": 0.0004442349354088101, "loss": 0.0728, "theoretical_loss": 3.4816173348954846, "tokens_seen": 1848639488 }, { "epoch": 0.12, "learning_rate": 0.00044419481665730566, "loss": 0.0737, "theoretical_loss": 3.48159660476168, "tokens_seen": 1848770560 }, { "epoch": 0.12, "learning_rate": 0.00044415469790580123, "loss": 0.0693, "theoretical_loss": 3.481575876509008, "tokens_seen": 1848901632 }, { "epoch": 0.12, "learning_rate": 0.0004441145791542967, "loss": 0.0672, "theoretical_loss": 3.4815551501371633, "tokens_seen": 1849032704 }, { "epoch": 0.12, "learning_rate": 0.00044407446040279226, "loss": 0.0693, "theoretical_loss": 3.4815344256458434, "tokens_seen": 1849163776 }, { "epoch": 0.12, "learning_rate": 0.0004440343416512878, "loss": 0.0688, "theoretical_loss": 3.481513703034743, "tokens_seen": 1849294848 }, { "epoch": 0.12, "learning_rate": 0.00044399422289978334, "loss": 0.0673, "theoretical_loss": 3.4814929823035596, "tokens_seen": 1849425920 }, { "epoch": 0.12, "learning_rate": 0.0004439541041482789, "loss": 0.0698, "theoretical_loss": 3.4814722634519883, "tokens_seen": 1849556992 }, { "epoch": 0.12, "learning_rate": 0.0004439139853967745, "loss": 0.0744, "theoretical_loss": 3.481451546479726, "tokens_seen": 1849688064 }, { "epoch": 0.12, "objective/train/advantage_avg": 0.0017063074046745896, "objective/train/docs_used": 674404, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3422322273254395, "objective/train/original_loss": 1.3422322273254395, "objective/train/theoretical_loss": 3.4814308313864686, "objective/train/tokens_used": 220343776, "objective/train/value_avg": -0.01386260986328125, "objective/train/value_loss": 0.00038435051101259887, "objective/train/value_max": -2.0444393157958984e-05, "objective/train/value_min": -0.70068359375, "objective/train/value_reward_corr": 0.7760458076003516, "objective/train/value_std": 0.02716064453125, "objective/train/weight_avg": 1.001886248588562, "objective/train/weighted_lm_loss": 1.3434960842132568, "objective/train/weights_max": 1.3542664051055908, "objective/train/weights_min": 0.3698268234729767, "theoretical_loss": 3.4814308313864686, "tokens_seen": 1849819136 }, { "epoch": 0.12, "learning_rate": 0.00044387386664527004, "loss": 0.0699, "theoretical_loss": 3.4814308313864686, "tokens_seen": 1849819136 }, { "epoch": 0.12, "learning_rate": 0.00044383374789376556, "loss": 0.069, "theoretical_loss": 3.4814101181719135, "tokens_seen": 1849950208 }, { "epoch": 0.12, "learning_rate": 0.0004437936291422611, "loss": 0.0728, "theoretical_loss": 3.4813894068357563, "tokens_seen": 1850081280 }, { "epoch": 0.12, "learning_rate": 0.0004437535103907567, "loss": 0.0667, "theoretical_loss": 3.4813686973776936, "tokens_seen": 1850212352 }, { "epoch": 0.12, "learning_rate": 0.00044371339163925216, "loss": 0.0674, "theoretical_loss": 3.4813479897974227, "tokens_seen": 1850343424 }, { "epoch": 0.12, "learning_rate": 0.0004436732728877477, "loss": 0.0642, "theoretical_loss": 3.48132728409464, "tokens_seen": 1850474496 }, { "epoch": 0.12, "learning_rate": 0.0004436331541362433, "loss": 0.0679, "theoretical_loss": 3.481306580269042, "tokens_seen": 1850605568 }, { "epoch": 0.12, "learning_rate": 0.0004435930353847388, "loss": 0.0691, "theoretical_loss": 3.4812858783203264, "tokens_seen": 1850736640 }, { "epoch": 0.12, "learning_rate": 0.0004435529166332344, "loss": 0.0692, "theoretical_loss": 3.4812651782481896, "tokens_seen": 1850867712 }, { "epoch": 0.12, "learning_rate": 0.00044351279788172994, "loss": 0.0679, "theoretical_loss": 3.481244480052329, "tokens_seen": 1850998784 }, { "epoch": 0.12, "learning_rate": 0.0004434726791302255, "loss": 0.0699, "theoretical_loss": 3.4812237837324407, "tokens_seen": 1851129856 }, { "epoch": 0.12, "learning_rate": 0.000443432560378721, "loss": 0.0699, "theoretical_loss": 3.4812030892882224, "tokens_seen": 1851260928 }, { "epoch": 0.12, "learning_rate": 0.0004433924416272166, "loss": 0.0684, "theoretical_loss": 3.4811823967193716, "tokens_seen": 1851392000 }, { "epoch": 0.12, "learning_rate": 0.00044335232287571216, "loss": 0.0653, "theoretical_loss": 3.4811617060255857, "tokens_seen": 1851523072 }, { "epoch": 0.12, "learning_rate": 0.0004433122041242076, "loss": 0.0732, "theoretical_loss": 3.4811410172065615, "tokens_seen": 1851654144 }, { "epoch": 0.12, "learning_rate": 0.0004432720853727032, "loss": 0.0668, "theoretical_loss": 3.481120330261997, "tokens_seen": 1851785216 }, { "epoch": 0.12, "learning_rate": 0.00044323196662119876, "loss": 0.0678, "theoretical_loss": 3.4810996451915894, "tokens_seen": 1851916288 }, { "epoch": 0.12, "learning_rate": 0.0004431918478696943, "loss": 0.0726, "theoretical_loss": 3.4810789619950366, "tokens_seen": 1852047360 }, { "epoch": 0.12, "learning_rate": 0.00044315172911818984, "loss": 0.0735, "theoretical_loss": 3.481058280672036, "tokens_seen": 1852178432 }, { "epoch": 0.12, "learning_rate": 0.0004431116103666854, "loss": 0.0724, "theoretical_loss": 3.481037601222285, "tokens_seen": 1852309504 }, { "epoch": 0.12, "learning_rate": 0.000443071491615181, "loss": 0.071, "theoretical_loss": 3.4810169236454818, "tokens_seen": 1852440576 }, { "epoch": 0.12, "learning_rate": 0.0004430313728636765, "loss": 0.0709, "theoretical_loss": 3.480996247941324, "tokens_seen": 1852571648 }, { "epoch": 0.12, "learning_rate": 0.00044299125411217206, "loss": 0.0709, "theoretical_loss": 3.48097557410951, "tokens_seen": 1852702720 }, { "epoch": 0.12, "learning_rate": 0.00044295113536066763, "loss": 0.072, "theoretical_loss": 3.4809549021497372, "tokens_seen": 1852833792 }, { "epoch": 0.12, "learning_rate": 0.0004429110166091631, "loss": 0.0672, "theoretical_loss": 3.4809342320617045, "tokens_seen": 1852964864 }, { "epoch": 0.12, "objective/train/advantage_avg": -0.0005265201907604933, "objective/train/docs_used": 675642, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.424299716949463, "objective/train/original_loss": 1.424299716949463, "objective/train/theoretical_loss": 3.480913563845109, "objective/train/tokens_used": 223620576, "objective/train/value_avg": -0.00939178466796875, "objective/train/value_loss": 0.0003817184187937528, "objective/train/value_max": -5.918741226196289e-05, "objective/train/value_min": -0.701171875, "objective/train/value_reward_corr": 0.6925513316976313, "objective/train/value_std": 0.0177764892578125, "objective/train/weight_avg": 0.9996446371078491, "objective/train/weighted_lm_loss": 1.4230622053146362, "objective/train/weights_max": 1.4535712003707886, "objective/train/weights_min": 0.36818480491638184, "theoretical_loss": 3.480913563845109, "tokens_seen": 1853095936 }, { "epoch": 0.12, "learning_rate": 0.00044287089785765866, "loss": 0.0703, "theoretical_loss": 3.480913563845109, "tokens_seen": 1853095936 }, { "epoch": 0.12, "learning_rate": 0.0004428307791061542, "loss": 0.0683, "theoretical_loss": 3.4808928974996496, "tokens_seen": 1853227008 }, { "epoch": 0.12, "learning_rate": 0.00044279066035464974, "loss": 0.0664, "theoretical_loss": 3.480872233025024, "tokens_seen": 1853358080 }, { "epoch": 0.12, "learning_rate": 0.0004427505416031453, "loss": 0.0683, "theoretical_loss": 3.4808515704209317, "tokens_seen": 1853489152 }, { "epoch": 0.12, "learning_rate": 0.0004427104228516409, "loss": 0.0689, "theoretical_loss": 3.4808309096870698, "tokens_seen": 1853620224 }, { "epoch": 0.12, "learning_rate": 0.00044267030410013645, "loss": 0.0712, "theoretical_loss": 3.4808102508231378, "tokens_seen": 1853751296 }, { "epoch": 0.12, "learning_rate": 0.00044263018534863196, "loss": 0.0698, "theoretical_loss": 3.480789593828834, "tokens_seen": 1853882368 }, { "epoch": 0.12, "learning_rate": 0.00044259006659712753, "loss": 0.0664, "theoretical_loss": 3.4807689387038563, "tokens_seen": 1854013440 }, { "epoch": 0.12, "learning_rate": 0.0004425499478456231, "loss": 0.0661, "theoretical_loss": 3.4807482854479037, "tokens_seen": 1854144512 }, { "epoch": 0.12, "learning_rate": 0.00044250982909411856, "loss": 0.0676, "theoretical_loss": 3.480727634060676, "tokens_seen": 1854275584 }, { "epoch": 0.12, "learning_rate": 0.0004424697103426141, "loss": 0.0661, "theoretical_loss": 3.4807069845418708, "tokens_seen": 1854406656 }, { "epoch": 0.12, "learning_rate": 0.0004424295915911097, "loss": 0.0702, "theoretical_loss": 3.480686336891188, "tokens_seen": 1854537728 }, { "epoch": 0.12, "learning_rate": 0.0004423894728396052, "loss": 0.067, "theoretical_loss": 3.4806656911083254, "tokens_seen": 1854668800 }, { "epoch": 0.12, "learning_rate": 0.0004423493540881008, "loss": 0.0664, "theoretical_loss": 3.4806450471929833, "tokens_seen": 1854799872 }, { "epoch": 0.12, "learning_rate": 0.00044230923533659634, "loss": 0.0702, "theoretical_loss": 3.48062440514486, "tokens_seen": 1854930944 }, { "epoch": 0.12, "learning_rate": 0.0004422691165850919, "loss": 0.0684, "theoretical_loss": 3.480603764963655, "tokens_seen": 1855062016 }, { "epoch": 0.12, "learning_rate": 0.0004422289978335874, "loss": 0.0675, "theoretical_loss": 3.4805831266490674, "tokens_seen": 1855193088 }, { "epoch": 0.12, "learning_rate": 0.000442188879082083, "loss": 0.0689, "theoretical_loss": 3.4805624902007963, "tokens_seen": 1855324160 }, { "epoch": 0.12, "learning_rate": 0.00044214876033057856, "loss": 0.0706, "theoretical_loss": 3.480541855618542, "tokens_seen": 1855455232 }, { "epoch": 0.12, "learning_rate": 0.000442108641579074, "loss": 0.0678, "theoretical_loss": 3.480521222902003, "tokens_seen": 1855586304 }, { "epoch": 0.12, "learning_rate": 0.0004420685228275696, "loss": 0.0719, "theoretical_loss": 3.4805005920508796, "tokens_seen": 1855717376 }, { "epoch": 0.12, "learning_rate": 0.00044202840407606516, "loss": 0.0682, "theoretical_loss": 3.480479963064871, "tokens_seen": 1855848448 }, { "epoch": 0.12, "learning_rate": 0.00044198828532456073, "loss": 0.0673, "theoretical_loss": 3.480459335943676, "tokens_seen": 1855979520 }, { "epoch": 0.12, "learning_rate": 0.00044194816657305624, "loss": 0.0677, "theoretical_loss": 3.4804387106869967, "tokens_seen": 1856110592 }, { "epoch": 0.13, "learning_rate": 0.0004419080478215518, "loss": 0.0676, "theoretical_loss": 3.4804180872945305, "tokens_seen": 1856241664 }, { "epoch": 0.13, "objective/train/advantage_avg": -0.0004772863758262247, "objective/train/docs_used": 676855, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4409241676330566, "objective/train/original_loss": 1.4409241676330566, "objective/train/theoretical_loss": 3.4803974657659786, "objective/train/tokens_used": 226897376, "objective/train/value_avg": -0.007129669189453125, "objective/train/value_loss": 0.00019282002176623791, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.302001953125, "objective/train/value_reward_corr": 0.7688749998012504, "objective/train/value_std": 0.01568603515625, "objective/train/weight_avg": 0.9996163845062256, "objective/train/weighted_lm_loss": 1.440832257270813, "objective/train/weights_max": 1.2364944219589233, "objective/train/weights_min": 0.7174776792526245, "theoretical_loss": 3.4803974657659786, "tokens_seen": 1856372736 }, { "epoch": 0.13, "learning_rate": 0.0004418679290700474, "loss": 0.0665, "theoretical_loss": 3.4803974657659786, "tokens_seen": 1856372736 }, { "epoch": 0.13, "learning_rate": 0.0004418278103185429, "loss": 0.0702, "theoretical_loss": 3.4803768461010405, "tokens_seen": 1856503808 }, { "epoch": 0.13, "learning_rate": 0.00044178769156703846, "loss": 0.068, "theoretical_loss": 3.4803562282994163, "tokens_seen": 1856634880 }, { "epoch": 0.13, "learning_rate": 0.00044174757281553403, "loss": 0.0699, "theoretical_loss": 3.4803356123608062, "tokens_seen": 1856765952 }, { "epoch": 0.13, "learning_rate": 0.0004417074540640295, "loss": 0.0666, "theoretical_loss": 3.48031499828491, "tokens_seen": 1856897024 }, { "epoch": 0.13, "learning_rate": 0.00044166733531252506, "loss": 0.0662, "theoretical_loss": 3.480294386071429, "tokens_seen": 1857028096 }, { "epoch": 0.13, "learning_rate": 0.0004416272165610206, "loss": 0.071, "theoretical_loss": 3.480273775720062, "tokens_seen": 1857159168 }, { "epoch": 0.13, "learning_rate": 0.0004415870978095162, "loss": 0.0687, "theoretical_loss": 3.4802531672305106, "tokens_seen": 1857290240 }, { "epoch": 0.13, "learning_rate": 0.0004415469790580117, "loss": 0.0689, "theoretical_loss": 3.480232560602474, "tokens_seen": 1857421312 }, { "epoch": 0.13, "learning_rate": 0.0004415068603065073, "loss": 0.0715, "theoretical_loss": 3.480211955835654, "tokens_seen": 1857552384 }, { "epoch": 0.13, "learning_rate": 0.00044146674155500285, "loss": 0.0701, "theoretical_loss": 3.480191352929751, "tokens_seen": 1857683456 }, { "epoch": 0.13, "learning_rate": 0.00044142662280349836, "loss": 0.0739, "theoretical_loss": 3.4801707518844647, "tokens_seen": 1857814528 }, { "epoch": 0.13, "learning_rate": 0.00044138650405199393, "loss": 0.0767, "theoretical_loss": 3.4801501526994967, "tokens_seen": 1857945600 }, { "epoch": 0.13, "learning_rate": 0.0004413463853004895, "loss": 0.0693, "theoretical_loss": 3.480129555374547, "tokens_seen": 1858076672 }, { "epoch": 0.13, "learning_rate": 0.00044130626654898496, "loss": 0.0695, "theoretical_loss": 3.480108959909318, "tokens_seen": 1858207744 }, { "epoch": 0.13, "learning_rate": 0.0004412661477974805, "loss": 0.0704, "theoretical_loss": 3.4800883663035083, "tokens_seen": 1858338816 }, { "epoch": 0.13, "learning_rate": 0.0004412260290459761, "loss": 0.0689, "theoretical_loss": 3.480067774556821, "tokens_seen": 1858469888 }, { "epoch": 0.13, "learning_rate": 0.00044118591029447166, "loss": 0.0701, "theoretical_loss": 3.4800471846689556, "tokens_seen": 1858600960 }, { "epoch": 0.13, "learning_rate": 0.0004411457915429672, "loss": 0.071, "theoretical_loss": 3.4800265966396147, "tokens_seen": 1858732032 }, { "epoch": 0.13, "learning_rate": 0.00044110567279146275, "loss": 0.0657, "theoretical_loss": 3.4800060104684984, "tokens_seen": 1858863104 }, { "epoch": 0.13, "learning_rate": 0.0004410655540399583, "loss": 0.07, "theoretical_loss": 3.4799854261553085, "tokens_seen": 1858994176 }, { "epoch": 0.13, "learning_rate": 0.00044102543528845383, "loss": 0.069, "theoretical_loss": 3.4799648436997463, "tokens_seen": 1859125248 }, { "epoch": 0.13, "learning_rate": 0.0004409853165369494, "loss": 0.0732, "theoretical_loss": 3.4799442631015127, "tokens_seen": 1859256320 }, { "epoch": 0.13, "learning_rate": 0.00044094519778544496, "loss": 0.0677, "theoretical_loss": 3.47992368436031, "tokens_seen": 1859387392 }, { "epoch": 0.13, "learning_rate": 0.0004409050790339404, "loss": 0.0707, "theoretical_loss": 3.479903107475839, "tokens_seen": 1859518464 }, { "epoch": 0.13, "objective/train/advantage_avg": -0.00030761200468987226, "objective/train/docs_used": 678160, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4721029996871948, "objective/train/original_loss": 1.4721031188964844, "objective/train/theoretical_loss": 3.4798825324478018, "objective/train/tokens_used": 230174176, "objective/train/value_avg": -0.0083160400390625, "objective/train/value_loss": 0.00022883627389091998, "objective/train/value_max": -5.4776668548583984e-05, "objective/train/value_min": -0.227783203125, "objective/train/value_reward_corr": 0.7155483030355818, "objective/train/value_std": 0.01367950439453125, "objective/train/weight_avg": 0.9998031258583069, "objective/train/weighted_lm_loss": 1.4711469411849976, "objective/train/weights_max": 1.1252226829528809, "objective/train/weights_min": 0.7237396240234375, "theoretical_loss": 3.4798825324478018, "tokens_seen": 1859649536 }, { "epoch": 0.13, "learning_rate": 0.000440864960282436, "loss": 0.0666, "theoretical_loss": 3.4798825324478018, "tokens_seen": 1859649536 }, { "epoch": 0.13, "learning_rate": 0.00044082484153093156, "loss": 0.0659, "theoretical_loss": 3.4798619592759, "tokens_seen": 1859780608 }, { "epoch": 0.13, "learning_rate": 0.00044078472277942713, "loss": 0.0683, "theoretical_loss": 3.4798413879598353, "tokens_seen": 1859911680 }, { "epoch": 0.13, "learning_rate": 0.00044074460402792264, "loss": 0.0728, "theoretical_loss": 3.4798208184993094, "tokens_seen": 1860042752 }, { "epoch": 0.13, "learning_rate": 0.0004407044852764182, "loss": 0.0703, "theoretical_loss": 3.4798002508940242, "tokens_seen": 1860173824 }, { "epoch": 0.13, "learning_rate": 0.0004406643665249138, "loss": 0.0674, "theoretical_loss": 3.479779685143682, "tokens_seen": 1860304896 }, { "epoch": 0.13, "learning_rate": 0.0004406242477734093, "loss": 0.073, "theoretical_loss": 3.479759121247984, "tokens_seen": 1860435968 }, { "epoch": 0.13, "learning_rate": 0.00044058412902190486, "loss": 0.0759, "theoretical_loss": 3.479738559206633, "tokens_seen": 1860567040 }, { "epoch": 0.13, "learning_rate": 0.00044054401027040043, "loss": 0.0675, "theoretical_loss": 3.479717999019332, "tokens_seen": 1860698112 }, { "epoch": 0.13, "learning_rate": 0.0004405038915188959, "loss": 0.0722, "theoretical_loss": 3.4796974406857815, "tokens_seen": 1860829184 }, { "epoch": 0.13, "learning_rate": 0.00044046377276739146, "loss": 0.0724, "theoretical_loss": 3.4796768842056847, "tokens_seen": 1860960256 }, { "epoch": 0.13, "learning_rate": 0.00044042365401588703, "loss": 0.0692, "theoretical_loss": 3.479656329578744, "tokens_seen": 1861091328 }, { "epoch": 0.13, "learning_rate": 0.0004403835352643826, "loss": 0.0691, "theoretical_loss": 3.4796357768046615, "tokens_seen": 1861222400 }, { "epoch": 0.13, "learning_rate": 0.0004403434165128781, "loss": 0.0705, "theoretical_loss": 3.4796152258831405, "tokens_seen": 1861353472 }, { "epoch": 0.13, "learning_rate": 0.0004403032977613737, "loss": 0.0721, "theoretical_loss": 3.4795946768138823, "tokens_seen": 1861484544 }, { "epoch": 0.13, "learning_rate": 0.00044026317900986925, "loss": 0.0707, "theoretical_loss": 3.4795741295965907, "tokens_seen": 1861615616 }, { "epoch": 0.13, "learning_rate": 0.00044022306025836476, "loss": 0.0733, "theoretical_loss": 3.4795535842309677, "tokens_seen": 1861746688 }, { "epoch": 0.13, "learning_rate": 0.00044018294150686033, "loss": 0.0663, "theoretical_loss": 3.4795330407167167, "tokens_seen": 1861877760 }, { "epoch": 0.13, "learning_rate": 0.0004401428227553559, "loss": 0.0706, "theoretical_loss": 3.4795124990535395, "tokens_seen": 1862008832 }, { "epoch": 0.13, "learning_rate": 0.00044010270400385136, "loss": 0.0684, "theoretical_loss": 3.4794919592411406, "tokens_seen": 1862139904 }, { "epoch": 0.13, "learning_rate": 0.0004400625852523469, "loss": 0.0698, "theoretical_loss": 3.479471421279222, "tokens_seen": 1862270976 }, { "epoch": 0.13, "learning_rate": 0.0004400224665008425, "loss": 0.0697, "theoretical_loss": 3.479450885167487, "tokens_seen": 1862402048 }, { "epoch": 0.13, "learning_rate": 0.00043998234774933806, "loss": 0.067, "theoretical_loss": 3.4794303509056377, "tokens_seen": 1862533120 }, { "epoch": 0.13, "learning_rate": 0.0004399422289978336, "loss": 0.0717, "theoretical_loss": 3.4794098184933793, "tokens_seen": 1862664192 }, { "epoch": 0.13, "learning_rate": 0.00043990211024632915, "loss": 0.0716, "theoretical_loss": 3.479389287930413, "tokens_seen": 1862795264 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0014318685280159116, "objective/train/docs_used": 679393, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3926169872283936, "objective/train/original_loss": 1.3926169872283936, "objective/train/theoretical_loss": 3.479368759216444, "objective/train/tokens_used": 233450976, "objective/train/value_avg": -0.008758544921875, "objective/train/value_loss": 0.0005360163631848991, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.90771484375, "objective/train/value_reward_corr": 0.5782614468715181, "objective/train/value_std": 0.0179595947265625, "objective/train/weight_avg": 1.0016584396362305, "objective/train/weighted_lm_loss": 1.3947443962097168, "objective/train/weights_max": 1.8476827144622803, "objective/train/weights_min": 0.3681870996952057, "theoretical_loss": 3.479368759216444, "tokens_seen": 1862926336 }, { "epoch": 0.13, "learning_rate": 0.0004398619914948247, "loss": 0.0704, "theoretical_loss": 3.479368759216444, "tokens_seen": 1862926336 }, { "epoch": 0.13, "learning_rate": 0.00043982187274332023, "loss": 0.0689, "theoretical_loss": 3.4793482323511746, "tokens_seen": 1863057408 }, { "epoch": 0.13, "learning_rate": 0.0004397817539918158, "loss": 0.0684, "theoretical_loss": 3.479327707334308, "tokens_seen": 1863188480 }, { "epoch": 0.13, "learning_rate": 0.00043974163524031137, "loss": 0.0642, "theoretical_loss": 3.479307184165549, "tokens_seen": 1863319552 }, { "epoch": 0.13, "learning_rate": 0.0004397015164888068, "loss": 0.0728, "theoretical_loss": 3.4792866628446, "tokens_seen": 1863450624 }, { "epoch": 0.13, "learning_rate": 0.0004396613977373024, "loss": 0.0708, "theoretical_loss": 3.4792661433711656, "tokens_seen": 1863581696 }, { "epoch": 0.13, "learning_rate": 0.00043962127898579796, "loss": 0.0679, "theoretical_loss": 3.4792456257449484, "tokens_seen": 1863712768 }, { "epoch": 0.13, "learning_rate": 0.00043958116023429353, "loss": 0.0692, "theoretical_loss": 3.479225109965653, "tokens_seen": 1863843840 }, { "epoch": 0.13, "learning_rate": 0.00043954104148278904, "loss": 0.0681, "theoretical_loss": 3.4792045960329836, "tokens_seen": 1863974912 }, { "epoch": 0.13, "learning_rate": 0.0004395009227312846, "loss": 0.0718, "theoretical_loss": 3.4791840839466435, "tokens_seen": 1864105984 }, { "epoch": 0.13, "learning_rate": 0.0004394608039797802, "loss": 0.0681, "theoretical_loss": 3.4791635737063373, "tokens_seen": 1864237056 }, { "epoch": 0.13, "learning_rate": 0.0004394206852282757, "loss": 0.0664, "theoretical_loss": 3.479143065311768, "tokens_seen": 1864368128 }, { "epoch": 0.13, "learning_rate": 0.00043938056647677126, "loss": 0.068, "theoretical_loss": 3.479122558762641, "tokens_seen": 1864499200 }, { "epoch": 0.13, "learning_rate": 0.00043934044772526683, "loss": 0.0721, "theoretical_loss": 3.47910205405866, "tokens_seen": 1864630272 }, { "epoch": 0.13, "learning_rate": 0.00043930032897376235, "loss": 0.0685, "theoretical_loss": 3.4790815511995294, "tokens_seen": 1864761344 }, { "epoch": 0.13, "learning_rate": 0.00043926021022225786, "loss": 0.0686, "theoretical_loss": 3.479061050184953, "tokens_seen": 1864892416 }, { "epoch": 0.13, "learning_rate": 0.00043922009147075343, "loss": 0.067, "theoretical_loss": 3.479040551014636, "tokens_seen": 1865023488 }, { "epoch": 0.13, "learning_rate": 0.000439179972719249, "loss": 0.0674, "theoretical_loss": 3.4790200536882825, "tokens_seen": 1865154560 }, { "epoch": 0.13, "learning_rate": 0.0004391398539677445, "loss": 0.0699, "theoretical_loss": 3.4789995582055973, "tokens_seen": 1865285632 }, { "epoch": 0.13, "learning_rate": 0.0004390997352162401, "loss": 0.0687, "theoretical_loss": 3.4789790645662846, "tokens_seen": 1865416704 }, { "epoch": 0.13, "learning_rate": 0.00043905961646473565, "loss": 0.0751, "theoretical_loss": 3.4789585727700496, "tokens_seen": 1865547776 }, { "epoch": 0.13, "learning_rate": 0.00043901949771323116, "loss": 0.0708, "theoretical_loss": 3.478938082816597, "tokens_seen": 1865678848 }, { "epoch": 0.13, "learning_rate": 0.00043897937896172673, "loss": 0.0728, "theoretical_loss": 3.4789175947056314, "tokens_seen": 1865809920 }, { "epoch": 0.13, "learning_rate": 0.0004389392602102223, "loss": 0.0679, "theoretical_loss": 3.4788971084368576, "tokens_seen": 1865940992 }, { "epoch": 0.13, "learning_rate": 0.0004388991414587178, "loss": 0.0695, "theoretical_loss": 3.478876624009981, "tokens_seen": 1866072064 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0003501854371279478, "objective/train/docs_used": 680643, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4362812042236328, "objective/train/original_loss": 1.4362812042236328, "objective/train/theoretical_loss": 3.478856141424706, "objective/train/tokens_used": 236727776, "objective/train/value_avg": -0.007648468017578125, "objective/train/value_loss": 0.0002105370513163507, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.217041015625, "objective/train/value_reward_corr": 0.709215418266836, "objective/train/value_std": 0.01410675048828125, "objective/train/weight_avg": 1.0004478693008423, "objective/train/weighted_lm_loss": 1.4362444877624512, "objective/train/weights_max": 1.242395043373108, "objective/train/weights_min": 0.3905315399169922, "theoretical_loss": 3.478856141424706, "tokens_seen": 1866203136 }, { "epoch": 0.13, "learning_rate": 0.00043885902270721333, "loss": 0.071, "theoretical_loss": 3.478856141424706, "tokens_seen": 1866203136 }, { "epoch": 0.13, "learning_rate": 0.0004388189039557089, "loss": 0.0679, "theoretical_loss": 3.4788356606807387, "tokens_seen": 1866334208 }, { "epoch": 0.13, "learning_rate": 0.00043877878520420446, "loss": 0.069, "theoretical_loss": 3.478815181777783, "tokens_seen": 1866465280 }, { "epoch": 0.13, "learning_rate": 0.0004387386664527, "loss": 0.0672, "theoretical_loss": 3.478794704715545, "tokens_seen": 1866596352 }, { "epoch": 0.13, "learning_rate": 0.00043869854770119555, "loss": 0.0741, "theoretical_loss": 3.4787742294937303, "tokens_seen": 1866727424 }, { "epoch": 0.13, "learning_rate": 0.0004386584289496911, "loss": 0.0706, "theoretical_loss": 3.478753756112044, "tokens_seen": 1866858496 }, { "epoch": 0.13, "learning_rate": 0.00043861831019818663, "loss": 0.07, "theoretical_loss": 3.4787332845701906, "tokens_seen": 1866989568 }, { "epoch": 0.13, "learning_rate": 0.0004385781914466822, "loss": 0.0702, "theoretical_loss": 3.478712814867877, "tokens_seen": 1867120640 }, { "epoch": 0.13, "learning_rate": 0.00043853807269517777, "loss": 0.0672, "theoretical_loss": 3.4786923470048077, "tokens_seen": 1867251712 }, { "epoch": 0.13, "learning_rate": 0.0004384979539436733, "loss": 0.0659, "theoretical_loss": 3.4786718809806896, "tokens_seen": 1867382784 }, { "epoch": 0.13, "learning_rate": 0.0004384578351921688, "loss": 0.0737, "theoretical_loss": 3.478651416795227, "tokens_seen": 1867513856 }, { "epoch": 0.13, "learning_rate": 0.00043841771644066436, "loss": 0.073, "theoretical_loss": 3.4786309544481266, "tokens_seen": 1867644928 }, { "epoch": 0.13, "learning_rate": 0.00043837759768915993, "loss": 0.0707, "theoretical_loss": 3.478610493939094, "tokens_seen": 1867776000 }, { "epoch": 0.13, "learning_rate": 0.00043833747893765545, "loss": 0.0677, "theoretical_loss": 3.4785900352678345, "tokens_seen": 1867907072 }, { "epoch": 0.13, "learning_rate": 0.000438297360186151, "loss": 0.0754, "theoretical_loss": 3.4785695784340556, "tokens_seen": 1868038144 }, { "epoch": 0.13, "learning_rate": 0.0004382572414346466, "loss": 0.072, "theoretical_loss": 3.4785491234374617, "tokens_seen": 1868169216 }, { "epoch": 0.13, "learning_rate": 0.0004382171226831421, "loss": 0.0732, "theoretical_loss": 3.4785286702777602, "tokens_seen": 1868300288 }, { "epoch": 0.13, "learning_rate": 0.00043817700393163767, "loss": 0.0728, "theoretical_loss": 3.478508218954657, "tokens_seen": 1868431360 }, { "epoch": 0.13, "learning_rate": 0.00043813688518013323, "loss": 0.07, "theoretical_loss": 3.4784877694678573, "tokens_seen": 1868562432 }, { "epoch": 0.13, "learning_rate": 0.00043809676642862875, "loss": 0.0687, "theoretical_loss": 3.4784673218170687, "tokens_seen": 1868693504 }, { "epoch": 0.13, "learning_rate": 0.00043805664767712426, "loss": 0.0747, "theoretical_loss": 3.478446876001997, "tokens_seen": 1868824576 }, { "epoch": 0.13, "learning_rate": 0.00043801652892561983, "loss": 0.0681, "theoretical_loss": 3.4784264320223492, "tokens_seen": 1868955648 }, { "epoch": 0.13, "learning_rate": 0.0004379764101741154, "loss": 0.07, "theoretical_loss": 3.4784059898778312, "tokens_seen": 1869086720 }, { "epoch": 0.13, "learning_rate": 0.0004379362914226109, "loss": 0.0705, "theoretical_loss": 3.47838554956815, "tokens_seen": 1869217792 }, { "epoch": 0.13, "learning_rate": 0.0004378961726711065, "loss": 0.0706, "theoretical_loss": 3.4783651110930123, "tokens_seen": 1869348864 }, { "epoch": 0.13, "objective/train/advantage_avg": -0.00011186942720087245, "objective/train/docs_used": 681749, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3269027471542358, "objective/train/original_loss": 1.3269026279449463, "objective/train/theoretical_loss": 3.4783446744521243, "objective/train/tokens_used": 240004576, "objective/train/value_avg": -0.00551605224609375, "objective/train/value_loss": 0.00018563443154562265, "objective/train/value_max": -1.531839370727539e-05, "objective/train/value_min": -0.369873046875, "objective/train/value_reward_corr": 0.6594851413791856, "objective/train/value_std": 0.01216888427734375, "objective/train/weight_avg": 0.9999720454216003, "objective/train/weighted_lm_loss": 1.3258951902389526, "objective/train/weights_max": 1.1752375364303589, "objective/train/weights_min": 0.37335923314094543, "theoretical_loss": 3.4783446744521243, "tokens_seen": 1869479936 }, { "epoch": 0.13, "learning_rate": 0.00043785605391960205, "loss": 0.0694, "theoretical_loss": 3.4783446744521243, "tokens_seen": 1869479936 }, { "epoch": 0.13, "learning_rate": 0.00043781593516809756, "loss": 0.0716, "theoretical_loss": 3.478324239645193, "tokens_seen": 1869611008 }, { "epoch": 0.13, "learning_rate": 0.00043777581641659313, "loss": 0.0691, "theoretical_loss": 3.4783038066719256, "tokens_seen": 1869742080 }, { "epoch": 0.13, "learning_rate": 0.0004377356976650887, "loss": 0.0667, "theoretical_loss": 3.478283375532029, "tokens_seen": 1869873152 }, { "epoch": 0.13, "learning_rate": 0.0004376955789135842, "loss": 0.0703, "theoretical_loss": 3.47826294622521, "tokens_seen": 1870004224 }, { "epoch": 0.13, "learning_rate": 0.00043765546016207973, "loss": 0.0724, "theoretical_loss": 3.4782425187511756, "tokens_seen": 1870135296 }, { "epoch": 0.13, "learning_rate": 0.0004376153414105753, "loss": 0.0666, "theoretical_loss": 3.4782220931096335, "tokens_seen": 1870266368 }, { "epoch": 0.13, "learning_rate": 0.00043757522265907087, "loss": 0.073, "theoretical_loss": 3.47820166930029, "tokens_seen": 1870397440 }, { "epoch": 0.13, "learning_rate": 0.0004375351039075664, "loss": 0.0726, "theoretical_loss": 3.478181247322853, "tokens_seen": 1870528512 }, { "epoch": 0.13, "learning_rate": 0.00043749498515606195, "loss": 0.0694, "theoretical_loss": 3.47816082717703, "tokens_seen": 1870659584 }, { "epoch": 0.13, "learning_rate": 0.0004374548664045575, "loss": 0.0729, "theoretical_loss": 3.4781404088625276, "tokens_seen": 1870790656 }, { "epoch": 0.13, "learning_rate": 0.00043741474765305303, "loss": 0.0709, "theoretical_loss": 3.478119992379054, "tokens_seen": 1870921728 }, { "epoch": 0.13, "learning_rate": 0.0004373746289015486, "loss": 0.0688, "theoretical_loss": 3.4780995777263164, "tokens_seen": 1871052800 }, { "epoch": 0.13, "learning_rate": 0.00043733451015004417, "loss": 0.0703, "theoretical_loss": 3.478079164904022, "tokens_seen": 1871183872 }, { "epoch": 0.13, "learning_rate": 0.0004372943913985397, "loss": 0.069, "theoretical_loss": 3.47805875391188, "tokens_seen": 1871314944 }, { "epoch": 0.13, "learning_rate": 0.0004372542726470352, "loss": 0.0708, "theoretical_loss": 3.4780383447495966, "tokens_seen": 1871446016 }, { "epoch": 0.13, "learning_rate": 0.00043721415389553076, "loss": 0.0709, "theoretical_loss": 3.4780179374168805, "tokens_seen": 1871577088 }, { "epoch": 0.13, "learning_rate": 0.00043717403514402633, "loss": 0.0707, "theoretical_loss": 3.477997531913439, "tokens_seen": 1871708160 }, { "epoch": 0.13, "learning_rate": 0.00043713391639252185, "loss": 0.072, "theoretical_loss": 3.47797712823898, "tokens_seen": 1871839232 }, { "epoch": 0.13, "learning_rate": 0.0004370937976410174, "loss": 0.071, "theoretical_loss": 3.477956726393212, "tokens_seen": 1871970304 }, { "epoch": 0.13, "learning_rate": 0.000437053678889513, "loss": 0.0721, "theoretical_loss": 3.477936326375843, "tokens_seen": 1872101376 }, { "epoch": 0.13, "learning_rate": 0.00043701356013800855, "loss": 0.075, "theoretical_loss": 3.477915928186581, "tokens_seen": 1872232448 }, { "epoch": 0.13, "learning_rate": 0.00043697344138650407, "loss": 0.0702, "theoretical_loss": 3.4778955318251334, "tokens_seen": 1872363520 }, { "epoch": 0.13, "learning_rate": 0.00043693332263499963, "loss": 0.0701, "theoretical_loss": 3.4778751372912105, "tokens_seen": 1872494592 }, { "epoch": 0.13, "learning_rate": 0.00043689320388349515, "loss": 0.0699, "theoretical_loss": 3.4778547445845187, "tokens_seen": 1872625664 }, { "epoch": 0.13, "objective/train/advantage_avg": 0.0006616510218009353, "objective/train/docs_used": 683408, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3083125352859497, "objective/train/original_loss": 1.3083124160766602, "objective/train/theoretical_loss": 3.4778343537047673, "objective/train/tokens_used": 243281376, "objective/train/value_avg": -0.005035400390625, "objective/train/value_loss": 0.00016289763152599335, "objective/train/value_max": -6.455183029174805e-05, "objective/train/value_min": -0.2037353515625, "objective/train/value_reward_corr": 0.6053039013450519, "objective/train/value_std": 0.00841522216796875, "objective/train/weight_avg": 1.0007331371307373, "objective/train/weighted_lm_loss": 1.309005856513977, "objective/train/weights_max": 1.1209726333618164, "objective/train/weights_min": 0.37588581442832947, "theoretical_loss": 3.4778343537047673, "tokens_seen": 1872756736 }, { "epoch": 0.14, "learning_rate": 0.00043685308513199066, "loss": 0.0698, "theoretical_loss": 3.4778343537047673, "tokens_seen": 1872756736 }, { "epoch": 0.14, "learning_rate": 0.00043681296638048623, "loss": 0.0705, "theoretical_loss": 3.4778139646516646, "tokens_seen": 1872887808 }, { "epoch": 0.14, "learning_rate": 0.0004367728476289818, "loss": 0.0662, "theoretical_loss": 3.4777935774249196, "tokens_seen": 1873018880 }, { "epoch": 0.14, "learning_rate": 0.0004367327288774773, "loss": 0.0698, "theoretical_loss": 3.47777319202424, "tokens_seen": 1873149952 }, { "epoch": 0.14, "learning_rate": 0.0004366926101259729, "loss": 0.0698, "theoretical_loss": 3.4777528084493348, "tokens_seen": 1873281024 }, { "epoch": 0.14, "learning_rate": 0.00043665249137446845, "loss": 0.0747, "theoretical_loss": 3.4777324266999132, "tokens_seen": 1873412096 }, { "epoch": 0.14, "learning_rate": 0.000436612372622964, "loss": 0.0739, "theoretical_loss": 3.477712046775684, "tokens_seen": 1873543168 }, { "epoch": 0.14, "learning_rate": 0.00043657225387145953, "loss": 0.0716, "theoretical_loss": 3.4776916686763557, "tokens_seen": 1873674240 }, { "epoch": 0.14, "learning_rate": 0.0004365321351199551, "loss": 0.0718, "theoretical_loss": 3.477671292401637, "tokens_seen": 1873805312 }, { "epoch": 0.14, "learning_rate": 0.00043649201636845067, "loss": 0.0701, "theoretical_loss": 3.4776509179512374, "tokens_seen": 1873936384 }, { "epoch": 0.14, "learning_rate": 0.00043645189761694613, "loss": 0.0727, "theoretical_loss": 3.477630545324866, "tokens_seen": 1874067456 }, { "epoch": 0.14, "learning_rate": 0.0004364117788654417, "loss": 0.0727, "theoretical_loss": 3.477610174522232, "tokens_seen": 1874198528 }, { "epoch": 0.14, "learning_rate": 0.00043637166011393727, "loss": 0.0695, "theoretical_loss": 3.477589805543044, "tokens_seen": 1874329600 }, { "epoch": 0.14, "learning_rate": 0.0004363315413624328, "loss": 0.0722, "theoretical_loss": 3.477569438387012, "tokens_seen": 1874460672 }, { "epoch": 0.14, "learning_rate": 0.00043629142261092835, "loss": 0.0673, "theoretical_loss": 3.477549073053845, "tokens_seen": 1874591744 }, { "epoch": 0.14, "learning_rate": 0.0004362513038594239, "loss": 0.0695, "theoretical_loss": 3.4775287095432525, "tokens_seen": 1874722816 }, { "epoch": 0.14, "learning_rate": 0.0004362111851079195, "loss": 0.0693, "theoretical_loss": 3.477508347854944, "tokens_seen": 1874853888 }, { "epoch": 0.14, "learning_rate": 0.000436171066356415, "loss": 0.0736, "theoretical_loss": 3.4774879879886287, "tokens_seen": 1874984960 }, { "epoch": 0.14, "learning_rate": 0.00043613094760491057, "loss": 0.0742, "theoretical_loss": 3.477467629944017, "tokens_seen": 1875116032 }, { "epoch": 0.14, "learning_rate": 0.00043609082885340614, "loss": 0.069, "theoretical_loss": 3.4774472737208173, "tokens_seen": 1875247104 }, { "epoch": 0.14, "learning_rate": 0.0004360507101019016, "loss": 0.0706, "theoretical_loss": 3.4774269193187406, "tokens_seen": 1875378176 }, { "epoch": 0.14, "learning_rate": 0.00043601059135039717, "loss": 0.0739, "theoretical_loss": 3.4774065667374963, "tokens_seen": 1875509248 }, { "epoch": 0.14, "learning_rate": 0.00043597047259889273, "loss": 0.067, "theoretical_loss": 3.4773862159767943, "tokens_seen": 1875640320 }, { "epoch": 0.14, "learning_rate": 0.00043593035384738825, "loss": 0.0701, "theoretical_loss": 3.4773658670363448, "tokens_seen": 1875771392 }, { "epoch": 0.14, "learning_rate": 0.0004358902350958838, "loss": 0.074, "theoretical_loss": 3.4773455199158567, "tokens_seen": 1875902464 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.0005015583010390401, "objective/train/docs_used": 684134, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4961943626403809, "objective/train/original_loss": 1.4961941242218018, "objective/train/theoretical_loss": 3.477325174615041, "objective/train/tokens_used": 246558176, "objective/train/value_avg": -0.00518798828125, "objective/train/value_loss": 0.00013814707926940173, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.233154296875, "objective/train/value_reward_corr": 0.5778446413236198, "objective/train/value_std": 0.007965087890625, "objective/train/weight_avg": 1.0005661249160767, "objective/train/weighted_lm_loss": 1.4964805841445923, "objective/train/weights_max": 1.1095370054244995, "objective/train/weights_min": 0.3828728199005127, "theoretical_loss": 3.477325174615041, "tokens_seen": 1876033536 }, { "epoch": 0.14, "learning_rate": 0.0004358501163443794, "loss": 0.0721, "theoretical_loss": 3.477325174615041, "tokens_seen": 1876033536 }, { "epoch": 0.14, "learning_rate": 0.00043580999759287495, "loss": 0.0696, "theoretical_loss": 3.4773048311336083, "tokens_seen": 1876164608 }, { "epoch": 0.14, "learning_rate": 0.00043576987884137047, "loss": 0.0701, "theoretical_loss": 3.477284489471268, "tokens_seen": 1876295680 }, { "epoch": 0.14, "learning_rate": 0.00043572976008986604, "loss": 0.0703, "theoretical_loss": 3.4772641496277306, "tokens_seen": 1876426752 }, { "epoch": 0.14, "learning_rate": 0.0004356896413383616, "loss": 0.0682, "theoretical_loss": 3.4772438116027065, "tokens_seen": 1876557824 }, { "epoch": 0.14, "learning_rate": 0.00043564952258685706, "loss": 0.0759, "theoretical_loss": 3.4772234753959057, "tokens_seen": 1876688896 }, { "epoch": 0.14, "learning_rate": 0.00043560940383535263, "loss": 0.0763, "theoretical_loss": 3.477203141007039, "tokens_seen": 1876819968 }, { "epoch": 0.14, "learning_rate": 0.0004355692850838482, "loss": 0.0703, "theoretical_loss": 3.477182808435818, "tokens_seen": 1876951040 }, { "epoch": 0.14, "learning_rate": 0.0004355291663323437, "loss": 0.0725, "theoretical_loss": 3.4771624776819516, "tokens_seen": 1877082112 }, { "epoch": 0.14, "learning_rate": 0.0004354890475808393, "loss": 0.0725, "theoretical_loss": 3.477142148745151, "tokens_seen": 1877213184 }, { "epoch": 0.14, "learning_rate": 0.00043544892882933485, "loss": 0.0731, "theoretical_loss": 3.4771218216251274, "tokens_seen": 1877344256 }, { "epoch": 0.14, "learning_rate": 0.0004354088100778304, "loss": 0.0697, "theoretical_loss": 3.477101496321591, "tokens_seen": 1877475328 }, { "epoch": 0.14, "learning_rate": 0.00043536869132632593, "loss": 0.0723, "theoretical_loss": 3.4770811728342537, "tokens_seen": 1877606400 }, { "epoch": 0.14, "learning_rate": 0.0004353285725748215, "loss": 0.0747, "theoretical_loss": 3.4770608511628254, "tokens_seen": 1877737472 }, { "epoch": 0.14, "learning_rate": 0.00043528845382331707, "loss": 0.0717, "theoretical_loss": 3.4770405313070176, "tokens_seen": 1877868544 }, { "epoch": 0.14, "learning_rate": 0.00043524833507181253, "loss": 0.0739, "theoretical_loss": 3.477020213266541, "tokens_seen": 1877999616 }, { "epoch": 0.14, "learning_rate": 0.0004352082163203081, "loss": 0.0682, "theoretical_loss": 3.4769998970411073, "tokens_seen": 1878130688 }, { "epoch": 0.14, "learning_rate": 0.00043516809756880367, "loss": 0.0697, "theoretical_loss": 3.476979582630427, "tokens_seen": 1878261760 }, { "epoch": 0.14, "learning_rate": 0.0004351279788172992, "loss": 0.0733, "theoretical_loss": 3.476959270034212, "tokens_seen": 1878392832 }, { "epoch": 0.14, "learning_rate": 0.00043508786006579475, "loss": 0.0754, "theoretical_loss": 3.4769389592521733, "tokens_seen": 1878523904 }, { "epoch": 0.14, "learning_rate": 0.0004350477413142903, "loss": 0.0707, "theoretical_loss": 3.4769186502840226, "tokens_seen": 1878654976 }, { "epoch": 0.14, "learning_rate": 0.0004350076225627859, "loss": 0.0702, "theoretical_loss": 3.476898343129471, "tokens_seen": 1878786048 }, { "epoch": 0.14, "learning_rate": 0.0004349675038112814, "loss": 0.0712, "theoretical_loss": 3.4768780377882305, "tokens_seen": 1878917120 }, { "epoch": 0.14, "learning_rate": 0.00043492738505977697, "loss": 0.0699, "theoretical_loss": 3.476857734260012, "tokens_seen": 1879048192 }, { "epoch": 0.14, "learning_rate": 0.00043488726630827254, "loss": 0.0709, "theoretical_loss": 3.476837432544528, "tokens_seen": 1879179264 }, { "epoch": 0.14, "objective/train/advantage_avg": -0.0026938088703900576, "objective/train/docs_used": 685300, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2741111516952515, "objective/train/original_loss": 1.274111032485962, "objective/train/theoretical_loss": 3.4768171326414894, "objective/train/tokens_used": 249834976, "objective/train/value_avg": -0.006580352783203125, "objective/train/value_loss": 0.00033966192859224975, "objective/train/value_max": -3.272294998168945e-05, "objective/train/value_min": -0.77392578125, "objective/train/value_reward_corr": 0.7659797829656868, "objective/train/value_std": 0.019500732421875, "objective/train/weight_avg": 0.9974640607833862, "objective/train/weighted_lm_loss": 1.2707902193069458, "objective/train/weights_max": 1.7102184295654297, "objective/train/weights_min": 0.39301836490631104, "theoretical_loss": 3.4768171326414894, "tokens_seen": 1879310336 }, { "epoch": 0.14, "learning_rate": 0.000434847147556768, "loss": 0.0679, "theoretical_loss": 3.4768171326414894, "tokens_seen": 1879310336 }, { "epoch": 0.14, "learning_rate": 0.00043480702880526357, "loss": 0.07, "theoretical_loss": 3.4767968345506093, "tokens_seen": 1879441408 }, { "epoch": 0.14, "learning_rate": 0.00043476691005375914, "loss": 0.0661, "theoretical_loss": 3.4767765382715976, "tokens_seen": 1879572480 }, { "epoch": 0.14, "learning_rate": 0.00043472679130225465, "loss": 0.0732, "theoretical_loss": 3.476756243804168, "tokens_seen": 1879703552 }, { "epoch": 0.14, "learning_rate": 0.0004346866725507502, "loss": 0.0666, "theoretical_loss": 3.4767359511480316, "tokens_seen": 1879834624 }, { "epoch": 0.14, "learning_rate": 0.0004346465537992458, "loss": 0.0743, "theoretical_loss": 3.476715660302901, "tokens_seen": 1879965696 }, { "epoch": 0.14, "learning_rate": 0.00043460643504774135, "loss": 0.067, "theoretical_loss": 3.4766953712684874, "tokens_seen": 1880096768 }, { "epoch": 0.14, "learning_rate": 0.00043456631629623687, "loss": 0.0708, "theoretical_loss": 3.476675084044504, "tokens_seen": 1880227840 }, { "epoch": 0.14, "learning_rate": 0.00043452619754473244, "loss": 0.0697, "theoretical_loss": 3.476654798630663, "tokens_seen": 1880358912 }, { "epoch": 0.14, "learning_rate": 0.000434486078793228, "loss": 0.076, "theoretical_loss": 3.4766345150266758, "tokens_seen": 1880489984 }, { "epoch": 0.14, "learning_rate": 0.00043444596004172347, "loss": 0.0716, "theoretical_loss": 3.476614233232256, "tokens_seen": 1880621056 }, { "epoch": 0.14, "learning_rate": 0.00043440584129021903, "loss": 0.0735, "theoretical_loss": 3.476593953247115, "tokens_seen": 1880752128 }, { "epoch": 0.14, "learning_rate": 0.0004343657225387146, "loss": 0.0757, "theoretical_loss": 3.476573675070966, "tokens_seen": 1880883200 }, { "epoch": 0.14, "learning_rate": 0.00043432560378721017, "loss": 0.0684, "theoretical_loss": 3.4765533987035213, "tokens_seen": 1881014272 }, { "epoch": 0.14, "learning_rate": 0.0004342854850357057, "loss": 0.0708, "theoretical_loss": 3.4765331241444937, "tokens_seen": 1881145344 }, { "epoch": 0.14, "learning_rate": 0.00043424536628420125, "loss": 0.0694, "theoretical_loss": 3.476512851393596, "tokens_seen": 1881276416 }, { "epoch": 0.14, "learning_rate": 0.0004342052475326968, "loss": 0.0706, "theoretical_loss": 3.4764925804505404, "tokens_seen": 1881407488 }, { "epoch": 0.14, "learning_rate": 0.00043416512878119234, "loss": 0.0721, "theoretical_loss": 3.4764723113150406, "tokens_seen": 1881538560 }, { "epoch": 0.14, "learning_rate": 0.0004341250100296879, "loss": 0.0694, "theoretical_loss": 3.476452043986809, "tokens_seen": 1881669632 }, { "epoch": 0.14, "learning_rate": 0.00043408489127818347, "loss": 0.0714, "theoretical_loss": 3.476431778465559, "tokens_seen": 1881800704 }, { "epoch": 0.14, "learning_rate": 0.00043404477252667893, "loss": 0.0674, "theoretical_loss": 3.4764115147510033, "tokens_seen": 1881931776 }, { "epoch": 0.14, "learning_rate": 0.0004340046537751745, "loss": 0.0701, "theoretical_loss": 3.4763912528428547, "tokens_seen": 1882062848 }, { "epoch": 0.14, "learning_rate": 0.00043396453502367007, "loss": 0.072, "theoretical_loss": 3.4763709927408266, "tokens_seen": 1882193920 }, { "epoch": 0.14, "learning_rate": 0.00043392441627216564, "loss": 0.0687, "theoretical_loss": 3.476350734444633, "tokens_seen": 1882324992 }, { "epoch": 0.14, "learning_rate": 0.00043388429752066115, "loss": 0.0702, "theoretical_loss": 3.4763304779539865, "tokens_seen": 1882456064 }, { "epoch": 0.14, "objective/train/advantage_avg": -3.3198826713487506e-05, "objective/train/docs_used": 686423, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4653056859970093, "objective/train/original_loss": 1.4653058052062988, "objective/train/theoretical_loss": 3.4763102232686003, "objective/train/tokens_used": 253111776, "objective/train/value_avg": -0.005138397216796875, "objective/train/value_loss": 0.00022890044783707708, "objective/train/value_max": -6.759166717529297e-05, "objective/train/value_min": -0.220703125, "objective/train/value_reward_corr": 0.48472755565255143, "objective/train/value_std": 0.0079498291015625, "objective/train/weight_avg": 1.0000646114349365, "objective/train/weighted_lm_loss": 1.4648327827453613, "objective/train/weights_max": 1.1143436431884766, "objective/train/weights_min": 0.36804676055908203, "theoretical_loss": 3.4763102232686003, "tokens_seen": 1882587136 }, { "epoch": 0.14, "learning_rate": 0.0004338441787691567, "loss": 0.0728, "theoretical_loss": 3.4763102232686003, "tokens_seen": 1882587136 }, { "epoch": 0.14, "learning_rate": 0.0004338040600176523, "loss": 0.0706, "theoretical_loss": 3.476289970388188, "tokens_seen": 1882718208 }, { "epoch": 0.14, "learning_rate": 0.0004337639412661478, "loss": 0.0739, "theoretical_loss": 3.476269719312464, "tokens_seen": 1882849280 }, { "epoch": 0.14, "learning_rate": 0.00043372382251464337, "loss": 0.0753, "theoretical_loss": 3.4762494700411404, "tokens_seen": 1882980352 }, { "epoch": 0.14, "learning_rate": 0.00043368370376313894, "loss": 0.0674, "theoretical_loss": 3.4762292225739317, "tokens_seen": 1883111424 }, { "epoch": 0.14, "learning_rate": 0.0004336435850116344, "loss": 0.0696, "theoretical_loss": 3.476208976910552, "tokens_seen": 1883242496 }, { "epoch": 0.14, "learning_rate": 0.00043360346626012997, "loss": 0.0652, "theoretical_loss": 3.476188733050714, "tokens_seen": 1883373568 }, { "epoch": 0.14, "learning_rate": 0.00043356334750862554, "loss": 0.0707, "theoretical_loss": 3.476168490994132, "tokens_seen": 1883504640 }, { "epoch": 0.14, "learning_rate": 0.0004335232287571211, "loss": 0.0721, "theoretical_loss": 3.47614825074052, "tokens_seen": 1883635712 }, { "epoch": 0.14, "learning_rate": 0.0004334831100056166, "loss": 0.0677, "theoretical_loss": 3.4761280122895926, "tokens_seen": 1883766784 }, { "epoch": 0.14, "learning_rate": 0.0004334429912541122, "loss": 0.069, "theoretical_loss": 3.4761077756410628, "tokens_seen": 1883897856 }, { "epoch": 0.14, "learning_rate": 0.00043340287250260776, "loss": 0.0719, "theoretical_loss": 3.476087540794645, "tokens_seen": 1884028928 }, { "epoch": 0.14, "learning_rate": 0.00043336275375110327, "loss": 0.0699, "theoretical_loss": 3.4760673077500535, "tokens_seen": 1884160000 }, { "epoch": 0.14, "learning_rate": 0.00043332263499959884, "loss": 0.0717, "theoretical_loss": 3.476047076507002, "tokens_seen": 1884291072 }, { "epoch": 0.14, "learning_rate": 0.0004332825162480944, "loss": 0.0747, "theoretical_loss": 3.4760268470652056, "tokens_seen": 1884422144 }, { "epoch": 0.14, "learning_rate": 0.00043324239749658987, "loss": 0.0738, "theoretical_loss": 3.4760066194243784, "tokens_seen": 1884553216 }, { "epoch": 0.14, "learning_rate": 0.00043320227874508543, "loss": 0.0697, "theoretical_loss": 3.475986393584235, "tokens_seen": 1884684288 }, { "epoch": 0.14, "learning_rate": 0.000433162159993581, "loss": 0.0729, "theoretical_loss": 3.4759661695444892, "tokens_seen": 1884815360 }, { "epoch": 0.14, "learning_rate": 0.00043312204124207657, "loss": 0.0715, "theoretical_loss": 3.475945947304856, "tokens_seen": 1884946432 }, { "epoch": 0.14, "learning_rate": 0.0004330819224905721, "loss": 0.0688, "theoretical_loss": 3.47592572686505, "tokens_seen": 1885077504 }, { "epoch": 0.14, "learning_rate": 0.00043304180373906765, "loss": 0.0695, "theoretical_loss": 3.475905508224786, "tokens_seen": 1885208576 }, { "epoch": 0.14, "learning_rate": 0.0004330016849875632, "loss": 0.0708, "theoretical_loss": 3.4758852913837783, "tokens_seen": 1885339648 }, { "epoch": 0.14, "learning_rate": 0.00043296156623605874, "loss": 0.0726, "theoretical_loss": 3.4758650763417425, "tokens_seen": 1885470720 }, { "epoch": 0.14, "learning_rate": 0.0004329214474845543, "loss": 0.0752, "theoretical_loss": 3.475844863098393, "tokens_seen": 1885601792 }, { "epoch": 0.14, "learning_rate": 0.0004328813287330499, "loss": 0.0701, "theoretical_loss": 3.4758246516534443, "tokens_seen": 1885732864 }, { "epoch": 0.14, "objective/train/advantage_avg": -0.0011149345664307475, "objective/train/docs_used": 687569, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.284780502319336, "objective/train/original_loss": 1.284780502319336, "objective/train/theoretical_loss": 3.475804442006612, "objective/train/tokens_used": 256388576, "objective/train/value_avg": -0.0080108642578125, "objective/train/value_loss": 0.00024721521185711026, "objective/train/value_max": -2.6881694793701172e-05, "objective/train/value_min": -0.1842041015625, "objective/train/value_reward_corr": 0.7013729038123, "objective/train/value_std": 0.01253509521484375, "objective/train/weight_avg": 0.9989969730377197, "objective/train/weighted_lm_loss": 1.2830551862716675, "objective/train/weights_max": 1.089339256286621, "objective/train/weights_min": 0.3683217763900757, "theoretical_loss": 3.475804442006612, "tokens_seen": 1885863936 }, { "epoch": 0.14, "learning_rate": 0.00043284120998154533, "loss": 0.0664, "theoretical_loss": 3.475804442006612, "tokens_seen": 1885863936 }, { "epoch": 0.14, "learning_rate": 0.0004328010912300409, "loss": 0.0687, "theoretical_loss": 3.4757842341576115, "tokens_seen": 1885995008 }, { "epoch": 0.14, "learning_rate": 0.00043276097247853647, "loss": 0.0711, "theoretical_loss": 3.4757640281061573, "tokens_seen": 1886126080 }, { "epoch": 0.14, "learning_rate": 0.00043272085372703204, "loss": 0.0709, "theoretical_loss": 3.4757438238519645, "tokens_seen": 1886257152 }, { "epoch": 0.14, "learning_rate": 0.00043268073497552755, "loss": 0.0714, "theoretical_loss": 3.4757236213947484, "tokens_seen": 1886388224 }, { "epoch": 0.14, "learning_rate": 0.0004326406162240231, "loss": 0.0706, "theoretical_loss": 3.4757034207342254, "tokens_seen": 1886519296 }, { "epoch": 0.14, "learning_rate": 0.0004326004974725187, "loss": 0.0716, "theoretical_loss": 3.4756832218701095, "tokens_seen": 1886650368 }, { "epoch": 0.14, "learning_rate": 0.0004325603787210142, "loss": 0.0703, "theoretical_loss": 3.4756630248021168, "tokens_seen": 1886781440 }, { "epoch": 0.14, "learning_rate": 0.00043252025996950977, "loss": 0.0672, "theoretical_loss": 3.475642829529963, "tokens_seen": 1886912512 }, { "epoch": 0.14, "learning_rate": 0.00043248014121800534, "loss": 0.0755, "theoretical_loss": 3.4756226360533633, "tokens_seen": 1887043584 }, { "epoch": 0.14, "learning_rate": 0.0004324400224665008, "loss": 0.0723, "theoretical_loss": 3.4756024443720337, "tokens_seen": 1887174656 }, { "epoch": 0.14, "learning_rate": 0.00043239990371499637, "loss": 0.0718, "theoretical_loss": 3.4755822544856896, "tokens_seen": 1887305728 }, { "epoch": 0.14, "learning_rate": 0.00043235978496349194, "loss": 0.0693, "theoretical_loss": 3.4755620663940467, "tokens_seen": 1887436800 }, { "epoch": 0.14, "learning_rate": 0.0004323196662119875, "loss": 0.065, "theoretical_loss": 3.475541880096822, "tokens_seen": 1887567872 }, { "epoch": 0.14, "learning_rate": 0.000432279547460483, "loss": 0.0719, "theoretical_loss": 3.4755216955937294, "tokens_seen": 1887698944 }, { "epoch": 0.14, "learning_rate": 0.0004322394287089786, "loss": 0.0702, "theoretical_loss": 3.4755015128844864, "tokens_seen": 1887830016 }, { "epoch": 0.14, "learning_rate": 0.00043219930995747416, "loss": 0.0705, "theoretical_loss": 3.4754813319688087, "tokens_seen": 1887961088 }, { "epoch": 0.14, "learning_rate": 0.00043215919120596967, "loss": 0.0668, "theoretical_loss": 3.475461152846412, "tokens_seen": 1888092160 }, { "epoch": 0.14, "learning_rate": 0.00043211907245446524, "loss": 0.0671, "theoretical_loss": 3.4754409755170133, "tokens_seen": 1888223232 }, { "epoch": 0.14, "learning_rate": 0.0004320789537029608, "loss": 0.0751, "theoretical_loss": 3.475420799980328, "tokens_seen": 1888354304 }, { "epoch": 0.14, "learning_rate": 0.00043203883495145627, "loss": 0.0691, "theoretical_loss": 3.4754006262360733, "tokens_seen": 1888485376 }, { "epoch": 0.14, "learning_rate": 0.00043199871619995184, "loss": 0.0722, "theoretical_loss": 3.4753804542839646, "tokens_seen": 1888616448 }, { "epoch": 0.14, "learning_rate": 0.0004319585974484474, "loss": 0.0667, "theoretical_loss": 3.4753602841237186, "tokens_seen": 1888747520 }, { "epoch": 0.14, "learning_rate": 0.00043191847869694297, "loss": 0.0719, "theoretical_loss": 3.475340115755052, "tokens_seen": 1888878592 }, { "epoch": 0.14, "learning_rate": 0.0004318783599454385, "loss": 0.0704, "theoretical_loss": 3.4753199491776816, "tokens_seen": 1889009664 }, { "epoch": 0.14, "objective/train/advantage_avg": 0.00019621438696049154, "objective/train/docs_used": 688718, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4665930271148682, "objective/train/original_loss": 1.466592788696289, "objective/train/theoretical_loss": 3.4752997843913236, "objective/train/tokens_used": 259665376, "objective/train/value_avg": -0.007183074951171875, "objective/train/value_loss": 0.00027460267301648855, "objective/train/value_max": -8.219480514526367e-05, "objective/train/value_min": -0.830078125, "objective/train/value_reward_corr": 0.7209643304822161, "objective/train/value_std": 0.0157623291015625, "objective/train/weight_avg": 1.0003166198730469, "objective/train/weighted_lm_loss": 1.4667540788650513, "objective/train/weights_max": 1.2927699089050293, "objective/train/weights_min": 0.3724459707736969, "theoretical_loss": 3.4752997843913236, "tokens_seen": 1889140736 }, { "epoch": 0.14, "learning_rate": 0.00043183824119393406, "loss": 0.0681, "theoretical_loss": 3.4752997843913236, "tokens_seen": 1889140736 }, { "epoch": 0.15, "learning_rate": 0.0004317981224424296, "loss": 0.0695, "theoretical_loss": 3.475279621395695, "tokens_seen": 1889271808 }, { "epoch": 0.15, "learning_rate": 0.00043175800369092514, "loss": 0.0706, "theoretical_loss": 3.475259460190512, "tokens_seen": 1889402880 }, { "epoch": 0.15, "learning_rate": 0.0004317178849394207, "loss": 0.0693, "theoretical_loss": 3.4752393007754923, "tokens_seen": 1889533952 }, { "epoch": 0.15, "learning_rate": 0.0004316777661879163, "loss": 0.0696, "theoretical_loss": 3.475219143150352, "tokens_seen": 1889665024 }, { "epoch": 0.15, "learning_rate": 0.0004316376474364118, "loss": 0.0714, "theoretical_loss": 3.4751989873148084, "tokens_seen": 1889796096 }, { "epoch": 0.15, "learning_rate": 0.0004315975286849073, "loss": 0.075, "theoretical_loss": 3.4751788332685782, "tokens_seen": 1889927168 }, { "epoch": 0.15, "learning_rate": 0.00043155740993340287, "loss": 0.0736, "theoretical_loss": 3.4751586810113793, "tokens_seen": 1890058240 }, { "epoch": 0.15, "learning_rate": 0.00043151729118189844, "loss": 0.0717, "theoretical_loss": 3.475138530542928, "tokens_seen": 1890189312 }, { "epoch": 0.15, "learning_rate": 0.00043147717243039395, "loss": 0.0704, "theoretical_loss": 3.4751183818629414, "tokens_seen": 1890320384 }, { "epoch": 0.15, "learning_rate": 0.0004314370536788895, "loss": 0.0652, "theoretical_loss": 3.4750982349711377, "tokens_seen": 1890451456 }, { "epoch": 0.15, "learning_rate": 0.0004313969349273851, "loss": 0.0675, "theoretical_loss": 3.4750780898672335, "tokens_seen": 1890582528 }, { "epoch": 0.15, "learning_rate": 0.0004313568161758806, "loss": 0.0725, "theoretical_loss": 3.4750579465509466, "tokens_seen": 1890713600 }, { "epoch": 0.15, "learning_rate": 0.0004313166974243762, "loss": 0.0713, "theoretical_loss": 3.475037805021994, "tokens_seen": 1890844672 }, { "epoch": 0.15, "learning_rate": 0.00043127657867287174, "loss": 0.0688, "theoretical_loss": 3.4750176652800935, "tokens_seen": 1890975744 }, { "epoch": 0.15, "learning_rate": 0.00043123645992136726, "loss": 0.0692, "theoretical_loss": 3.4749975273249625, "tokens_seen": 1891106816 }, { "epoch": 0.15, "learning_rate": 0.00043119634116986277, "loss": 0.0669, "theoretical_loss": 3.474977391156319, "tokens_seen": 1891237888 }, { "epoch": 0.15, "learning_rate": 0.00043115622241835834, "loss": 0.0708, "theoretical_loss": 3.4749572567738807, "tokens_seen": 1891368960 }, { "epoch": 0.15, "learning_rate": 0.0004311161036668539, "loss": 0.0694, "theoretical_loss": 3.474937124177365, "tokens_seen": 1891500032 }, { "epoch": 0.15, "learning_rate": 0.0004310759849153494, "loss": 0.0686, "theoretical_loss": 3.4749169933664903, "tokens_seen": 1891631104 }, { "epoch": 0.15, "learning_rate": 0.000431035866163845, "loss": 0.0686, "theoretical_loss": 3.474896864340974, "tokens_seen": 1891762176 }, { "epoch": 0.15, "learning_rate": 0.00043099574741234056, "loss": 0.0722, "theoretical_loss": 3.4748767371005345, "tokens_seen": 1891893248 }, { "epoch": 0.15, "learning_rate": 0.00043095562866083607, "loss": 0.068, "theoretical_loss": 3.4748566116448893, "tokens_seen": 1892024320 }, { "epoch": 0.15, "learning_rate": 0.00043091550990933164, "loss": 0.0686, "theoretical_loss": 3.474836487973757, "tokens_seen": 1892155392 }, { "epoch": 0.15, "learning_rate": 0.0004308753911578272, "loss": 0.072, "theoretical_loss": 3.4748163660868556, "tokens_seen": 1892286464 }, { "epoch": 0.15, "objective/train/advantage_avg": -0.0007753781392239034, "objective/train/docs_used": 689894, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5407929420471191, "objective/train/original_loss": 1.5407931804656982, "objective/train/theoretical_loss": 3.474796245983903, "objective/train/tokens_used": 262942176, "objective/train/value_avg": -0.00702667236328125, "objective/train/value_loss": 0.00046344310976564884, "objective/train/value_max": -6.204843521118164e-05, "objective/train/value_min": -0.6240234375, "objective/train/value_reward_corr": 0.6890079888317606, "objective/train/value_std": 0.015411376953125, "objective/train/weight_avg": 0.9994248747825623, "objective/train/weighted_lm_loss": 1.5396496057510376, "objective/train/weights_max": 1.4538551568984985, "objective/train/weights_min": 0.36886805295944214, "theoretical_loss": 3.474796245983903, "tokens_seen": 1892417536 }, { "epoch": 0.15, "learning_rate": 0.0004308352724063227, "loss": 0.0733, "theoretical_loss": 3.474796245983903, "tokens_seen": 1892417536 }, { "epoch": 0.15, "learning_rate": 0.00043079515365481824, "loss": 0.0646, "theoretical_loss": 3.4747761276646183, "tokens_seen": 1892548608 }, { "epoch": 0.15, "learning_rate": 0.0004307550349033138, "loss": 0.0693, "theoretical_loss": 3.4747560111287195, "tokens_seen": 1892679680 }, { "epoch": 0.15, "learning_rate": 0.0004307149161518094, "loss": 0.071, "theoretical_loss": 3.4747358963759245, "tokens_seen": 1892810752 }, { "epoch": 0.15, "learning_rate": 0.0004306747974003049, "loss": 0.0666, "theoretical_loss": 3.4747157834059523, "tokens_seen": 1892941824 }, { "epoch": 0.15, "learning_rate": 0.00043063467864880046, "loss": 0.0711, "theoretical_loss": 3.4746956722185214, "tokens_seen": 1893072896 }, { "epoch": 0.15, "learning_rate": 0.000430594559897296, "loss": 0.072, "theoretical_loss": 3.47467556281335, "tokens_seen": 1893203968 }, { "epoch": 0.15, "learning_rate": 0.00043055444114579154, "loss": 0.0665, "theoretical_loss": 3.4746554551901574, "tokens_seen": 1893335040 }, { "epoch": 0.15, "learning_rate": 0.0004305143223942871, "loss": 0.071, "theoretical_loss": 3.474635349348662, "tokens_seen": 1893466112 }, { "epoch": 0.15, "learning_rate": 0.0004304742036427827, "loss": 0.0727, "theoretical_loss": 3.4746152452885832, "tokens_seen": 1893597184 }, { "epoch": 0.15, "learning_rate": 0.0004304340848912782, "loss": 0.0725, "theoretical_loss": 3.4745951430096387, "tokens_seen": 1893728256 }, { "epoch": 0.15, "learning_rate": 0.0004303939661397737, "loss": 0.0697, "theoretical_loss": 3.4745750425115487, "tokens_seen": 1893859328 }, { "epoch": 0.15, "learning_rate": 0.00043035384738826927, "loss": 0.0708, "theoretical_loss": 3.474554943794031, "tokens_seen": 1893990400 }, { "epoch": 0.15, "learning_rate": 0.00043031372863676484, "loss": 0.066, "theoretical_loss": 3.474534846856806, "tokens_seen": 1894121472 }, { "epoch": 0.15, "learning_rate": 0.00043027360988526036, "loss": 0.0685, "theoretical_loss": 3.4745147516995916, "tokens_seen": 1894252544 }, { "epoch": 0.15, "learning_rate": 0.0004302334911337559, "loss": 0.0749, "theoretical_loss": 3.4744946583221075, "tokens_seen": 1894383616 }, { "epoch": 0.15, "learning_rate": 0.0004301933723822515, "loss": 0.0706, "theoretical_loss": 3.474474566724073, "tokens_seen": 1894514688 }, { "epoch": 0.15, "learning_rate": 0.000430153253630747, "loss": 0.07, "theoretical_loss": 3.4744544769052075, "tokens_seen": 1894645760 }, { "epoch": 0.15, "learning_rate": 0.0004301131348792426, "loss": 0.0709, "theoretical_loss": 3.4744343888652303, "tokens_seen": 1894776832 }, { "epoch": 0.15, "learning_rate": 0.00043007301612773814, "loss": 0.0664, "theoretical_loss": 3.4744143026038605, "tokens_seen": 1894907904 }, { "epoch": 0.15, "learning_rate": 0.00043003289737623366, "loss": 0.0681, "theoretical_loss": 3.474394218120818, "tokens_seen": 1895038976 }, { "epoch": 0.15, "learning_rate": 0.00042999277862472917, "loss": 0.0699, "theoretical_loss": 3.4743741354158226, "tokens_seen": 1895170048 }, { "epoch": 0.15, "learning_rate": 0.00042995265987322474, "loss": 0.0727, "theoretical_loss": 3.474354054488593, "tokens_seen": 1895301120 }, { "epoch": 0.15, "learning_rate": 0.0004299125411217203, "loss": 0.0693, "theoretical_loss": 3.4743339753388502, "tokens_seen": 1895432192 }, { "epoch": 0.15, "learning_rate": 0.0004298724223702158, "loss": 0.0709, "theoretical_loss": 3.4743138979663133, "tokens_seen": 1895563264 }, { "epoch": 0.15, "objective/train/advantage_avg": -0.00048152837553061545, "objective/train/docs_used": 691059, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4284868240356445, "objective/train/original_loss": 1.4284864664077759, "objective/train/theoretical_loss": 3.4742938223707016, "objective/train/tokens_used": 266218976, "objective/train/value_avg": -0.006591796875, "objective/train/value_loss": 0.000979525619186461, "objective/train/value_max": -5.918741226196289e-05, "objective/train/value_min": -0.7392578125, "objective/train/value_reward_corr": 0.5336818231681447, "objective/train/value_std": 0.019287109375, "objective/train/weight_avg": 0.9998344779014587, "objective/train/weighted_lm_loss": 1.428508996963501, "objective/train/weights_max": 1.7889978885650635, "objective/train/weights_min": 0.0192104484885931, "theoretical_loss": 3.4742938223707016, "tokens_seen": 1895694336 }, { "epoch": 0.15, "learning_rate": 0.0004298323036187114, "loss": 0.0683, "theoretical_loss": 3.4742938223707016, "tokens_seen": 1895694336 }, { "epoch": 0.15, "learning_rate": 0.00042979218486720696, "loss": 0.0674, "theoretical_loss": 3.4742737485517354, "tokens_seen": 1895825408 }, { "epoch": 0.15, "learning_rate": 0.00042975206611570253, "loss": 0.0727, "theoretical_loss": 3.4742536765091354, "tokens_seen": 1895956480 }, { "epoch": 0.15, "learning_rate": 0.00042971194736419804, "loss": 0.068, "theoretical_loss": 3.4742336062426205, "tokens_seen": 1896087552 }, { "epoch": 0.15, "learning_rate": 0.0004296718286126936, "loss": 0.0648, "theoretical_loss": 3.474213537751911, "tokens_seen": 1896218624 }, { "epoch": 0.15, "learning_rate": 0.0004296317098611891, "loss": 0.0676, "theoretical_loss": 3.4741934710367284, "tokens_seen": 1896349696 }, { "epoch": 0.15, "learning_rate": 0.00042959159110968464, "loss": 0.0683, "theoretical_loss": 3.4741734060967913, "tokens_seen": 1896480768 }, { "epoch": 0.15, "learning_rate": 0.0004295514723581802, "loss": 0.0681, "theoretical_loss": 3.47415334293182, "tokens_seen": 1896611840 }, { "epoch": 0.15, "learning_rate": 0.0004295113536066758, "loss": 0.0662, "theoretical_loss": 3.4741332815415356, "tokens_seen": 1896742912 }, { "epoch": 0.15, "learning_rate": 0.0004294712348551713, "loss": 0.0656, "theoretical_loss": 3.4741132219256583, "tokens_seen": 1896873984 }, { "epoch": 0.15, "learning_rate": 0.00042943111610366686, "loss": 0.0695, "theoretical_loss": 3.474093164083908, "tokens_seen": 1897005056 }, { "epoch": 0.15, "learning_rate": 0.0004293909973521624, "loss": 0.07, "theoretical_loss": 3.4740731080160066, "tokens_seen": 1897136128 }, { "epoch": 0.15, "learning_rate": 0.000429350878600658, "loss": 0.0726, "theoretical_loss": 3.474053053721673, "tokens_seen": 1897267200 }, { "epoch": 0.15, "learning_rate": 0.0004293107598491535, "loss": 0.0681, "theoretical_loss": 3.4740330012006293, "tokens_seen": 1897398272 }, { "epoch": 0.15, "learning_rate": 0.0004292706410976491, "loss": 0.0669, "theoretical_loss": 3.474012950452595, "tokens_seen": 1897529344 }, { "epoch": 0.15, "learning_rate": 0.0004292305223461446, "loss": 0.066, "theoretical_loss": 3.473992901477292, "tokens_seen": 1897660416 }, { "epoch": 0.15, "learning_rate": 0.0004291904035946401, "loss": 0.0697, "theoretical_loss": 3.47397285427444, "tokens_seen": 1897791488 }, { "epoch": 0.15, "learning_rate": 0.0004291502848431357, "loss": 0.0703, "theoretical_loss": 3.4739528088437606, "tokens_seen": 1897922560 }, { "epoch": 0.15, "learning_rate": 0.00042911016609163124, "loss": 0.0688, "theoretical_loss": 3.473932765184975, "tokens_seen": 1898053632 }, { "epoch": 0.15, "learning_rate": 0.00042907004734012676, "loss": 0.0639, "theoretical_loss": 3.4739127232978033, "tokens_seen": 1898184704 }, { "epoch": 0.15, "learning_rate": 0.0004290299285886223, "loss": 0.0688, "theoretical_loss": 3.473892683181968, "tokens_seen": 1898315776 }, { "epoch": 0.15, "learning_rate": 0.0004289898098371179, "loss": 0.0734, "theoretical_loss": 3.473872644837189, "tokens_seen": 1898446848 }, { "epoch": 0.15, "learning_rate": 0.00042894969108561346, "loss": 0.07, "theoretical_loss": 3.4738526082631878, "tokens_seen": 1898577920 }, { "epoch": 0.15, "learning_rate": 0.000428909572334109, "loss": 0.0642, "theoretical_loss": 3.4738325734596858, "tokens_seen": 1898708992 }, { "epoch": 0.15, "learning_rate": 0.00042886945358260454, "loss": 0.0695, "theoretical_loss": 3.4738125404264046, "tokens_seen": 1898840064 }, { "epoch": 0.15, "objective/train/advantage_avg": -8.356694888789207e-05, "objective/train/docs_used": 692233, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2927488088607788, "objective/train/original_loss": 1.2927489280700684, "objective/train/theoretical_loss": 3.473792509163066, "objective/train/tokens_used": 269495776, "objective/train/value_avg": -0.00630950927734375, "objective/train/value_loss": 0.00018059047579299659, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.56201171875, "objective/train/value_reward_corr": 0.6279588421217154, "objective/train/value_std": 0.010894775390625, "objective/train/weight_avg": 0.9999991059303284, "objective/train/weighted_lm_loss": 1.2924408912658691, "objective/train/weights_max": 1.4721184968948364, "objective/train/weights_min": 0.40263333916664124, "theoretical_loss": 3.473792509163066, "tokens_seen": 1898971136 }, { "epoch": 0.15, "learning_rate": 0.00042882933483110006, "loss": 0.0673, "theoretical_loss": 3.473792509163066, "tokens_seen": 1898971136 }, { "epoch": 0.15, "learning_rate": 0.00042878921607959557, "loss": 0.071, "theoretical_loss": 3.47377247966939, "tokens_seen": 1899102208 }, { "epoch": 0.15, "learning_rate": 0.00042874909732809114, "loss": 0.0683, "theoretical_loss": 3.4737524519450993, "tokens_seen": 1899233280 }, { "epoch": 0.15, "learning_rate": 0.0004287089785765867, "loss": 0.07, "theoretical_loss": 3.4737324259899154, "tokens_seen": 1899364352 }, { "epoch": 0.15, "learning_rate": 0.0004286688598250822, "loss": 0.072, "theoretical_loss": 3.4737124018035597, "tokens_seen": 1899495424 }, { "epoch": 0.15, "learning_rate": 0.0004286287410735778, "loss": 0.0648, "theoretical_loss": 3.4736923793857537, "tokens_seen": 1899626496 }, { "epoch": 0.15, "learning_rate": 0.00042858862232207336, "loss": 0.0713, "theoretical_loss": 3.47367235873622, "tokens_seen": 1899757568 }, { "epoch": 0.15, "learning_rate": 0.00042854850357056893, "loss": 0.0693, "theoretical_loss": 3.4736523398546795, "tokens_seen": 1899888640 }, { "epoch": 0.15, "learning_rate": 0.00042850838481906444, "loss": 0.0675, "theoretical_loss": 3.4736323227408548, "tokens_seen": 1900019712 }, { "epoch": 0.15, "learning_rate": 0.00042846826606756, "loss": 0.0651, "theoretical_loss": 3.4736123073944674, "tokens_seen": 1900150784 }, { "epoch": 0.15, "learning_rate": 0.0004284281473160556, "loss": 0.0725, "theoretical_loss": 3.4735922938152397, "tokens_seen": 1900281856 }, { "epoch": 0.15, "learning_rate": 0.00042838802856455104, "loss": 0.0689, "theoretical_loss": 3.473572282002894, "tokens_seen": 1900412928 }, { "epoch": 0.15, "learning_rate": 0.0004283479098130466, "loss": 0.0676, "theoretical_loss": 3.4735522719571517, "tokens_seen": 1900544000 }, { "epoch": 0.15, "learning_rate": 0.0004283077910615422, "loss": 0.0696, "theoretical_loss": 3.4735322636777357, "tokens_seen": 1900675072 }, { "epoch": 0.15, "learning_rate": 0.0004282676723100377, "loss": 0.0669, "theoretical_loss": 3.473512257164368, "tokens_seen": 1900806144 }, { "epoch": 0.15, "learning_rate": 0.00042822755355853326, "loss": 0.0702, "theoretical_loss": 3.4734922524167713, "tokens_seen": 1900937216 }, { "epoch": 0.15, "learning_rate": 0.0004281874348070288, "loss": 0.0715, "theoretical_loss": 3.4734722494346673, "tokens_seen": 1901068288 }, { "epoch": 0.15, "learning_rate": 0.0004281473160555244, "loss": 0.0653, "theoretical_loss": 3.473452248217779, "tokens_seen": 1901199360 }, { "epoch": 0.15, "learning_rate": 0.0004281071973040199, "loss": 0.0697, "theoretical_loss": 3.473432248765829, "tokens_seen": 1901330432 }, { "epoch": 0.15, "learning_rate": 0.0004280670785525155, "loss": 0.0735, "theoretical_loss": 3.47341225107854, "tokens_seen": 1901461504 }, { "epoch": 0.15, "learning_rate": 0.00042802695980101105, "loss": 0.0679, "theoretical_loss": 3.473392255155634, "tokens_seen": 1901592576 }, { "epoch": 0.15, "learning_rate": 0.0004279868410495065, "loss": 0.0704, "theoretical_loss": 3.4733722609968343, "tokens_seen": 1901723648 }, { "epoch": 0.15, "learning_rate": 0.0004279467222980021, "loss": 0.0712, "theoretical_loss": 3.4733522686018636, "tokens_seen": 1901854720 }, { "epoch": 0.15, "learning_rate": 0.00042790660354649764, "loss": 0.0728, "theoretical_loss": 3.473332277970445, "tokens_seen": 1901985792 }, { "epoch": 0.15, "learning_rate": 0.00042786648479499316, "loss": 0.0684, "theoretical_loss": 3.4733122891023007, "tokens_seen": 1902116864 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0005795042961835861, "objective/train/docs_used": 693391, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2941027879714966, "objective/train/original_loss": 1.2941029071807861, "objective/train/theoretical_loss": 3.473292301997154, "objective/train/tokens_used": 272772576, "objective/train/value_avg": -0.0052947998046875, "objective/train/value_loss": 6.377628596965224e-05, "objective/train/value_max": -5.435943603515625e-05, "objective/train/value_min": -0.1773681640625, "objective/train/value_reward_corr": 0.6987091320368113, "objective/train/value_std": 0.00852203369140625, "objective/train/weight_avg": 1.0006109476089478, "objective/train/weighted_lm_loss": 1.294875144958496, "objective/train/weights_max": 1.0537636280059814, "objective/train/weights_min": 0.7196541428565979, "theoretical_loss": 3.473292301997154, "tokens_seen": 1902247936 }, { "epoch": 0.15, "learning_rate": 0.0004278263660434887, "loss": 0.0717, "theoretical_loss": 3.473292301997154, "tokens_seen": 1902247936 }, { "epoch": 0.15, "learning_rate": 0.0004277862472919843, "loss": 0.0712, "theoretical_loss": 3.473272316654729, "tokens_seen": 1902379008 }, { "epoch": 0.15, "learning_rate": 0.00042774612854047986, "loss": 0.0661, "theoretical_loss": 3.473252333074747, "tokens_seen": 1902510080 }, { "epoch": 0.15, "learning_rate": 0.0004277060097889754, "loss": 0.0665, "theoretical_loss": 3.4732323512569323, "tokens_seen": 1902641152 }, { "epoch": 0.15, "learning_rate": 0.00042766589103747095, "loss": 0.0723, "theoretical_loss": 3.473212371201008, "tokens_seen": 1902772224 }, { "epoch": 0.15, "learning_rate": 0.0004276257722859665, "loss": 0.066, "theoretical_loss": 3.473192392906697, "tokens_seen": 1902903296 }, { "epoch": 0.15, "learning_rate": 0.000427585653534462, "loss": 0.0721, "theoretical_loss": 3.473172416373723, "tokens_seen": 1903034368 }, { "epoch": 0.15, "learning_rate": 0.00042754553478295754, "loss": 0.0687, "theoretical_loss": 3.473152441601809, "tokens_seen": 1903165440 }, { "epoch": 0.15, "learning_rate": 0.0004275054160314531, "loss": 0.0686, "theoretical_loss": 3.4731324685906797, "tokens_seen": 1903296512 }, { "epoch": 0.15, "learning_rate": 0.0004274652972799486, "loss": 0.07, "theoretical_loss": 3.473112497340057, "tokens_seen": 1903427584 }, { "epoch": 0.15, "learning_rate": 0.0004274251785284442, "loss": 0.0683, "theoretical_loss": 3.4730925278496656, "tokens_seen": 1903558656 }, { "epoch": 0.15, "learning_rate": 0.00042738505977693976, "loss": 0.0681, "theoretical_loss": 3.473072560119229, "tokens_seen": 1903689728 }, { "epoch": 0.15, "learning_rate": 0.00042734494102543533, "loss": 0.0685, "theoretical_loss": 3.4730525941484705, "tokens_seen": 1903820800 }, { "epoch": 0.15, "learning_rate": 0.00042730482227393084, "loss": 0.0659, "theoretical_loss": 3.473032629937114, "tokens_seen": 1903951872 }, { "epoch": 0.15, "learning_rate": 0.0004272647035224264, "loss": 0.0699, "theoretical_loss": 3.473012667484884, "tokens_seen": 1904082944 }, { "epoch": 0.15, "learning_rate": 0.000427224584770922, "loss": 0.0714, "theoretical_loss": 3.472992706791504, "tokens_seen": 1904214016 }, { "epoch": 0.15, "learning_rate": 0.00042718446601941744, "loss": 0.0686, "theoretical_loss": 3.4729727478566974, "tokens_seen": 1904345088 }, { "epoch": 0.15, "learning_rate": 0.000427144347267913, "loss": 0.0674, "theoretical_loss": 3.472952790680189, "tokens_seen": 1904476160 }, { "epoch": 0.15, "learning_rate": 0.0004271042285164086, "loss": 0.0694, "theoretical_loss": 3.4729328352617026, "tokens_seen": 1904607232 }, { "epoch": 0.15, "learning_rate": 0.0004270641097649041, "loss": 0.0725, "theoretical_loss": 3.472912881600963, "tokens_seen": 1904738304 }, { "epoch": 0.15, "learning_rate": 0.00042702399101339966, "loss": 0.0669, "theoretical_loss": 3.4728929296976934, "tokens_seen": 1904869376 }, { "epoch": 0.15, "learning_rate": 0.00042698387226189523, "loss": 0.0689, "theoretical_loss": 3.4728729795516182, "tokens_seen": 1905000448 }, { "epoch": 0.15, "learning_rate": 0.0004269437535103908, "loss": 0.069, "theoretical_loss": 3.4728530311624626, "tokens_seen": 1905131520 }, { "epoch": 0.15, "learning_rate": 0.0004269036347588863, "loss": 0.0693, "theoretical_loss": 3.4728330845299507, "tokens_seen": 1905262592 }, { "epoch": 0.15, "learning_rate": 0.0004268635160073819, "loss": 0.0688, "theoretical_loss": 3.4728131396538067, "tokens_seen": 1905393664 }, { "epoch": 0.15, "objective/train/advantage_avg": 0.0002428995503578335, "objective/train/docs_used": 694687, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3665748834609985, "objective/train/original_loss": 1.366574764251709, "objective/train/theoretical_loss": 3.472793196533755, "objective/train/tokens_used": 276049376, "objective/train/value_avg": -0.00658416748046875, "objective/train/value_loss": 0.00036214495776221156, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.7119140625, "objective/train/value_reward_corr": 0.5857805387012942, "objective/train/value_std": 0.01416015625, "objective/train/weight_avg": 1.0003992319107056, "objective/train/weighted_lm_loss": 1.366950273513794, "objective/train/weights_max": 2.0299432277679443, "objective/train/weights_min": 0.373872309923172, "theoretical_loss": 3.472793196533755, "tokens_seen": 1905524736 }, { "epoch": 0.15, "learning_rate": 0.00042682339725587745, "loss": 0.0737, "theoretical_loss": 3.472793196533755, "tokens_seen": 1905524736 }, { "epoch": 0.15, "learning_rate": 0.0004267832785043729, "loss": 0.0683, "theoretical_loss": 3.4727732551695207, "tokens_seen": 1905655808 }, { "epoch": 0.16, "learning_rate": 0.0004267431597528685, "loss": 0.0699, "theoretical_loss": 3.472753315560828, "tokens_seen": 1905786880 }, { "epoch": 0.16, "learning_rate": 0.00042670304100136404, "loss": 0.0686, "theoretical_loss": 3.472733377707402, "tokens_seen": 1905917952 }, { "epoch": 0.16, "learning_rate": 0.0004266629222498596, "loss": 0.0693, "theoretical_loss": 3.4727134416089678, "tokens_seen": 1906049024 }, { "epoch": 0.16, "learning_rate": 0.0004266228034983551, "loss": 0.0683, "theoretical_loss": 3.4726935072652494, "tokens_seen": 1906180096 }, { "epoch": 0.16, "learning_rate": 0.0004265826847468507, "loss": 0.0692, "theoretical_loss": 3.472673574675972, "tokens_seen": 1906311168 }, { "epoch": 0.16, "learning_rate": 0.00042654256599534626, "loss": 0.0728, "theoretical_loss": 3.472653643840861, "tokens_seen": 1906442240 }, { "epoch": 0.16, "learning_rate": 0.0004265024472438418, "loss": 0.0702, "theoretical_loss": 3.472633714759641, "tokens_seen": 1906573312 }, { "epoch": 0.16, "learning_rate": 0.00042646232849233735, "loss": 0.0721, "theoretical_loss": 3.472613787432037, "tokens_seen": 1906704384 }, { "epoch": 0.16, "learning_rate": 0.0004264222097408329, "loss": 0.0701, "theoretical_loss": 3.4725938618577743, "tokens_seen": 1906835456 }, { "epoch": 0.16, "learning_rate": 0.0004263820909893284, "loss": 0.0686, "theoretical_loss": 3.4725739380365788, "tokens_seen": 1906966528 }, { "epoch": 0.16, "learning_rate": 0.00042634197223782394, "loss": 0.0694, "theoretical_loss": 3.4725540159681745, "tokens_seen": 1907097600 }, { "epoch": 0.16, "learning_rate": 0.0004263018534863195, "loss": 0.0681, "theoretical_loss": 3.4725340956522874, "tokens_seen": 1907228672 }, { "epoch": 0.16, "learning_rate": 0.0004262617347348151, "loss": 0.069, "theoretical_loss": 3.4725141770886436, "tokens_seen": 1907359744 }, { "epoch": 0.16, "learning_rate": 0.0004262216159833106, "loss": 0.0671, "theoretical_loss": 3.472494260276968, "tokens_seen": 1907490816 }, { "epoch": 0.16, "learning_rate": 0.00042618149723180616, "loss": 0.0695, "theoretical_loss": 3.4724743452169857, "tokens_seen": 1907621888 }, { "epoch": 0.16, "learning_rate": 0.00042614137848030173, "loss": 0.0689, "theoretical_loss": 3.4724544319084227, "tokens_seen": 1907752960 }, { "epoch": 0.16, "learning_rate": 0.00042610125972879724, "loss": 0.0682, "theoretical_loss": 3.4724345203510047, "tokens_seen": 1907884032 }, { "epoch": 0.16, "learning_rate": 0.0004260611409772928, "loss": 0.0701, "theoretical_loss": 3.4724146105444573, "tokens_seen": 1908015104 }, { "epoch": 0.16, "learning_rate": 0.0004260210222257884, "loss": 0.069, "theoretical_loss": 3.472394702488506, "tokens_seen": 1908146176 }, { "epoch": 0.16, "learning_rate": 0.00042598090347428384, "loss": 0.0718, "theoretical_loss": 3.4723747961828773, "tokens_seen": 1908277248 }, { "epoch": 0.16, "learning_rate": 0.0004259407847227794, "loss": 0.0677, "theoretical_loss": 3.4723548916272966, "tokens_seen": 1908408320 }, { "epoch": 0.16, "learning_rate": 0.000425900665971275, "loss": 0.071, "theoretical_loss": 3.4723349888214905, "tokens_seen": 1908539392 }, { "epoch": 0.16, "learning_rate": 0.00042586054721977055, "loss": 0.0655, "theoretical_loss": 3.4723150877651836, "tokens_seen": 1908670464 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0019129390129819512, "objective/train/docs_used": 695871, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3882313966751099, "objective/train/original_loss": 1.3882315158843994, "objective/train/theoretical_loss": 3.4722951884581037, "objective/train/tokens_used": 279326176, "objective/train/value_avg": -0.007778167724609375, "objective/train/value_loss": 0.00020979112014174461, "objective/train/value_max": -4.83393669128418e-05, "objective/train/value_min": -0.379150390625, "objective/train/value_reward_corr": 0.5161679792717546, "objective/train/value_std": 0.01361846923828125, "objective/train/weight_avg": 1.0020109415054321, "objective/train/weighted_lm_loss": 1.3899874687194824, "objective/train/weights_max": 1.4610427618026733, "objective/train/weights_min": 0.37335923314094543, "theoretical_loss": 3.4722951884581037, "tokens_seen": 1908801536 }, { "epoch": 0.16, "learning_rate": 0.00042582042846826606, "loss": 0.0697, "theoretical_loss": 3.4722951884581037, "tokens_seen": 1908801536 }, { "epoch": 0.16, "learning_rate": 0.00042578030971676163, "loss": 0.0652, "theoretical_loss": 3.472275290899976, "tokens_seen": 1908932608 }, { "epoch": 0.16, "learning_rate": 0.0004257401909652572, "loss": 0.0673, "theoretical_loss": 3.472255395090527, "tokens_seen": 1909063680 }, { "epoch": 0.16, "learning_rate": 0.0004257000722137527, "loss": 0.0697, "theoretical_loss": 3.472235501029483, "tokens_seen": 1909194752 }, { "epoch": 0.16, "learning_rate": 0.0004256599534622483, "loss": 0.0709, "theoretical_loss": 3.47221560871657, "tokens_seen": 1909325824 }, { "epoch": 0.16, "learning_rate": 0.00042561983471074385, "loss": 0.071, "theoretical_loss": 3.4721957181515144, "tokens_seen": 1909456896 }, { "epoch": 0.16, "learning_rate": 0.0004255797159592393, "loss": 0.0722, "theoretical_loss": 3.4721758293340432, "tokens_seen": 1909587968 }, { "epoch": 0.16, "learning_rate": 0.0004255395972077349, "loss": 0.072, "theoretical_loss": 3.472155942263883, "tokens_seen": 1909719040 }, { "epoch": 0.16, "learning_rate": 0.00042549947845623045, "loss": 0.0684, "theoretical_loss": 3.472136056940759, "tokens_seen": 1909850112 }, { "epoch": 0.16, "learning_rate": 0.000425459359704726, "loss": 0.0649, "theoretical_loss": 3.4721161733643995, "tokens_seen": 1909981184 }, { "epoch": 0.16, "learning_rate": 0.00042541924095322153, "loss": 0.0627, "theoretical_loss": 3.4720962915345304, "tokens_seen": 1910112256 }, { "epoch": 0.16, "learning_rate": 0.0004253791222017171, "loss": 0.0677, "theoretical_loss": 3.472076411450878, "tokens_seen": 1910243328 }, { "epoch": 0.16, "learning_rate": 0.00042533900345021266, "loss": 0.0658, "theoretical_loss": 3.4720565331131708, "tokens_seen": 1910374400 }, { "epoch": 0.16, "learning_rate": 0.0004252988846987082, "loss": 0.0672, "theoretical_loss": 3.472036656521134, "tokens_seen": 1910505472 }, { "epoch": 0.16, "learning_rate": 0.00042525876594720375, "loss": 0.0755, "theoretical_loss": 3.4720167816744953, "tokens_seen": 1910636544 }, { "epoch": 0.16, "learning_rate": 0.0004252186471956993, "loss": 0.0662, "theoretical_loss": 3.4719969085729816, "tokens_seen": 1910767616 }, { "epoch": 0.16, "learning_rate": 0.0004251785284441948, "loss": 0.0659, "theoretical_loss": 3.4719770372163197, "tokens_seen": 1910898688 }, { "epoch": 0.16, "learning_rate": 0.00042513840969269034, "loss": 0.0693, "theoretical_loss": 3.4719571676042373, "tokens_seen": 1911029760 }, { "epoch": 0.16, "learning_rate": 0.0004250982909411859, "loss": 0.0716, "theoretical_loss": 3.471937299736461, "tokens_seen": 1911160832 }, { "epoch": 0.16, "learning_rate": 0.0004250581721896815, "loss": 0.0669, "theoretical_loss": 3.4719174336127185, "tokens_seen": 1911291904 }, { "epoch": 0.16, "learning_rate": 0.000425018053438177, "loss": 0.0696, "theoretical_loss": 3.471897569232737, "tokens_seen": 1911422976 }, { "epoch": 0.16, "learning_rate": 0.00042497793468667256, "loss": 0.0691, "theoretical_loss": 3.471877706596244, "tokens_seen": 1911554048 }, { "epoch": 0.16, "learning_rate": 0.00042493781593516813, "loss": 0.0672, "theoretical_loss": 3.4718578457029663, "tokens_seen": 1911685120 }, { "epoch": 0.16, "learning_rate": 0.00042489769718366365, "loss": 0.0733, "theoretical_loss": 3.4718379865526323, "tokens_seen": 1911816192 }, { "epoch": 0.16, "learning_rate": 0.0004248575784321592, "loss": 0.0672, "theoretical_loss": 3.4718181291449692, "tokens_seen": 1911947264 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.00016959662025328726, "objective/train/docs_used": 697086, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2779778242111206, "objective/train/original_loss": 1.277977705001831, "objective/train/theoretical_loss": 3.4717982734797044, "objective/train/tokens_used": 282602976, "objective/train/value_avg": -0.0084228515625, "objective/train/value_loss": 0.0001938844652613625, "objective/train/value_max": -6.300210952758789e-05, "objective/train/value_min": -0.259521484375, "objective/train/value_reward_corr": 0.8159301681735474, "objective/train/value_std": 0.01763916015625, "objective/train/weight_avg": 1.0002610683441162, "objective/train/weighted_lm_loss": 1.2788281440734863, "objective/train/weights_max": 1.1402249336242676, "objective/train/weights_min": 0.3925389051437378, "theoretical_loss": 3.4717982734797044, "tokens_seen": 1912078336 }, { "epoch": 0.16, "learning_rate": 0.0004248174596806548, "loss": 0.0684, "theoretical_loss": 3.4717982734797044, "tokens_seen": 1912078336 }, { "epoch": 0.16, "learning_rate": 0.00042477734092915024, "loss": 0.067, "theoretical_loss": 3.471778419556566, "tokens_seen": 1912209408 }, { "epoch": 0.16, "learning_rate": 0.0004247372221776458, "loss": 0.0716, "theoretical_loss": 3.4717585673752813, "tokens_seen": 1912340480 }, { "epoch": 0.16, "learning_rate": 0.0004246971034261414, "loss": 0.0677, "theoretical_loss": 3.4717387169355787, "tokens_seen": 1912471552 }, { "epoch": 0.16, "learning_rate": 0.00042465698467463695, "loss": 0.0753, "theoretical_loss": 3.4717188682371853, "tokens_seen": 1912602624 }, { "epoch": 0.16, "learning_rate": 0.00042461686592313246, "loss": 0.0699, "theoretical_loss": 3.4716990212798295, "tokens_seen": 1912733696 }, { "epoch": 0.16, "learning_rate": 0.00042457674717162803, "loss": 0.0696, "theoretical_loss": 3.471679176063239, "tokens_seen": 1912864768 }, { "epoch": 0.16, "learning_rate": 0.0004245366284201236, "loss": 0.0708, "theoretical_loss": 3.4716593325871425, "tokens_seen": 1912995840 }, { "epoch": 0.16, "learning_rate": 0.0004244965096686191, "loss": 0.0711, "theoretical_loss": 3.4716394908512678, "tokens_seen": 1913126912 }, { "epoch": 0.16, "learning_rate": 0.0004244563909171147, "loss": 0.0703, "theoretical_loss": 3.4716196508553425, "tokens_seen": 1913257984 }, { "epoch": 0.16, "learning_rate": 0.00042441627216561025, "loss": 0.0757, "theoretical_loss": 3.471599812599095, "tokens_seen": 1913389056 }, { "epoch": 0.16, "learning_rate": 0.0004243761534141057, "loss": 0.0686, "theoretical_loss": 3.4715799760822548, "tokens_seen": 1913520128 }, { "epoch": 0.16, "learning_rate": 0.0004243360346626013, "loss": 0.0722, "theoretical_loss": 3.4715601413045487, "tokens_seen": 1913651200 }, { "epoch": 0.16, "learning_rate": 0.00042429591591109685, "loss": 0.0672, "theoretical_loss": 3.4715403082657064, "tokens_seen": 1913782272 }, { "epoch": 0.16, "learning_rate": 0.0004242557971595924, "loss": 0.0699, "theoretical_loss": 3.4715204769654555, "tokens_seen": 1913913344 }, { "epoch": 0.16, "learning_rate": 0.00042421567840808793, "loss": 0.0708, "theoretical_loss": 3.4715006474035244, "tokens_seen": 1914044416 }, { "epoch": 0.16, "learning_rate": 0.0004241755596565835, "loss": 0.0672, "theoretical_loss": 3.471480819579643, "tokens_seen": 1914175488 }, { "epoch": 0.16, "learning_rate": 0.00042413544090507907, "loss": 0.0676, "theoretical_loss": 3.4714609934935385, "tokens_seen": 1914306560 }, { "epoch": 0.16, "learning_rate": 0.0004240953221535746, "loss": 0.0711, "theoretical_loss": 3.4714411691449403, "tokens_seen": 1914437632 }, { "epoch": 0.16, "learning_rate": 0.00042405520340207015, "loss": 0.0694, "theoretical_loss": 3.4714213465335773, "tokens_seen": 1914568704 }, { "epoch": 0.16, "learning_rate": 0.0004240150846505657, "loss": 0.0694, "theoretical_loss": 3.4714015256591777, "tokens_seen": 1914699776 }, { "epoch": 0.16, "learning_rate": 0.00042397496589906123, "loss": 0.0695, "theoretical_loss": 3.4713817065214707, "tokens_seen": 1914830848 }, { "epoch": 0.16, "learning_rate": 0.00042393484714755675, "loss": 0.07, "theoretical_loss": 3.4713618891201863, "tokens_seen": 1914961920 }, { "epoch": 0.16, "learning_rate": 0.0004238947283960523, "loss": 0.0687, "theoretical_loss": 3.4713420734550517, "tokens_seen": 1915092992 }, { "epoch": 0.16, "learning_rate": 0.0004238546096445479, "loss": 0.0715, "theoretical_loss": 3.471322259525798, "tokens_seen": 1915224064 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0007327937637455761, "objective/train/docs_used": 698281, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3470486402511597, "objective/train/original_loss": 1.3470487594604492, "objective/train/theoretical_loss": 3.4713024473321523, "objective/train/tokens_used": 285879776, "objective/train/value_avg": -0.0086212158203125, "objective/train/value_loss": 0.00041456177132204175, "objective/train/value_max": -8.416175842285156e-05, "objective/train/value_min": -0.73388671875, "objective/train/value_reward_corr": 0.6727540007958261, "objective/train/value_std": 0.0185089111328125, "objective/train/weight_avg": 1.0009160041809082, "objective/train/weighted_lm_loss": 1.3480035066604614, "objective/train/weights_max": 2.0087709426879883, "objective/train/weights_min": 0.37283265590667725, "theoretical_loss": 3.4713024473321523, "tokens_seen": 1915355136 }, { "epoch": 0.16, "learning_rate": 0.0004238144908930434, "loss": 0.0678, "theoretical_loss": 3.4713024473321523, "tokens_seen": 1915355136 }, { "epoch": 0.16, "learning_rate": 0.00042377437214153896, "loss": 0.0679, "theoretical_loss": 3.4712826368738456, "tokens_seen": 1915486208 }, { "epoch": 0.16, "learning_rate": 0.00042373425339003453, "loss": 0.0683, "theoretical_loss": 3.4712628281506057, "tokens_seen": 1915617280 }, { "epoch": 0.16, "learning_rate": 0.00042369413463853005, "loss": 0.0715, "theoretical_loss": 3.471243021162163, "tokens_seen": 1915748352 }, { "epoch": 0.16, "learning_rate": 0.0004236540158870256, "loss": 0.0669, "theoretical_loss": 3.471223215908246, "tokens_seen": 1915879424 }, { "epoch": 0.16, "learning_rate": 0.0004236138971355212, "loss": 0.0725, "theoretical_loss": 3.4712034123885855, "tokens_seen": 1916010496 }, { "epoch": 0.16, "learning_rate": 0.0004235737783840167, "loss": 0.0639, "theoretical_loss": 3.47118361060291, "tokens_seen": 1916141568 }, { "epoch": 0.16, "learning_rate": 0.0004235336596325122, "loss": 0.0729, "theoretical_loss": 3.471163810550949, "tokens_seen": 1916272640 }, { "epoch": 0.16, "learning_rate": 0.0004234935408810078, "loss": 0.0719, "theoretical_loss": 3.4711440122324326, "tokens_seen": 1916403712 }, { "epoch": 0.16, "learning_rate": 0.00042345342212950335, "loss": 0.0669, "theoretical_loss": 3.47112421564709, "tokens_seen": 1916534784 }, { "epoch": 0.16, "learning_rate": 0.00042341330337799886, "loss": 0.0736, "theoretical_loss": 3.471104420794651, "tokens_seen": 1916665856 }, { "epoch": 0.16, "learning_rate": 0.00042337318462649443, "loss": 0.0697, "theoretical_loss": 3.4710846276748466, "tokens_seen": 1916796928 }, { "epoch": 0.16, "learning_rate": 0.00042333306587499, "loss": 0.0732, "theoretical_loss": 3.471064836287405, "tokens_seen": 1916928000 }, { "epoch": 0.16, "learning_rate": 0.0004232929471234855, "loss": 0.0721, "theoretical_loss": 3.4710450466320575, "tokens_seen": 1917059072 }, { "epoch": 0.16, "learning_rate": 0.0004232528283719811, "loss": 0.0706, "theoretical_loss": 3.471025258708533, "tokens_seen": 1917190144 }, { "epoch": 0.16, "learning_rate": 0.00042321270962047665, "loss": 0.0689, "theoretical_loss": 3.471005472516562, "tokens_seen": 1917321216 }, { "epoch": 0.16, "learning_rate": 0.00042317259086897216, "loss": 0.0689, "theoretical_loss": 3.470985688055875, "tokens_seen": 1917452288 }, { "epoch": 0.16, "learning_rate": 0.0004231324721174677, "loss": 0.0696, "theoretical_loss": 3.4709659053262016, "tokens_seen": 1917583360 }, { "epoch": 0.16, "learning_rate": 0.00042309235336596325, "loss": 0.0727, "theoretical_loss": 3.4709461243272726, "tokens_seen": 1917714432 }, { "epoch": 0.16, "learning_rate": 0.0004230522346144588, "loss": 0.0687, "theoretical_loss": 3.470926345058818, "tokens_seen": 1917845504 }, { "epoch": 0.16, "learning_rate": 0.00042301211586295433, "loss": 0.0666, "theoretical_loss": 3.4709065675205677, "tokens_seen": 1917976576 }, { "epoch": 0.16, "learning_rate": 0.0004229719971114499, "loss": 0.0728, "theoretical_loss": 3.4708867917122532, "tokens_seen": 1918107648 }, { "epoch": 0.16, "learning_rate": 0.00042293187835994547, "loss": 0.0673, "theoretical_loss": 3.470867017633604, "tokens_seen": 1918238720 }, { "epoch": 0.16, "learning_rate": 0.000422891759608441, "loss": 0.0681, "theoretical_loss": 3.470847245284351, "tokens_seen": 1918369792 }, { "epoch": 0.16, "learning_rate": 0.00042285164085693655, "loss": 0.0714, "theoretical_loss": 3.470827474664225, "tokens_seen": 1918500864 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0011065874714404345, "objective/train/docs_used": 699453, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3870476484298706, "objective/train/original_loss": 1.3870477676391602, "objective/train/theoretical_loss": 3.4708077057729567, "objective/train/tokens_used": 289156576, "objective/train/value_avg": -0.00675201416015625, "objective/train/value_loss": 0.00024777991347946227, "objective/train/value_max": -3.0934810638427734e-05, "objective/train/value_min": -0.3955078125, "objective/train/value_reward_corr": 0.6285902284449576, "objective/train/value_std": 0.0134735107421875, "objective/train/weight_avg": 1.001218318939209, "objective/train/weighted_lm_loss": 1.387878656387329, "objective/train/weights_max": 1.3305058479309082, "objective/train/weights_min": 0.37021923065185547, "theoretical_loss": 3.4708077057729567, "tokens_seen": 1918631936 }, { "epoch": 0.16, "learning_rate": 0.0004228115221054321, "loss": 0.0701, "theoretical_loss": 3.4708077057729567, "tokens_seen": 1918631936 }, { "epoch": 0.16, "learning_rate": 0.00042277140335392763, "loss": 0.0727, "theoretical_loss": 3.4707879386102762, "tokens_seen": 1918763008 }, { "epoch": 0.16, "learning_rate": 0.00042273128460242315, "loss": 0.0659, "theoretical_loss": 3.4707681731759155, "tokens_seen": 1918894080 }, { "epoch": 0.16, "learning_rate": 0.0004226911658509187, "loss": 0.0671, "theoretical_loss": 3.470748409469604, "tokens_seen": 1919025152 }, { "epoch": 0.16, "learning_rate": 0.0004226510470994143, "loss": 0.0663, "theoretical_loss": 3.4707286474910735, "tokens_seen": 1919156224 }, { "epoch": 0.16, "learning_rate": 0.0004226109283479098, "loss": 0.0661, "theoretical_loss": 3.4707088872400544, "tokens_seen": 1919287296 }, { "epoch": 0.16, "learning_rate": 0.00042257080959640537, "loss": 0.0656, "theoretical_loss": 3.4706891287162787, "tokens_seen": 1919418368 }, { "epoch": 0.16, "learning_rate": 0.00042253069084490093, "loss": 0.0652, "theoretical_loss": 3.4706693719194766, "tokens_seen": 1919549440 }, { "epoch": 0.16, "learning_rate": 0.00042249057209339645, "loss": 0.0692, "theoretical_loss": 3.470649616849379, "tokens_seen": 1919680512 }, { "epoch": 0.16, "learning_rate": 0.000422450453341892, "loss": 0.0721, "theoretical_loss": 3.4706298635057182, "tokens_seen": 1919811584 }, { "epoch": 0.16, "learning_rate": 0.0004224103345903876, "loss": 0.0713, "theoretical_loss": 3.4706101118882247, "tokens_seen": 1919942656 }, { "epoch": 0.16, "learning_rate": 0.0004223702158388831, "loss": 0.0688, "theoretical_loss": 3.4705903619966296, "tokens_seen": 1920073728 }, { "epoch": 0.16, "learning_rate": 0.0004223300970873786, "loss": 0.0708, "theoretical_loss": 3.4705706138306653, "tokens_seen": 1920204800 }, { "epoch": 0.16, "learning_rate": 0.0004222899783358742, "loss": 0.0684, "theoretical_loss": 3.4705508673900622, "tokens_seen": 1920335872 }, { "epoch": 0.16, "learning_rate": 0.00042224985958436975, "loss": 0.0667, "theoretical_loss": 3.4705311226745525, "tokens_seen": 1920466944 }, { "epoch": 0.16, "learning_rate": 0.00042220974083286526, "loss": 0.0693, "theoretical_loss": 3.470511379683868, "tokens_seen": 1920598016 }, { "epoch": 0.16, "learning_rate": 0.00042216962208136083, "loss": 0.0698, "theoretical_loss": 3.470491638417739, "tokens_seen": 1920729088 }, { "epoch": 0.16, "learning_rate": 0.0004221295033298564, "loss": 0.0709, "theoretical_loss": 3.470471898875899, "tokens_seen": 1920860160 }, { "epoch": 0.16, "learning_rate": 0.00042208938457835197, "loss": 0.0733, "theoretical_loss": 3.470452161058078, "tokens_seen": 1920991232 }, { "epoch": 0.16, "learning_rate": 0.0004220492658268475, "loss": 0.0736, "theoretical_loss": 3.4704324249640086, "tokens_seen": 1921122304 }, { "epoch": 0.16, "learning_rate": 0.00042200914707534305, "loss": 0.0683, "theoretical_loss": 3.470412690593423, "tokens_seen": 1921253376 }, { "epoch": 0.16, "learning_rate": 0.00042196902832383857, "loss": 0.0675, "theoretical_loss": 3.470392957946053, "tokens_seen": 1921384448 }, { "epoch": 0.16, "learning_rate": 0.0004219289095723341, "loss": 0.0718, "theoretical_loss": 3.4703732270216303, "tokens_seen": 1921515520 }, { "epoch": 0.16, "learning_rate": 0.00042188879082082965, "loss": 0.0634, "theoretical_loss": 3.470353497819887, "tokens_seen": 1921646592 }, { "epoch": 0.16, "learning_rate": 0.0004218486720693252, "loss": 0.0702, "theoretical_loss": 3.470333770340555, "tokens_seen": 1921777664 }, { "epoch": 0.16, "objective/train/advantage_avg": 0.0010013426654040813, "objective/train/docs_used": 700743, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4455525875091553, "objective/train/original_loss": 1.4455525875091553, "objective/train/theoretical_loss": 3.4703140445833673, "objective/train/tokens_used": 292433376, "objective/train/value_avg": -0.0068206787109375, "objective/train/value_loss": 0.00025284229195676744, "objective/train/value_max": -7.140636444091797e-05, "objective/train/value_min": -0.98876953125, "objective/train/value_reward_corr": 0.8202248838835473, "objective/train/value_std": 0.0224456787109375, "objective/train/weight_avg": 1.001115083694458, "objective/train/weighted_lm_loss": 1.4460923671722412, "objective/train/weights_max": 1.2912558317184448, "objective/train/weights_min": 0.4256550669670105, "theoretical_loss": 3.4703140445833673, "tokens_seen": 1921908736 }, { "epoch": 0.16, "learning_rate": 0.00042180855331782073, "loss": 0.0709, "theoretical_loss": 3.4703140445833673, "tokens_seen": 1921908736 }, { "epoch": 0.16, "learning_rate": 0.0004217684345663163, "loss": 0.07, "theoretical_loss": 3.4702943205480548, "tokens_seen": 1922039808 }, { "epoch": 0.16, "learning_rate": 0.00042172831581481187, "loss": 0.0691, "theoretical_loss": 3.4702745982343512, "tokens_seen": 1922170880 }, { "epoch": 0.17, "learning_rate": 0.00042168819706330744, "loss": 0.071, "theoretical_loss": 3.470254877641988, "tokens_seen": 1922301952 }, { "epoch": 0.17, "learning_rate": 0.00042164807831180295, "loss": 0.0682, "theoretical_loss": 3.4702351587706977, "tokens_seen": 1922433024 }, { "epoch": 0.17, "learning_rate": 0.0004216079595602985, "loss": 0.0736, "theoretical_loss": 3.470215441620213, "tokens_seen": 1922564096 }, { "epoch": 0.17, "learning_rate": 0.00042156784080879403, "loss": 0.0675, "theoretical_loss": 3.470195726190266, "tokens_seen": 1922695168 }, { "epoch": 0.17, "learning_rate": 0.00042152772205728955, "loss": 0.0699, "theoretical_loss": 3.47017601248059, "tokens_seen": 1922826240 }, { "epoch": 0.17, "learning_rate": 0.0004214876033057851, "loss": 0.0699, "theoretical_loss": 3.470156300490917, "tokens_seen": 1922957312 }, { "epoch": 0.17, "learning_rate": 0.0004214474845542807, "loss": 0.069, "theoretical_loss": 3.4701365902209798, "tokens_seen": 1923088384 }, { "epoch": 0.17, "learning_rate": 0.0004214073658027762, "loss": 0.0694, "theoretical_loss": 3.4701168816705117, "tokens_seen": 1923219456 }, { "epoch": 0.17, "learning_rate": 0.00042136724705127177, "loss": 0.0689, "theoretical_loss": 3.4700971748392453, "tokens_seen": 1923350528 }, { "epoch": 0.17, "learning_rate": 0.00042132712829976734, "loss": 0.0654, "theoretical_loss": 3.4700774697269128, "tokens_seen": 1923481600 }, { "epoch": 0.17, "learning_rate": 0.0004212870095482629, "loss": 0.0686, "theoretical_loss": 3.470057766333248, "tokens_seen": 1923612672 }, { "epoch": 0.17, "learning_rate": 0.0004212468907967584, "loss": 0.0665, "theoretical_loss": 3.4700380646579836, "tokens_seen": 1923743744 }, { "epoch": 0.17, "learning_rate": 0.000421206772045254, "loss": 0.0681, "theoretical_loss": 3.4700183647008522, "tokens_seen": 1923874816 }, { "epoch": 0.17, "learning_rate": 0.0004211666532937495, "loss": 0.0694, "theoretical_loss": 3.469998666461588, "tokens_seen": 1924005888 }, { "epoch": 0.17, "learning_rate": 0.000421126534542245, "loss": 0.0721, "theoretical_loss": 3.4699789699399233, "tokens_seen": 1924136960 }, { "epoch": 0.17, "learning_rate": 0.0004210864157907406, "loss": 0.0658, "theoretical_loss": 3.469959275135592, "tokens_seen": 1924268032 }, { "epoch": 0.17, "learning_rate": 0.00042104629703923615, "loss": 0.0706, "theoretical_loss": 3.469939582048326, "tokens_seen": 1924399104 }, { "epoch": 0.17, "learning_rate": 0.00042100617828773167, "loss": 0.0697, "theoretical_loss": 3.4699198906778603, "tokens_seen": 1924530176 }, { "epoch": 0.17, "learning_rate": 0.00042096605953622723, "loss": 0.0656, "theoretical_loss": 3.469900201023928, "tokens_seen": 1924661248 }, { "epoch": 0.17, "learning_rate": 0.0004209259407847228, "loss": 0.0659, "theoretical_loss": 3.469880513086262, "tokens_seen": 1924792320 }, { "epoch": 0.17, "learning_rate": 0.00042088582203321837, "loss": 0.07, "theoretical_loss": 3.469860826864596, "tokens_seen": 1924923392 }, { "epoch": 0.17, "learning_rate": 0.0004208457032817139, "loss": 0.0714, "theoretical_loss": 3.469841142358664, "tokens_seen": 1925054464 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0013288449263200164, "objective/train/docs_used": 701973, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4720760583877563, "objective/train/original_loss": 1.4720759391784668, "objective/train/theoretical_loss": 3.4698214595681995, "objective/train/tokens_used": 295710176, "objective/train/value_avg": -0.01050567626953125, "objective/train/value_loss": 0.0003967011289205402, "objective/train/value_max": -8.749961853027344e-05, "objective/train/value_min": -0.591796875, "objective/train/value_reward_corr": 0.5589192701842807, "objective/train/value_std": 0.015869140625, "objective/train/weight_avg": 1.0014991760253906, "objective/train/weighted_lm_loss": 1.473642110824585, "objective/train/weights_max": 1.3908729553222656, "objective/train/weights_min": 0.37064456939697266, "theoretical_loss": 3.4698214595681995, "tokens_seen": 1925185536 }, { "epoch": 0.17, "learning_rate": 0.00042080558453020945, "loss": 0.0725, "theoretical_loss": 3.4698214595681995, "tokens_seen": 1925185536 }, { "epoch": 0.17, "learning_rate": 0.00042076546577870497, "loss": 0.0728, "theoretical_loss": 3.469801778492936, "tokens_seen": 1925316608 }, { "epoch": 0.17, "learning_rate": 0.0004207253470272005, "loss": 0.0707, "theoretical_loss": 3.4697820991326074, "tokens_seen": 1925447680 }, { "epoch": 0.17, "learning_rate": 0.00042068522827569605, "loss": 0.0713, "theoretical_loss": 3.4697624214869474, "tokens_seen": 1925578752 }, { "epoch": 0.17, "learning_rate": 0.0004206451095241916, "loss": 0.0704, "theoretical_loss": 3.46974274555569, "tokens_seen": 1925709824 }, { "epoch": 0.17, "learning_rate": 0.00042060499077268713, "loss": 0.0708, "theoretical_loss": 3.4697230713385694, "tokens_seen": 1925840896 }, { "epoch": 0.17, "learning_rate": 0.0004205648720211827, "loss": 0.0657, "theoretical_loss": 3.4697033988353194, "tokens_seen": 1925971968 }, { "epoch": 0.17, "learning_rate": 0.00042052475326967827, "loss": 0.0706, "theoretical_loss": 3.469683728045674, "tokens_seen": 1926103040 }, { "epoch": 0.17, "learning_rate": 0.00042048463451817384, "loss": 0.0701, "theoretical_loss": 3.4696640589693675, "tokens_seen": 1926234112 }, { "epoch": 0.17, "learning_rate": 0.00042044451576666935, "loss": 0.0712, "theoretical_loss": 3.469644391606134, "tokens_seen": 1926365184 }, { "epoch": 0.17, "learning_rate": 0.0004204043970151649, "loss": 0.0645, "theoretical_loss": 3.4696247259557076, "tokens_seen": 1926496256 }, { "epoch": 0.17, "learning_rate": 0.0004203642782636605, "loss": 0.0716, "theoretical_loss": 3.4696050620178234, "tokens_seen": 1926627328 }, { "epoch": 0.17, "learning_rate": 0.00042032415951215595, "loss": 0.0711, "theoretical_loss": 3.469585399792215, "tokens_seen": 1926758400 }, { "epoch": 0.17, "learning_rate": 0.0004202840407606515, "loss": 0.0695, "theoretical_loss": 3.4695657392786163, "tokens_seen": 1926889472 }, { "epoch": 0.17, "learning_rate": 0.0004202439220091471, "loss": 0.0677, "theoretical_loss": 3.4695460804767633, "tokens_seen": 1927020544 }, { "epoch": 0.17, "learning_rate": 0.0004202038032576426, "loss": 0.0678, "theoretical_loss": 3.469526423386389, "tokens_seen": 1927151616 }, { "epoch": 0.17, "learning_rate": 0.00042016368450613817, "loss": 0.0668, "theoretical_loss": 3.4695067680072293, "tokens_seen": 1927282688 }, { "epoch": 0.17, "learning_rate": 0.00042012356575463374, "loss": 0.0708, "theoretical_loss": 3.4694871143390182, "tokens_seen": 1927413760 }, { "epoch": 0.17, "learning_rate": 0.0004200834470031293, "loss": 0.0671, "theoretical_loss": 3.469467462381491, "tokens_seen": 1927544832 }, { "epoch": 0.17, "learning_rate": 0.0004200433282516248, "loss": 0.0722, "theoretical_loss": 3.4694478121343817, "tokens_seen": 1927675904 }, { "epoch": 0.17, "learning_rate": 0.0004200032095001204, "loss": 0.0704, "theoretical_loss": 3.469428163597426, "tokens_seen": 1927806976 }, { "epoch": 0.17, "learning_rate": 0.00041996309074861596, "loss": 0.0692, "theoretical_loss": 3.4694085167703577, "tokens_seen": 1927938048 }, { "epoch": 0.17, "learning_rate": 0.0004199229719971114, "loss": 0.0671, "theoretical_loss": 3.4693888716529124, "tokens_seen": 1928069120 }, { "epoch": 0.17, "learning_rate": 0.000419882853245607, "loss": 0.0668, "theoretical_loss": 3.469369228244825, "tokens_seen": 1928200192 }, { "epoch": 0.17, "learning_rate": 0.00041984273449410255, "loss": 0.0689, "theoretical_loss": 3.4693495865458313, "tokens_seen": 1928331264 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0011218232102692127, "objective/train/docs_used": 703126, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3255513906478882, "objective/train/original_loss": 1.3255512714385986, "objective/train/theoretical_loss": 3.4693299465556655, "objective/train/tokens_used": 298986976, "objective/train/value_avg": -0.01129150390625, "objective/train/value_loss": 0.000299301726045087, "objective/train/value_max": -6.812810897827148e-05, "objective/train/value_min": -0.5302734375, "objective/train/value_reward_corr": 0.6673834485593564, "objective/train/value_std": 0.0173187255859375, "objective/train/weight_avg": 1.0012623071670532, "objective/train/weighted_lm_loss": 1.3270624876022339, "objective/train/weights_max": 1.5635205507278442, "objective/train/weights_min": 0.41369375586509705, "theoretical_loss": 3.4693299465556655, "tokens_seen": 1928462336 }, { "epoch": 0.17, "learning_rate": 0.00041980261574259807, "loss": 0.0723, "theoretical_loss": 3.4693299465556655, "tokens_seen": 1928462336 }, { "epoch": 0.17, "learning_rate": 0.00041976249699109363, "loss": 0.0704, "theoretical_loss": 3.4693103082740633, "tokens_seen": 1928593408 }, { "epoch": 0.17, "learning_rate": 0.0004197223782395892, "loss": 0.071, "theoretical_loss": 3.4692906717007594, "tokens_seen": 1928724480 }, { "epoch": 0.17, "learning_rate": 0.00041968225948808477, "loss": 0.0703, "theoretical_loss": 3.4692710368354898, "tokens_seen": 1928855552 }, { "epoch": 0.17, "learning_rate": 0.0004196421407365803, "loss": 0.073, "theoretical_loss": 3.4692514036779896, "tokens_seen": 1928986624 }, { "epoch": 0.17, "learning_rate": 0.00041960202198507585, "loss": 0.0681, "theoretical_loss": 3.469231772227994, "tokens_seen": 1929117696 }, { "epoch": 0.17, "learning_rate": 0.0004195619032335714, "loss": 0.0684, "theoretical_loss": 3.4692121424852393, "tokens_seen": 1929248768 }, { "epoch": 0.17, "learning_rate": 0.0004195217844820669, "loss": 0.0678, "theoretical_loss": 3.4691925144494604, "tokens_seen": 1929379840 }, { "epoch": 0.17, "learning_rate": 0.00041948166573056245, "loss": 0.0706, "theoretical_loss": 3.4691728881203927, "tokens_seen": 1929510912 }, { "epoch": 0.17, "learning_rate": 0.000419441546979058, "loss": 0.0696, "theoretical_loss": 3.4691532634977724, "tokens_seen": 1929641984 }, { "epoch": 0.17, "learning_rate": 0.0004194014282275536, "loss": 0.0709, "theoretical_loss": 3.469133640581335, "tokens_seen": 1929773056 }, { "epoch": 0.17, "learning_rate": 0.0004193613094760491, "loss": 0.07, "theoretical_loss": 3.469114019370816, "tokens_seen": 1929904128 }, { "epoch": 0.17, "learning_rate": 0.00041932119072454467, "loss": 0.0753, "theoretical_loss": 3.4690943998659516, "tokens_seen": 1930035200 }, { "epoch": 0.17, "learning_rate": 0.00041928107197304024, "loss": 0.0686, "theoretical_loss": 3.4690747820664782, "tokens_seen": 1930166272 }, { "epoch": 0.17, "learning_rate": 0.00041924095322153575, "loss": 0.0676, "theoretical_loss": 3.4690551659721307, "tokens_seen": 1930297344 }, { "epoch": 0.17, "learning_rate": 0.0004192008344700313, "loss": 0.0672, "theoretical_loss": 3.469035551582646, "tokens_seen": 1930428416 }, { "epoch": 0.17, "learning_rate": 0.0004191607157185269, "loss": 0.0705, "theoretical_loss": 3.4690159388977593, "tokens_seen": 1930559488 }, { "epoch": 0.17, "learning_rate": 0.00041912059696702235, "loss": 0.065, "theoretical_loss": 3.468996327917208, "tokens_seen": 1930690560 }, { "epoch": 0.17, "learning_rate": 0.0004190804782155179, "loss": 0.0672, "theoretical_loss": 3.4689767186407274, "tokens_seen": 1930821632 }, { "epoch": 0.17, "learning_rate": 0.0004190403594640135, "loss": 0.0685, "theoretical_loss": 3.468957111068054, "tokens_seen": 1930952704 }, { "epoch": 0.17, "learning_rate": 0.00041900024071250905, "loss": 0.0719, "theoretical_loss": 3.468937505198924, "tokens_seen": 1931083776 }, { "epoch": 0.17, "learning_rate": 0.00041896012196100457, "loss": 0.0735, "theoretical_loss": 3.468917901033074, "tokens_seen": 1931214848 }, { "epoch": 0.17, "learning_rate": 0.00041892000320950014, "loss": 0.0741, "theoretical_loss": 3.4688982985702403, "tokens_seen": 1931345920 }, { "epoch": 0.17, "learning_rate": 0.0004188798844579957, "loss": 0.0718, "theoretical_loss": 3.468878697810159, "tokens_seen": 1931476992 }, { "epoch": 0.17, "learning_rate": 0.0004188397657064912, "loss": 0.0686, "theoretical_loss": 3.468859098752568, "tokens_seen": 1931608064 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0004168991290498525, "objective/train/docs_used": 704289, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3093249797821045, "objective/train/original_loss": 1.309324860572815, "objective/train/theoretical_loss": 3.468839501397202, "objective/train/tokens_used": 302263776, "objective/train/value_avg": -0.007221221923828125, "objective/train/value_loss": 0.0002891523181460798, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.67529296875, "objective/train/value_reward_corr": 0.7061764252204354, "objective/train/value_std": 0.017578125, "objective/train/weight_avg": 1.000549554824829, "objective/train/weighted_lm_loss": 1.3091713190078735, "objective/train/weights_max": 1.2622534036636353, "objective/train/weights_min": 0.3741576671600342, "theoretical_loss": 3.468839501397202, "tokens_seen": 1931739136 }, { "epoch": 0.17, "learning_rate": 0.0004187996469549868, "loss": 0.0674, "theoretical_loss": 3.468839501397202, "tokens_seen": 1931739136 }, { "epoch": 0.17, "learning_rate": 0.00041875952820348236, "loss": 0.0701, "theoretical_loss": 3.468819905743799, "tokens_seen": 1931870208 }, { "epoch": 0.17, "learning_rate": 0.0004187194094519778, "loss": 0.065, "theoretical_loss": 3.4688003117920956, "tokens_seen": 1932001280 }, { "epoch": 0.17, "learning_rate": 0.0004186792907004734, "loss": 0.0693, "theoretical_loss": 3.4687807195418285, "tokens_seen": 1932132352 }, { "epoch": 0.17, "learning_rate": 0.00041863917194896895, "loss": 0.0656, "theoretical_loss": 3.4687611289927336, "tokens_seen": 1932263424 }, { "epoch": 0.17, "learning_rate": 0.0004185990531974645, "loss": 0.068, "theoretical_loss": 3.468741540144549, "tokens_seen": 1932394496 }, { "epoch": 0.17, "learning_rate": 0.00041855893444596004, "loss": 0.073, "theoretical_loss": 3.468721952997012, "tokens_seen": 1932525568 }, { "epoch": 0.17, "learning_rate": 0.0004185188156944556, "loss": 0.0676, "theoretical_loss": 3.4687023675498585, "tokens_seen": 1932656640 }, { "epoch": 0.17, "learning_rate": 0.00041847869694295117, "loss": 0.0706, "theoretical_loss": 3.4686827838028256, "tokens_seen": 1932787712 }, { "epoch": 0.17, "learning_rate": 0.0004184385781914467, "loss": 0.0702, "theoretical_loss": 3.4686632017556516, "tokens_seen": 1932918784 }, { "epoch": 0.17, "learning_rate": 0.00041839845943994226, "loss": 0.07, "theoretical_loss": 3.4686436214080727, "tokens_seen": 1933049856 }, { "epoch": 0.17, "learning_rate": 0.0004183583406884378, "loss": 0.0702, "theoretical_loss": 3.468624042759826, "tokens_seen": 1933180928 }, { "epoch": 0.17, "learning_rate": 0.0004183182219369333, "loss": 0.0695, "theoretical_loss": 3.4686044658106496, "tokens_seen": 1933312000 }, { "epoch": 0.17, "learning_rate": 0.00041827810318542885, "loss": 0.0714, "theoretical_loss": 3.46858489056028, "tokens_seen": 1933443072 }, { "epoch": 0.17, "learning_rate": 0.0004182379844339244, "loss": 0.0708, "theoretical_loss": 3.468565317008456, "tokens_seen": 1933574144 }, { "epoch": 0.17, "learning_rate": 0.00041819786568242, "loss": 0.0708, "theoretical_loss": 3.4685457451549135, "tokens_seen": 1933705216 }, { "epoch": 0.17, "learning_rate": 0.0004181577469309155, "loss": 0.0709, "theoretical_loss": 3.4685261749993908, "tokens_seen": 1933836288 }, { "epoch": 0.17, "learning_rate": 0.00041811762817941107, "loss": 0.0723, "theoretical_loss": 3.468506606541626, "tokens_seen": 1933967360 }, { "epoch": 0.17, "learning_rate": 0.00041807750942790664, "loss": 0.0731, "theoretical_loss": 3.4684870397813556, "tokens_seen": 1934098432 }, { "epoch": 0.17, "learning_rate": 0.00041803739067640215, "loss": 0.0672, "theoretical_loss": 3.4684674747183184, "tokens_seen": 1934229504 }, { "epoch": 0.17, "learning_rate": 0.0004179972719248977, "loss": 0.068, "theoretical_loss": 3.4684479113522517, "tokens_seen": 1934360576 }, { "epoch": 0.17, "learning_rate": 0.0004179571531733933, "loss": 0.0689, "theoretical_loss": 3.4684283496828936, "tokens_seen": 1934491648 }, { "epoch": 0.17, "learning_rate": 0.00041791703442188875, "loss": 0.0751, "theoretical_loss": 3.4684087897099816, "tokens_seen": 1934622720 }, { "epoch": 0.17, "learning_rate": 0.0004178769156703843, "loss": 0.069, "theoretical_loss": 3.468389231433254, "tokens_seen": 1934753792 }, { "epoch": 0.17, "learning_rate": 0.0004178367969188799, "loss": 0.0685, "theoretical_loss": 3.4683696748524486, "tokens_seen": 1934884864 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0003968069504480809, "objective/train/docs_used": 705622, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3316153287887573, "objective/train/original_loss": 1.3316152095794678, "objective/train/theoretical_loss": 3.4683501199673037, "objective/train/tokens_used": 305540576, "objective/train/value_avg": -0.00832366943359375, "objective/train/value_loss": 0.0002805917465593666, "objective/train/value_max": -3.24249267578125e-05, "objective/train/value_min": -0.98486328125, "objective/train/value_reward_corr": 0.8418879026764973, "objective/train/value_std": 0.027252197265625, "objective/train/weight_avg": 1.0005205869674683, "objective/train/weighted_lm_loss": 1.3321831226348877, "objective/train/weights_max": 1.4851784706115723, "objective/train/weights_min": 0.3687315881252289, "theoretical_loss": 3.4683501199673037, "tokens_seen": 1935015936 }, { "epoch": 0.17, "learning_rate": 0.00041779667816737546, "loss": 0.0697, "theoretical_loss": 3.4683501199673037, "tokens_seen": 1935015936 }, { "epoch": 0.17, "learning_rate": 0.00041775655941587097, "loss": 0.0693, "theoretical_loss": 3.468330566777557, "tokens_seen": 1935147008 }, { "epoch": 0.17, "learning_rate": 0.00041771644066436654, "loss": 0.0677, "theoretical_loss": 3.468311015282947, "tokens_seen": 1935278080 }, { "epoch": 0.17, "learning_rate": 0.0004176763219128621, "loss": 0.0671, "theoretical_loss": 3.4682914654832118, "tokens_seen": 1935409152 }, { "epoch": 0.17, "learning_rate": 0.0004176362031613576, "loss": 0.0712, "theoretical_loss": 3.4682719173780905, "tokens_seen": 1935540224 }, { "epoch": 0.17, "learning_rate": 0.0004175960844098532, "loss": 0.0681, "theoretical_loss": 3.46825237096732, "tokens_seen": 1935671296 }, { "epoch": 0.17, "learning_rate": 0.00041755596565834876, "loss": 0.0669, "theoretical_loss": 3.46823282625064, "tokens_seen": 1935802368 }, { "epoch": 0.17, "learning_rate": 0.0004175158469068442, "loss": 0.0685, "theoretical_loss": 3.4682132832277883, "tokens_seen": 1935933440 }, { "epoch": 0.17, "learning_rate": 0.0004174757281553398, "loss": 0.0709, "theoretical_loss": 3.468193741898504, "tokens_seen": 1936064512 }, { "epoch": 0.17, "learning_rate": 0.00041743560940383535, "loss": 0.0666, "theoretical_loss": 3.4681742022625253, "tokens_seen": 1936195584 }, { "epoch": 0.17, "learning_rate": 0.0004173954906523309, "loss": 0.0675, "theoretical_loss": 3.4681546643195906, "tokens_seen": 1936326656 }, { "epoch": 0.17, "learning_rate": 0.00041735537190082644, "loss": 0.0702, "theoretical_loss": 3.468135128069439, "tokens_seen": 1936457728 }, { "epoch": 0.17, "learning_rate": 0.000417315253149322, "loss": 0.069, "theoretical_loss": 3.4681155935118095, "tokens_seen": 1936588800 }, { "epoch": 0.17, "learning_rate": 0.0004172751343978176, "loss": 0.0691, "theoretical_loss": 3.4680960606464404, "tokens_seen": 1936719872 }, { "epoch": 0.17, "learning_rate": 0.0004172350156463131, "loss": 0.07, "theoretical_loss": 3.4680765294730707, "tokens_seen": 1936850944 }, { "epoch": 0.17, "learning_rate": 0.00041719489689480866, "loss": 0.0657, "theoretical_loss": 3.4680569999914397, "tokens_seen": 1936982016 }, { "epoch": 0.17, "learning_rate": 0.0004171547781433042, "loss": 0.0701, "theoretical_loss": 3.468037472201286, "tokens_seen": 1937113088 }, { "epoch": 0.17, "learning_rate": 0.0004171146593917997, "loss": 0.0713, "theoretical_loss": 3.468017946102349, "tokens_seen": 1937244160 }, { "epoch": 0.17, "learning_rate": 0.00041707454064029525, "loss": 0.071, "theoretical_loss": 3.467998421694367, "tokens_seen": 1937375232 }, { "epoch": 0.17, "learning_rate": 0.0004170344218887908, "loss": 0.0689, "theoretical_loss": 3.467978898977081, "tokens_seen": 1937506304 }, { "epoch": 0.17, "learning_rate": 0.0004169943031372864, "loss": 0.0714, "theoretical_loss": 3.467959377950228, "tokens_seen": 1937637376 }, { "epoch": 0.17, "learning_rate": 0.0004169541843857819, "loss": 0.0674, "theoretical_loss": 3.4679398586135486, "tokens_seen": 1937768448 }, { "epoch": 0.17, "learning_rate": 0.00041691406563427747, "loss": 0.0698, "theoretical_loss": 3.4679203409667823, "tokens_seen": 1937899520 }, { "epoch": 0.17, "learning_rate": 0.00041687394688277304, "loss": 0.0729, "theoretical_loss": 3.467900825009668, "tokens_seen": 1938030592 }, { "epoch": 0.17, "learning_rate": 0.00041683382813126856, "loss": 0.0693, "theoretical_loss": 3.467881310741945, "tokens_seen": 1938161664 }, { "epoch": 0.17, "objective/train/advantage_avg": 0.0007179125677794218, "objective/train/docs_used": 706849, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4022886753082275, "objective/train/original_loss": 1.4022884368896484, "objective/train/theoretical_loss": 3.4678617981633533, "objective/train/tokens_used": 308817376, "objective/train/value_avg": -0.006259918212890625, "objective/train/value_loss": 0.00016032786516007036, "objective/train/value_max": -3.737211227416992e-05, "objective/train/value_min": -0.26708984375, "objective/train/value_reward_corr": 0.6602139385635815, "objective/train/value_std": 0.0125732421875, "objective/train/weight_avg": 1.0007935762405396, "objective/train/weighted_lm_loss": 1.4033867120742798, "objective/train/weights_max": 1.2137562036514282, "objective/train/weights_min": 0.3708581030368805, "theoretical_loss": 3.4678617981633533, "tokens_seen": 1938292736 }, { "epoch": 0.17, "learning_rate": 0.0004167937093797641, "loss": 0.0679, "theoretical_loss": 3.4678617981633533, "tokens_seen": 1938292736 }, { "epoch": 0.17, "learning_rate": 0.0004167535906282597, "loss": 0.0709, "theoretical_loss": 3.467842287273632, "tokens_seen": 1938423808 }, { "epoch": 0.17, "learning_rate": 0.0004167134718767552, "loss": 0.0639, "theoretical_loss": 3.467822778072521, "tokens_seen": 1938554880 }, { "epoch": 0.17, "learning_rate": 0.0004166733531252507, "loss": 0.0664, "theoretical_loss": 3.4678032705597603, "tokens_seen": 1938685952 }, { "epoch": 0.18, "learning_rate": 0.0004166332343737463, "loss": 0.0717, "theoretical_loss": 3.4677837647350898, "tokens_seen": 1938817024 }, { "epoch": 0.18, "learning_rate": 0.00041659311562224186, "loss": 0.0669, "theoretical_loss": 3.4677642605982486, "tokens_seen": 1938948096 }, { "epoch": 0.18, "learning_rate": 0.00041655299687073737, "loss": 0.0699, "theoretical_loss": 3.467744758148976, "tokens_seen": 1939079168 }, { "epoch": 0.18, "learning_rate": 0.00041651287811923294, "loss": 0.0694, "theoretical_loss": 3.467725257387014, "tokens_seen": 1939210240 }, { "epoch": 0.18, "learning_rate": 0.0004164727593677285, "loss": 0.0729, "theoretical_loss": 3.4677057583121007, "tokens_seen": 1939341312 }, { "epoch": 0.18, "learning_rate": 0.000416432640616224, "loss": 0.0721, "theoretical_loss": 3.467686260923977, "tokens_seen": 1939472384 }, { "epoch": 0.18, "learning_rate": 0.0004163925218647196, "loss": 0.0732, "theoretical_loss": 3.467666765222383, "tokens_seen": 1939603456 }, { "epoch": 0.18, "learning_rate": 0.00041635240311321516, "loss": 0.0708, "theoretical_loss": 3.467647271207059, "tokens_seen": 1939734528 }, { "epoch": 0.18, "learning_rate": 0.0004163122843617107, "loss": 0.0727, "theoretical_loss": 3.4676277788777443, "tokens_seen": 1939865600 }, { "epoch": 0.18, "learning_rate": 0.0004162721656102062, "loss": 0.0673, "theoretical_loss": 3.46760828823418, "tokens_seen": 1939996672 }, { "epoch": 0.18, "learning_rate": 0.00041623204685870176, "loss": 0.0699, "theoretical_loss": 3.467588799276106, "tokens_seen": 1940127744 }, { "epoch": 0.18, "learning_rate": 0.0004161919281071973, "loss": 0.0684, "theoretical_loss": 3.4675693120032633, "tokens_seen": 1940258816 }, { "epoch": 0.18, "learning_rate": 0.00041615180935569284, "loss": 0.0699, "theoretical_loss": 3.4675498264153912, "tokens_seen": 1940389888 }, { "epoch": 0.18, "learning_rate": 0.0004161116906041884, "loss": 0.0673, "theoretical_loss": 3.467530342512232, "tokens_seen": 1940520960 }, { "epoch": 0.18, "learning_rate": 0.000416071571852684, "loss": 0.073, "theoretical_loss": 3.4675108602935243, "tokens_seen": 1940652032 }, { "epoch": 0.18, "learning_rate": 0.0004160314531011795, "loss": 0.0728, "theoretical_loss": 3.46749137975901, "tokens_seen": 1940783104 }, { "epoch": 0.18, "learning_rate": 0.00041599133434967506, "loss": 0.0697, "theoretical_loss": 3.4674719009084294, "tokens_seen": 1940914176 }, { "epoch": 0.18, "learning_rate": 0.0004159512155981706, "loss": 0.0727, "theoretical_loss": 3.4674524237415234, "tokens_seen": 1941045248 }, { "epoch": 0.18, "learning_rate": 0.00041591109684666614, "loss": 0.0705, "theoretical_loss": 3.4674329482580326, "tokens_seen": 1941176320 }, { "epoch": 0.18, "learning_rate": 0.00041587097809516165, "loss": 0.0718, "theoretical_loss": 3.4674134744576977, "tokens_seen": 1941307392 }, { "epoch": 0.18, "learning_rate": 0.0004158308593436572, "loss": 0.0703, "theoretical_loss": 3.46739400234026, "tokens_seen": 1941438464 }, { "epoch": 0.18, "objective/train/advantage_avg": -0.0002739332849159837, "objective/train/docs_used": 708064, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4311261177062988, "objective/train/original_loss": 1.4311261177062988, "objective/train/theoretical_loss": 3.46737453190546, "objective/train/tokens_used": 312094176, "objective/train/value_avg": -0.007617950439453125, "objective/train/value_loss": 0.0003204289823770523, "objective/train/value_max": -4.756450653076172e-05, "objective/train/value_min": -0.86376953125, "objective/train/value_reward_corr": 0.6488725572515892, "objective/train/value_std": 0.016632080078125, "objective/train/weight_avg": 0.9998761415481567, "objective/train/weighted_lm_loss": 1.4302130937576294, "objective/train/weights_max": 2.093358039855957, "objective/train/weights_min": 0.3705187439918518, "theoretical_loss": 3.46737453190546, "tokens_seen": 1941569536 }, { "epoch": 0.18, "learning_rate": 0.0004157907405921528, "loss": 0.0696, "theoretical_loss": 3.46737453190546, "tokens_seen": 1941569536 }, { "epoch": 0.18, "learning_rate": 0.0004157506218406483, "loss": 0.0701, "theoretical_loss": 3.467355063153039, "tokens_seen": 1941700608 }, { "epoch": 0.18, "learning_rate": 0.0004157105030891439, "loss": 0.0677, "theoretical_loss": 3.467335596082738, "tokens_seen": 1941831680 }, { "epoch": 0.18, "learning_rate": 0.00041567038433763944, "loss": 0.073, "theoretical_loss": 3.467316130694299, "tokens_seen": 1941962752 }, { "epoch": 0.18, "learning_rate": 0.00041563026558613496, "loss": 0.0674, "theoretical_loss": 3.467296666987462, "tokens_seen": 1942093824 }, { "epoch": 0.18, "learning_rate": 0.0004155901468346305, "loss": 0.0752, "theoretical_loss": 3.467277204961968, "tokens_seen": 1942224896 }, { "epoch": 0.18, "learning_rate": 0.0004155500280831261, "loss": 0.0719, "theoretical_loss": 3.4672577446175596, "tokens_seen": 1942355968 }, { "epoch": 0.18, "learning_rate": 0.0004155099093316216, "loss": 0.0703, "theoretical_loss": 3.467238285953977, "tokens_seen": 1942487040 }, { "epoch": 0.18, "learning_rate": 0.0004154697905801171, "loss": 0.0669, "theoretical_loss": 3.4672188289709625, "tokens_seen": 1942618112 }, { "epoch": 0.18, "learning_rate": 0.0004154296718286127, "loss": 0.0721, "theoretical_loss": 3.467199373668257, "tokens_seen": 1942749184 }, { "epoch": 0.18, "learning_rate": 0.00041538955307710826, "loss": 0.0659, "theoretical_loss": 3.467179920045602, "tokens_seen": 1942880256 }, { "epoch": 0.18, "learning_rate": 0.00041534943432560377, "loss": 0.0692, "theoretical_loss": 3.4671604681027404, "tokens_seen": 1943011328 }, { "epoch": 0.18, "learning_rate": 0.00041530931557409934, "loss": 0.0689, "theoretical_loss": 3.4671410178394115, "tokens_seen": 1943142400 }, { "epoch": 0.18, "learning_rate": 0.0004152691968225949, "loss": 0.0675, "theoretical_loss": 3.467121569255359, "tokens_seen": 1943273472 }, { "epoch": 0.18, "learning_rate": 0.0004152290780710904, "loss": 0.073, "theoretical_loss": 3.4671021223503242, "tokens_seen": 1943404544 }, { "epoch": 0.18, "learning_rate": 0.000415188959319586, "loss": 0.0727, "theoretical_loss": 3.4670826771240484, "tokens_seen": 1943535616 }, { "epoch": 0.18, "learning_rate": 0.00041514884056808156, "loss": 0.0682, "theoretical_loss": 3.467063233576274, "tokens_seen": 1943666688 }, { "epoch": 0.18, "learning_rate": 0.0004151087218165771, "loss": 0.0678, "theoretical_loss": 3.4670437917067423, "tokens_seen": 1943797760 }, { "epoch": 0.18, "learning_rate": 0.0004150686030650726, "loss": 0.0709, "theoretical_loss": 3.467024351515196, "tokens_seen": 1943928832 }, { "epoch": 0.18, "learning_rate": 0.00041502848431356816, "loss": 0.0711, "theoretical_loss": 3.4670049130013765, "tokens_seen": 1944059904 }, { "epoch": 0.18, "learning_rate": 0.0004149883655620637, "loss": 0.0681, "theoretical_loss": 3.4669854761650267, "tokens_seen": 1944190976 }, { "epoch": 0.18, "learning_rate": 0.00041494824681055924, "loss": 0.0707, "theoretical_loss": 3.466966041005888, "tokens_seen": 1944322048 }, { "epoch": 0.18, "learning_rate": 0.0004149081280590548, "loss": 0.0681, "theoretical_loss": 3.466946607523703, "tokens_seen": 1944453120 }, { "epoch": 0.18, "learning_rate": 0.0004148680093075504, "loss": 0.067, "theoretical_loss": 3.4669271757182143, "tokens_seen": 1944584192 }, { "epoch": 0.18, "learning_rate": 0.0004148278905560459, "loss": 0.0696, "theoretical_loss": 3.4669077455891637, "tokens_seen": 1944715264 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0009508821531198919, "objective/train/docs_used": 709206, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.52418053150177, "objective/train/original_loss": 1.52418053150177, "objective/train/theoretical_loss": 3.466888317136293, "objective/train/tokens_used": 315370976, "objective/train/value_avg": -0.00888824462890625, "objective/train/value_loss": 0.00018279771029483527, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.3271484375, "objective/train/value_reward_corr": 0.7343405348003489, "objective/train/value_std": 0.01568603515625, "objective/train/weight_avg": 1.0010364055633545, "objective/train/weighted_lm_loss": 1.5248446464538574, "objective/train/weights_max": 1.3163669109344482, "objective/train/weights_min": 0.3683597147464752, "theoretical_loss": 3.466888317136293, "tokens_seen": 1944846336 }, { "epoch": 0.18, "learning_rate": 0.00041478777180454146, "loss": 0.0702, "theoretical_loss": 3.466888317136293, "tokens_seen": 1944846336 }, { "epoch": 0.18, "learning_rate": 0.000414747653053037, "loss": 0.0684, "theoretical_loss": 3.466868890359346, "tokens_seen": 1944977408 }, { "epoch": 0.18, "learning_rate": 0.00041470753430153254, "loss": 0.0719, "theoretical_loss": 3.466849465258065, "tokens_seen": 1945108480 }, { "epoch": 0.18, "learning_rate": 0.00041466741555002806, "loss": 0.0685, "theoretical_loss": 3.466830041832192, "tokens_seen": 1945239552 }, { "epoch": 0.18, "learning_rate": 0.0004146272967985236, "loss": 0.0721, "theoretical_loss": 3.4668106200814695, "tokens_seen": 1945370624 }, { "epoch": 0.18, "learning_rate": 0.0004145871780470192, "loss": 0.0676, "theoretical_loss": 3.4667912000056402, "tokens_seen": 1945501696 }, { "epoch": 0.18, "learning_rate": 0.0004145470592955147, "loss": 0.0702, "theoretical_loss": 3.466771781604448, "tokens_seen": 1945632768 }, { "epoch": 0.18, "learning_rate": 0.0004145069405440103, "loss": 0.0703, "theoretical_loss": 3.4667523648776344, "tokens_seen": 1945763840 }, { "epoch": 0.18, "learning_rate": 0.00041446682179250584, "loss": 0.0684, "theoretical_loss": 3.4667329498249426, "tokens_seen": 1945894912 }, { "epoch": 0.18, "learning_rate": 0.0004144267030410014, "loss": 0.0682, "theoretical_loss": 3.4667135364461155, "tokens_seen": 1946025984 }, { "epoch": 0.18, "learning_rate": 0.0004143865842894969, "loss": 0.0713, "theoretical_loss": 3.466694124740896, "tokens_seen": 1946157056 }, { "epoch": 0.18, "learning_rate": 0.0004143464655379925, "loss": 0.0684, "theoretical_loss": 3.4666747147090273, "tokens_seen": 1946288128 }, { "epoch": 0.18, "learning_rate": 0.000414306346786488, "loss": 0.0682, "theoretical_loss": 3.466655306350253, "tokens_seen": 1946419200 }, { "epoch": 0.18, "learning_rate": 0.0004142662280349835, "loss": 0.0739, "theoretical_loss": 3.4666358996643147, "tokens_seen": 1946550272 }, { "epoch": 0.18, "learning_rate": 0.0004142261092834791, "loss": 0.0709, "theoretical_loss": 3.4666164946509572, "tokens_seen": 1946681344 }, { "epoch": 0.18, "learning_rate": 0.00041418599053197466, "loss": 0.0713, "theoretical_loss": 3.466597091309923, "tokens_seen": 1946812416 }, { "epoch": 0.18, "learning_rate": 0.0004141458717804702, "loss": 0.0721, "theoretical_loss": 3.466577689640955, "tokens_seen": 1946943488 }, { "epoch": 0.18, "learning_rate": 0.00041410575302896574, "loss": 0.0696, "theoretical_loss": 3.466558289643798, "tokens_seen": 1947074560 }, { "epoch": 0.18, "learning_rate": 0.0004140656342774613, "loss": 0.0703, "theoretical_loss": 3.4665388913181934, "tokens_seen": 1947205632 }, { "epoch": 0.18, "learning_rate": 0.0004140255155259569, "loss": 0.0688, "theoretical_loss": 3.466519494663886, "tokens_seen": 1947336704 }, { "epoch": 0.18, "learning_rate": 0.0004139853967744524, "loss": 0.0673, "theoretical_loss": 3.4665000996806192, "tokens_seen": 1947467776 }, { "epoch": 0.18, "learning_rate": 0.00041394527802294796, "loss": 0.0731, "theoretical_loss": 3.466480706368137, "tokens_seen": 1947598848 }, { "epoch": 0.18, "learning_rate": 0.0004139051592714435, "loss": 0.0702, "theoretical_loss": 3.466461314726182, "tokens_seen": 1947729920 }, { "epoch": 0.18, "learning_rate": 0.000413865040519939, "loss": 0.0724, "theoretical_loss": 3.466441924754498, "tokens_seen": 1947860992 }, { "epoch": 0.18, "learning_rate": 0.00041382492176843456, "loss": 0.0724, "theoretical_loss": 3.4664225364528294, "tokens_seen": 1947992064 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0008800983196124434, "objective/train/docs_used": 710405, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3018213510513306, "objective/train/original_loss": 1.3018211126327515, "objective/train/theoretical_loss": 3.46640314982092, "objective/train/tokens_used": 318647776, "objective/train/value_avg": -0.0081787109375, "objective/train/value_loss": 0.0002484224969521165, "objective/train/value_max": -4.297494888305664e-05, "objective/train/value_min": -0.98388671875, "objective/train/value_reward_corr": 0.7326059060347823, "objective/train/value_std": 0.015777587890625, "objective/train/weight_avg": 1.0009875297546387, "objective/train/weighted_lm_loss": 1.3027188777923584, "objective/train/weights_max": 1.2370983362197876, "objective/train/weights_min": 0.37599483132362366, "theoretical_loss": 3.46640314982092, "tokens_seen": 1948123136 }, { "epoch": 0.18, "learning_rate": 0.0004137848030169301, "loss": 0.0715, "theoretical_loss": 3.46640314982092, "tokens_seen": 1948123136 }, { "epoch": 0.18, "learning_rate": 0.00041374468426542564, "loss": 0.0687, "theoretical_loss": 3.466383764858513, "tokens_seen": 1948254208 }, { "epoch": 0.18, "learning_rate": 0.0004137045655139212, "loss": 0.0694, "theoretical_loss": 3.466364381565353, "tokens_seen": 1948385280 }, { "epoch": 0.18, "learning_rate": 0.0004136644467624168, "loss": 0.0699, "theoretical_loss": 3.466344999941184, "tokens_seen": 1948516352 }, { "epoch": 0.18, "learning_rate": 0.00041362432801091235, "loss": 0.0671, "theoretical_loss": 3.4663256199857493, "tokens_seen": 1948647424 }, { "epoch": 0.18, "learning_rate": 0.00041358420925940786, "loss": 0.0708, "theoretical_loss": 3.4663062416987938, "tokens_seen": 1948778496 }, { "epoch": 0.18, "learning_rate": 0.00041354409050790343, "loss": 0.068, "theoretical_loss": 3.466286865080061, "tokens_seen": 1948909568 }, { "epoch": 0.18, "learning_rate": 0.00041350397175639894, "loss": 0.0732, "theoretical_loss": 3.4662674901292956, "tokens_seen": 1949040640 }, { "epoch": 0.18, "learning_rate": 0.00041346385300489446, "loss": 0.0714, "theoretical_loss": 3.466248116846242, "tokens_seen": 1949171712 }, { "epoch": 0.18, "learning_rate": 0.00041342373425339, "loss": 0.0686, "theoretical_loss": 3.4662287452306435, "tokens_seen": 1949302784 }, { "epoch": 0.18, "learning_rate": 0.0004133836155018856, "loss": 0.0727, "theoretical_loss": 3.4662093752822454, "tokens_seen": 1949433856 }, { "epoch": 0.18, "learning_rate": 0.0004133434967503811, "loss": 0.0678, "theoretical_loss": 3.466190007000792, "tokens_seen": 1949564928 }, { "epoch": 0.18, "learning_rate": 0.0004133033779988767, "loss": 0.0704, "theoretical_loss": 3.4661706403860277, "tokens_seen": 1949696000 }, { "epoch": 0.18, "learning_rate": 0.00041326325924737224, "loss": 0.07, "theoretical_loss": 3.466151275437697, "tokens_seen": 1949827072 }, { "epoch": 0.18, "learning_rate": 0.0004132231404958678, "loss": 0.0731, "theoretical_loss": 3.4661319121555447, "tokens_seen": 1949958144 }, { "epoch": 0.18, "learning_rate": 0.0004131830217443633, "loss": 0.0686, "theoretical_loss": 3.4661125505393153, "tokens_seen": 1950089216 }, { "epoch": 0.18, "learning_rate": 0.0004131429029928589, "loss": 0.0727, "theoretical_loss": 3.4660931905887535, "tokens_seen": 1950220288 }, { "epoch": 0.18, "learning_rate": 0.0004131027842413544, "loss": 0.07, "theoretical_loss": 3.4660738323036036, "tokens_seen": 1950351360 }, { "epoch": 0.18, "learning_rate": 0.0004130626654898499, "loss": 0.0717, "theoretical_loss": 3.4660544756836114, "tokens_seen": 1950482432 }, { "epoch": 0.18, "learning_rate": 0.0004130225467383455, "loss": 0.0708, "theoretical_loss": 3.466035120728521, "tokens_seen": 1950613504 }, { "epoch": 0.18, "learning_rate": 0.00041298242798684106, "loss": 0.0694, "theoretical_loss": 3.466015767438078, "tokens_seen": 1950744576 }, { "epoch": 0.18, "learning_rate": 0.0004129423092353366, "loss": 0.0679, "theoretical_loss": 3.465996415812027, "tokens_seen": 1950875648 }, { "epoch": 0.18, "learning_rate": 0.00041290219048383214, "loss": 0.0721, "theoretical_loss": 3.4659770658501126, "tokens_seen": 1951006720 }, { "epoch": 0.18, "learning_rate": 0.0004128620717323277, "loss": 0.0688, "theoretical_loss": 3.4659577175520813, "tokens_seen": 1951137792 }, { "epoch": 0.18, "learning_rate": 0.0004128219529808233, "loss": 0.0713, "theoretical_loss": 3.4659383709176765, "tokens_seen": 1951268864 }, { "epoch": 0.18, "objective/train/advantage_avg": 0.0013167118886485696, "objective/train/docs_used": 711575, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3343226909637451, "objective/train/original_loss": 1.3343226909637451, "objective/train/theoretical_loss": 3.4659190259466444, "objective/train/tokens_used": 321924576, "objective/train/value_avg": -0.0098876953125, "objective/train/value_loss": 0.0002301274216733873, "objective/train/value_max": -4.57763671875e-05, "objective/train/value_min": -0.376708984375, "objective/train/value_reward_corr": 0.7222252697704883, "objective/train/value_std": 0.016448974609375, "objective/train/weight_avg": 1.0014201402664185, "objective/train/weighted_lm_loss": 1.335819959640503, "objective/train/weights_max": 1.2038698196411133, "objective/train/weights_min": 0.37129127979278564, "theoretical_loss": 3.4659190259466444, "tokens_seen": 1951399936 }, { "epoch": 0.18, "learning_rate": 0.0004127818342293188, "loss": 0.0711, "theoretical_loss": 3.4659190259466444, "tokens_seen": 1951399936 }, { "epoch": 0.18, "learning_rate": 0.00041274171547781436, "loss": 0.0673, "theoretical_loss": 3.4658996826387303, "tokens_seen": 1951531008 }, { "epoch": 0.18, "learning_rate": 0.0004127015967263099, "loss": 0.0689, "theoretical_loss": 3.46588034099368, "tokens_seen": 1951662080 }, { "epoch": 0.18, "learning_rate": 0.0004126614779748054, "loss": 0.0705, "theoretical_loss": 3.465861001011237, "tokens_seen": 1951793152 }, { "epoch": 0.18, "learning_rate": 0.00041262135922330096, "loss": 0.0677, "theoretical_loss": 3.465841662691149, "tokens_seen": 1951924224 }, { "epoch": 0.18, "learning_rate": 0.00041258124047179653, "loss": 0.0694, "theoretical_loss": 3.4658223260331598, "tokens_seen": 1952055296 }, { "epoch": 0.18, "learning_rate": 0.00041254112172029204, "loss": 0.0689, "theoretical_loss": 3.465802991037016, "tokens_seen": 1952186368 }, { "epoch": 0.18, "learning_rate": 0.0004125010029687876, "loss": 0.0706, "theoretical_loss": 3.465783657702463, "tokens_seen": 1952317440 }, { "epoch": 0.18, "learning_rate": 0.0004124608842172832, "loss": 0.0716, "theoretical_loss": 3.4657643260292463, "tokens_seen": 1952448512 }, { "epoch": 0.18, "learning_rate": 0.00041242076546577875, "loss": 0.0696, "theoretical_loss": 3.465744996017112, "tokens_seen": 1952579584 }, { "epoch": 0.18, "learning_rate": 0.00041238064671427426, "loss": 0.073, "theoretical_loss": 3.4657256676658053, "tokens_seen": 1952710656 }, { "epoch": 0.18, "learning_rate": 0.00041234052796276983, "loss": 0.0732, "theoretical_loss": 3.465706340975072, "tokens_seen": 1952841728 }, { "epoch": 0.18, "learning_rate": 0.0004123004092112654, "loss": 0.0658, "theoretical_loss": 3.4656870159446584, "tokens_seen": 1952972800 }, { "epoch": 0.18, "learning_rate": 0.00041226029045976086, "loss": 0.0653, "theoretical_loss": 3.4656676925743106, "tokens_seen": 1953103872 }, { "epoch": 0.18, "learning_rate": 0.0004122201717082564, "loss": 0.0705, "theoretical_loss": 3.465648370863774, "tokens_seen": 1953234944 }, { "epoch": 0.18, "learning_rate": 0.000412180052956752, "loss": 0.0688, "theoretical_loss": 3.465629050812795, "tokens_seen": 1953366016 }, { "epoch": 0.18, "learning_rate": 0.0004121399342052475, "loss": 0.0764, "theoretical_loss": 3.46560973242112, "tokens_seen": 1953497088 }, { "epoch": 0.18, "learning_rate": 0.0004120998154537431, "loss": 0.0736, "theoretical_loss": 3.465590415688495, "tokens_seen": 1953628160 }, { "epoch": 0.18, "learning_rate": 0.00041205969670223865, "loss": 0.0698, "theoretical_loss": 3.4655711006146657, "tokens_seen": 1953759232 }, { "epoch": 0.18, "learning_rate": 0.0004120195779507342, "loss": 0.0703, "theoretical_loss": 3.465551787199379, "tokens_seen": 1953890304 }, { "epoch": 0.18, "learning_rate": 0.00041197945919922973, "loss": 0.0755, "theoretical_loss": 3.465532475442381, "tokens_seen": 1954021376 }, { "epoch": 0.18, "learning_rate": 0.0004119393404477253, "loss": 0.0733, "theoretical_loss": 3.4655131653434186, "tokens_seen": 1954152448 }, { "epoch": 0.18, "learning_rate": 0.00041189922169622086, "loss": 0.0704, "theoretical_loss": 3.4654938569022375, "tokens_seen": 1954283520 }, { "epoch": 0.18, "learning_rate": 0.0004118591029447163, "loss": 0.0649, "theoretical_loss": 3.465474550118584, "tokens_seen": 1954414592 }, { "epoch": 0.18, "learning_rate": 0.0004118189841932119, "loss": 0.0685, "theoretical_loss": 3.4654552449922056, "tokens_seen": 1954545664 }, { "epoch": 0.18, "objective/train/advantage_avg": 5.6474476878065616e-05, "objective/train/docs_used": 712850, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2776131629943848, "objective/train/original_loss": 1.2776132822036743, "objective/train/theoretical_loss": 3.4654359415228484, "objective/train/tokens_used": 325201376, "objective/train/value_avg": -0.0081634521484375, "objective/train/value_loss": 0.00017926660075318068, "objective/train/value_max": -4.3332576751708984e-05, "objective/train/value_min": -0.77587890625, "objective/train/value_reward_corr": 0.7149735383311777, "objective/train/value_std": 0.01556396484375, "objective/train/weight_avg": 1.000146508216858, "objective/train/weighted_lm_loss": 1.2777245044708252, "objective/train/weights_max": 1.915353775024414, "objective/train/weights_min": 0.5642915368080139, "theoretical_loss": 3.4654359415228484, "tokens_seen": 1954676736 }, { "epoch": 0.18, "learning_rate": 0.00041177886544170746, "loss": 0.0686, "theoretical_loss": 3.4654359415228484, "tokens_seen": 1954676736 }, { "epoch": 0.18, "learning_rate": 0.00041173874669020303, "loss": 0.0715, "theoretical_loss": 3.4654166397102593, "tokens_seen": 1954807808 }, { "epoch": 0.18, "learning_rate": 0.00041169862793869854, "loss": 0.069, "theoretical_loss": 3.4653973395541846, "tokens_seen": 1954938880 }, { "epoch": 0.18, "learning_rate": 0.0004116585091871941, "loss": 0.0735, "theoretical_loss": 3.4653780410543717, "tokens_seen": 1955069952 }, { "epoch": 0.19, "learning_rate": 0.0004116183904356897, "loss": 0.0694, "theoretical_loss": 3.465358744210567, "tokens_seen": 1955201024 }, { "epoch": 0.19, "learning_rate": 0.0004115782716841852, "loss": 0.067, "theoretical_loss": 3.465339449022517, "tokens_seen": 1955332096 }, { "epoch": 0.19, "learning_rate": 0.00041153815293268076, "loss": 0.0699, "theoretical_loss": 3.46532015548997, "tokens_seen": 1955463168 }, { "epoch": 0.19, "learning_rate": 0.00041149803418117633, "loss": 0.0685, "theoretical_loss": 3.4653008636126716, "tokens_seen": 1955594240 }, { "epoch": 0.19, "learning_rate": 0.0004114579154296718, "loss": 0.0712, "theoretical_loss": 3.46528157339037, "tokens_seen": 1955725312 }, { "epoch": 0.19, "learning_rate": 0.00041141779667816736, "loss": 0.0706, "theoretical_loss": 3.4652622848228107, "tokens_seen": 1955856384 }, { "epoch": 0.19, "learning_rate": 0.00041137767792666293, "loss": 0.0688, "theoretical_loss": 3.465242997909743, "tokens_seen": 1955987456 }, { "epoch": 0.19, "learning_rate": 0.0004113375591751585, "loss": 0.0706, "theoretical_loss": 3.4652237126509124, "tokens_seen": 1956118528 }, { "epoch": 0.19, "learning_rate": 0.000411297440423654, "loss": 0.072, "theoretical_loss": 3.465204429046067, "tokens_seen": 1956249600 }, { "epoch": 0.19, "learning_rate": 0.0004112573216721496, "loss": 0.0716, "theoretical_loss": 3.465185147094954, "tokens_seen": 1956380672 }, { "epoch": 0.19, "learning_rate": 0.00041121720292064515, "loss": 0.0693, "theoretical_loss": 3.465165866797321, "tokens_seen": 1956511744 }, { "epoch": 0.19, "learning_rate": 0.00041117708416914066, "loss": 0.07, "theoretical_loss": 3.465146588152915, "tokens_seen": 1956642816 }, { "epoch": 0.19, "learning_rate": 0.00041113696541763623, "loss": 0.0732, "theoretical_loss": 3.465127311161484, "tokens_seen": 1956773888 }, { "epoch": 0.19, "learning_rate": 0.0004110968466661318, "loss": 0.073, "theoretical_loss": 3.465108035822775, "tokens_seen": 1956904960 }, { "epoch": 0.19, "learning_rate": 0.00041105672791462726, "loss": 0.0687, "theoretical_loss": 3.465088762136536, "tokens_seen": 1957036032 }, { "epoch": 0.19, "learning_rate": 0.0004110166091631228, "loss": 0.0725, "theoretical_loss": 3.4650694901025147, "tokens_seen": 1957167104 }, { "epoch": 0.19, "learning_rate": 0.0004109764904116184, "loss": 0.0671, "theoretical_loss": 3.4650502197204585, "tokens_seen": 1957298176 }, { "epoch": 0.19, "learning_rate": 0.00041093637166011396, "loss": 0.071, "theoretical_loss": 3.4650309509901156, "tokens_seen": 1957429248 }, { "epoch": 0.19, "learning_rate": 0.0004108962529086095, "loss": 0.0688, "theoretical_loss": 3.465011683911234, "tokens_seen": 1957560320 }, { "epoch": 0.19, "learning_rate": 0.00041085613415710505, "loss": 0.0716, "theoretical_loss": 3.4649924184835603, "tokens_seen": 1957691392 }, { "epoch": 0.19, "learning_rate": 0.0004108160154056006, "loss": 0.0714, "theoretical_loss": 3.4649731547068443, "tokens_seen": 1957822464 }, { "epoch": 0.19, "objective/train/advantage_avg": -0.0002620279265101999, "objective/train/docs_used": 713999, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.399580717086792, "objective/train/original_loss": 1.399580717086792, "objective/train/theoretical_loss": 3.4649538925808328, "objective/train/tokens_used": 328478176, "objective/train/value_avg": -0.006084442138671875, "objective/train/value_loss": 0.00012024539319099858, "objective/train/value_max": -4.988908767700195e-05, "objective/train/value_min": -0.1719970703125, "objective/train/value_reward_corr": 0.6891423262521603, "objective/train/value_std": 0.00991058349609375, "objective/train/weight_avg": 0.9997968077659607, "objective/train/weighted_lm_loss": 1.3995217084884644, "objective/train/weights_max": 1.1876744031906128, "objective/train/weights_min": 0.6277732849121094, "theoretical_loss": 3.4649538925808328, "tokens_seen": 1957953536 }, { "epoch": 0.19, "learning_rate": 0.00041077589665409613, "loss": 0.0712, "theoretical_loss": 3.4649538925808328, "tokens_seen": 1957953536 }, { "epoch": 0.19, "learning_rate": 0.0004107357779025917, "loss": 0.0692, "theoretical_loss": 3.4649346321052743, "tokens_seen": 1958084608 }, { "epoch": 0.19, "learning_rate": 0.00041069565915108727, "loss": 0.0676, "theoretical_loss": 3.4649153732799167, "tokens_seen": 1958215680 }, { "epoch": 0.19, "learning_rate": 0.0004106555403995827, "loss": 0.0721, "theoretical_loss": 3.4648961161045078, "tokens_seen": 1958346752 }, { "epoch": 0.19, "learning_rate": 0.0004106154216480783, "loss": 0.0675, "theoretical_loss": 3.464876860578797, "tokens_seen": 1958477824 }, { "epoch": 0.19, "learning_rate": 0.00041057530289657386, "loss": 0.0723, "theoretical_loss": 3.4648576067025316, "tokens_seen": 1958608896 }, { "epoch": 0.19, "learning_rate": 0.00041053518414506943, "loss": 0.0694, "theoretical_loss": 3.46483835447546, "tokens_seen": 1958739968 }, { "epoch": 0.19, "learning_rate": 0.00041049506539356495, "loss": 0.0712, "theoretical_loss": 3.464819103897331, "tokens_seen": 1958871040 }, { "epoch": 0.19, "learning_rate": 0.0004104549466420605, "loss": 0.0731, "theoretical_loss": 3.464799854967893, "tokens_seen": 1959002112 }, { "epoch": 0.19, "learning_rate": 0.0004104148278905561, "loss": 0.0648, "theoretical_loss": 3.4647806076868943, "tokens_seen": 1959133184 }, { "epoch": 0.19, "learning_rate": 0.0004103747091390516, "loss": 0.0688, "theoretical_loss": 3.464761362054084, "tokens_seen": 1959264256 }, { "epoch": 0.19, "learning_rate": 0.00041033459038754716, "loss": 0.0683, "theoretical_loss": 3.4647421180692093, "tokens_seen": 1959395328 }, { "epoch": 0.19, "learning_rate": 0.00041029447163604273, "loss": 0.0712, "theoretical_loss": 3.4647228757320203, "tokens_seen": 1959526400 }, { "epoch": 0.19, "learning_rate": 0.0004102543528845382, "loss": 0.07, "theoretical_loss": 3.464703635042265, "tokens_seen": 1959657472 }, { "epoch": 0.19, "learning_rate": 0.00041021423413303376, "loss": 0.0703, "theoretical_loss": 3.4646843959996927, "tokens_seen": 1959788544 }, { "epoch": 0.19, "learning_rate": 0.00041017411538152933, "loss": 0.0694, "theoretical_loss": 3.4646651586040518, "tokens_seen": 1959919616 }, { "epoch": 0.19, "learning_rate": 0.0004101339966300249, "loss": 0.0677, "theoretical_loss": 3.464645922855091, "tokens_seen": 1960050688 }, { "epoch": 0.19, "learning_rate": 0.0004100938778785204, "loss": 0.065, "theoretical_loss": 3.46462668875256, "tokens_seen": 1960181760 }, { "epoch": 0.19, "learning_rate": 0.000410053759127016, "loss": 0.0684, "theoretical_loss": 3.464607456296207, "tokens_seen": 1960312832 }, { "epoch": 0.19, "learning_rate": 0.00041001364037551155, "loss": 0.0687, "theoretical_loss": 3.464588225485782, "tokens_seen": 1960443904 }, { "epoch": 0.19, "learning_rate": 0.00040997352162400706, "loss": 0.0723, "theoretical_loss": 3.464568996321033, "tokens_seen": 1960574976 }, { "epoch": 0.19, "learning_rate": 0.00040993340287250263, "loss": 0.0762, "theoretical_loss": 3.46454976880171, "tokens_seen": 1960706048 }, { "epoch": 0.19, "learning_rate": 0.0004098932841209982, "loss": 0.0727, "theoretical_loss": 3.4645305429275624, "tokens_seen": 1960837120 }, { "epoch": 0.19, "learning_rate": 0.00040985316536949366, "loss": 0.069, "theoretical_loss": 3.4645113186983383, "tokens_seen": 1960968192 }, { "epoch": 0.19, "learning_rate": 0.00040981304661798923, "loss": 0.0677, "theoretical_loss": 3.464492096113788, "tokens_seen": 1961099264 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0007064054952934384, "objective/train/docs_used": 715186, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3815683126449585, "objective/train/original_loss": 1.3815683126449585, "objective/train/theoretical_loss": 3.4644728751736604, "objective/train/tokens_used": 331754976, "objective/train/value_avg": -0.00772857666015625, "objective/train/value_loss": 0.00023964210413396358, "objective/train/value_max": -7.486343383789062e-05, "objective/train/value_min": -0.73828125, "objective/train/value_reward_corr": 0.7073029027278019, "objective/train/value_std": 0.0161590576171875, "objective/train/weight_avg": 1.000813603401184, "objective/train/weighted_lm_loss": 1.382520318031311, "objective/train/weights_max": 1.2781755924224854, "objective/train/weights_min": 0.3684791922569275, "theoretical_loss": 3.4644728751736604, "tokens_seen": 1961230336 }, { "epoch": 0.19, "learning_rate": 0.0004097729278664848, "loss": 0.0732, "theoretical_loss": 3.4644728751736604, "tokens_seen": 1961230336 }, { "epoch": 0.19, "learning_rate": 0.00040973280911498036, "loss": 0.0688, "theoretical_loss": 3.4644536558777057, "tokens_seen": 1961361408 }, { "epoch": 0.19, "learning_rate": 0.0004096926903634759, "loss": 0.0664, "theoretical_loss": 3.4644344382256724, "tokens_seen": 1961492480 }, { "epoch": 0.19, "learning_rate": 0.00040965257161197145, "loss": 0.071, "theoretical_loss": 3.4644152222173106, "tokens_seen": 1961623552 }, { "epoch": 0.19, "learning_rate": 0.000409612452860467, "loss": 0.0689, "theoretical_loss": 3.4643960078523697, "tokens_seen": 1961754624 }, { "epoch": 0.19, "learning_rate": 0.00040957233410896253, "loss": 0.0686, "theoretical_loss": 3.4643767951305997, "tokens_seen": 1961885696 }, { "epoch": 0.19, "learning_rate": 0.0004095322153574581, "loss": 0.0718, "theoretical_loss": 3.4643575840517498, "tokens_seen": 1962016768 }, { "epoch": 0.19, "learning_rate": 0.00040949209660595367, "loss": 0.0678, "theoretical_loss": 3.4643383746155703, "tokens_seen": 1962147840 }, { "epoch": 0.19, "learning_rate": 0.0004094519778544491, "loss": 0.0699, "theoretical_loss": 3.464319166821811, "tokens_seen": 1962278912 }, { "epoch": 0.19, "learning_rate": 0.0004094118591029447, "loss": 0.0727, "theoretical_loss": 3.464299960670221, "tokens_seen": 1962409984 }, { "epoch": 0.19, "learning_rate": 0.00040937174035144026, "loss": 0.0672, "theoretical_loss": 3.4642807561605515, "tokens_seen": 1962541056 }, { "epoch": 0.19, "learning_rate": 0.00040933162159993583, "loss": 0.0725, "theoretical_loss": 3.464261553292551, "tokens_seen": 1962672128 }, { "epoch": 0.19, "learning_rate": 0.00040929150284843135, "loss": 0.0693, "theoretical_loss": 3.464242352065971, "tokens_seen": 1962803200 }, { "epoch": 0.19, "learning_rate": 0.0004092513840969269, "loss": 0.0711, "theoretical_loss": 3.4642231524805607, "tokens_seen": 1962934272 }, { "epoch": 0.19, "learning_rate": 0.0004092112653454225, "loss": 0.0716, "theoretical_loss": 3.4642039545360706, "tokens_seen": 1963065344 }, { "epoch": 0.19, "learning_rate": 0.000409171146593918, "loss": 0.0711, "theoretical_loss": 3.4641847582322507, "tokens_seen": 1963196416 }, { "epoch": 0.19, "learning_rate": 0.00040913102784241357, "loss": 0.0714, "theoretical_loss": 3.464165563568851, "tokens_seen": 1963327488 }, { "epoch": 0.19, "learning_rate": 0.00040909090909090913, "loss": 0.0704, "theoretical_loss": 3.4641463705456226, "tokens_seen": 1963458560 }, { "epoch": 0.19, "learning_rate": 0.00040905079033940465, "loss": 0.0666, "theoretical_loss": 3.4641271791623156, "tokens_seen": 1963589632 }, { "epoch": 0.19, "learning_rate": 0.00040901067158790016, "loss": 0.0728, "theoretical_loss": 3.4641079894186797, "tokens_seen": 1963720704 }, { "epoch": 0.19, "learning_rate": 0.00040897055283639573, "loss": 0.0708, "theoretical_loss": 3.464088801314466, "tokens_seen": 1963851776 }, { "epoch": 0.19, "learning_rate": 0.0004089304340848913, "loss": 0.072, "theoretical_loss": 3.4640696148494254, "tokens_seen": 1963982848 }, { "epoch": 0.19, "learning_rate": 0.0004088903153333868, "loss": 0.0727, "theoretical_loss": 3.464050430023308, "tokens_seen": 1964113920 }, { "epoch": 0.19, "learning_rate": 0.0004088501965818824, "loss": 0.0702, "theoretical_loss": 3.464031246835864, "tokens_seen": 1964244992 }, { "epoch": 0.19, "learning_rate": 0.00040881007783037795, "loss": 0.0688, "theoretical_loss": 3.4640120652868456, "tokens_seen": 1964376064 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.00025509079569019377, "objective/train/docs_used": 716326, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4875242710113525, "objective/train/original_loss": 1.4875242710113525, "objective/train/theoretical_loss": 3.4639928853760016, "objective/train/tokens_used": 335031776, "objective/train/value_avg": -0.005352020263671875, "objective/train/value_loss": 0.00017749890685081482, "objective/train/value_max": -2.390146255493164e-05, "objective/train/value_min": -0.29296875, "objective/train/value_reward_corr": 0.5379912844237303, "objective/train/value_std": 0.009033203125, "objective/train/weight_avg": 1.0003316402435303, "objective/train/weighted_lm_loss": 1.4882609844207764, "objective/train/weights_max": 1.1044021844863892, "objective/train/weights_min": 0.3686957061290741, "theoretical_loss": 3.4639928853760016, "tokens_seen": 1964507136 }, { "epoch": 0.19, "learning_rate": 0.00040876995907887346, "loss": 0.0702, "theoretical_loss": 3.4639928853760016, "tokens_seen": 1964507136 }, { "epoch": 0.19, "learning_rate": 0.00040872984032736903, "loss": 0.0722, "theoretical_loss": 3.4639737071030843, "tokens_seen": 1964638208 }, { "epoch": 0.19, "learning_rate": 0.0004086897215758646, "loss": 0.0694, "theoretical_loss": 3.463954530467844, "tokens_seen": 1964769280 }, { "epoch": 0.19, "learning_rate": 0.0004086496028243601, "loss": 0.0702, "theoretical_loss": 3.4639353554700314, "tokens_seen": 1964900352 }, { "epoch": 0.19, "learning_rate": 0.00040860948407285563, "loss": 0.0697, "theoretical_loss": 3.463916182109398, "tokens_seen": 1965031424 }, { "epoch": 0.19, "learning_rate": 0.0004085693653213512, "loss": 0.0688, "theoretical_loss": 3.4638970103856943, "tokens_seen": 1965162496 }, { "epoch": 0.19, "learning_rate": 0.00040852924656984677, "loss": 0.0716, "theoretical_loss": 3.463877840298672, "tokens_seen": 1965293568 }, { "epoch": 0.19, "learning_rate": 0.0004084891278183423, "loss": 0.0702, "theoretical_loss": 3.4638586718480817, "tokens_seen": 1965424640 }, { "epoch": 0.19, "learning_rate": 0.00040844900906683785, "loss": 0.0677, "theoretical_loss": 3.4638395050336745, "tokens_seen": 1965555712 }, { "epoch": 0.19, "learning_rate": 0.0004084088903153334, "loss": 0.0672, "theoretical_loss": 3.4638203398552028, "tokens_seen": 1965686784 }, { "epoch": 0.19, "learning_rate": 0.00040836877156382893, "loss": 0.0704, "theoretical_loss": 3.4638011763124164, "tokens_seen": 1965817856 }, { "epoch": 0.19, "learning_rate": 0.0004083286528123245, "loss": 0.0685, "theoretical_loss": 3.4637820144050666, "tokens_seen": 1965948928 }, { "epoch": 0.19, "learning_rate": 0.00040828853406082007, "loss": 0.0724, "theoretical_loss": 3.4637628541329066, "tokens_seen": 1966080000 }, { "epoch": 0.19, "learning_rate": 0.0004082484153093156, "loss": 0.0696, "theoretical_loss": 3.463743695495686, "tokens_seen": 1966211072 }, { "epoch": 0.19, "learning_rate": 0.0004082082965578111, "loss": 0.0691, "theoretical_loss": 3.4637245384931576, "tokens_seen": 1966342144 }, { "epoch": 0.19, "learning_rate": 0.00040816817780630666, "loss": 0.0709, "theoretical_loss": 3.463705383125072, "tokens_seen": 1966473216 }, { "epoch": 0.19, "learning_rate": 0.00040812805905480223, "loss": 0.0705, "theoretical_loss": 3.4636862293911816, "tokens_seen": 1966604288 }, { "epoch": 0.19, "learning_rate": 0.00040808794030329775, "loss": 0.073, "theoretical_loss": 3.4636670772912375, "tokens_seen": 1966735360 }, { "epoch": 0.19, "learning_rate": 0.0004080478215517933, "loss": 0.0706, "theoretical_loss": 3.463647926824992, "tokens_seen": 1966866432 }, { "epoch": 0.19, "learning_rate": 0.0004080077028002889, "loss": 0.0693, "theoretical_loss": 3.463628777992196, "tokens_seen": 1966997504 }, { "epoch": 0.19, "learning_rate": 0.0004079675840487844, "loss": 0.0685, "theoretical_loss": 3.4636096307926016, "tokens_seen": 1967128576 }, { "epoch": 0.19, "learning_rate": 0.00040792746529727997, "loss": 0.0702, "theoretical_loss": 3.4635904852259616, "tokens_seen": 1967259648 }, { "epoch": 0.19, "learning_rate": 0.00040788734654577554, "loss": 0.0684, "theoretical_loss": 3.4635713412920275, "tokens_seen": 1967390720 }, { "epoch": 0.19, "learning_rate": 0.00040784722779427105, "loss": 0.0688, "theoretical_loss": 3.4635521989905507, "tokens_seen": 1967521792 }, { "epoch": 0.19, "learning_rate": 0.00040780710904276656, "loss": 0.0715, "theoretical_loss": 3.4635330583212838, "tokens_seen": 1967652864 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0005822113016620278, "objective/train/docs_used": 717661, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.365969181060791, "objective/train/original_loss": 1.365968942642212, "objective/train/theoretical_loss": 3.463513919283979, "objective/train/tokens_used": 338308576, "objective/train/value_avg": -0.01247406005859375, "objective/train/value_loss": 0.0006170320557430387, "objective/train/value_max": -7.426738739013672e-05, "objective/train/value_min": -0.84912109375, "objective/train/value_reward_corr": 0.745158269481441, "objective/train/value_std": 0.0272369384765625, "objective/train/weight_avg": 1.0008593797683716, "objective/train/weighted_lm_loss": 1.3662405014038086, "objective/train/weights_max": 1.776610016822815, "objective/train/weights_min": 0.38289615511894226, "theoretical_loss": 3.463513919283979, "tokens_seen": 1967783936 }, { "epoch": 0.19, "learning_rate": 0.00040776699029126213, "loss": 0.0704, "theoretical_loss": 3.463513919283979, "tokens_seen": 1967783936 }, { "epoch": 0.19, "learning_rate": 0.0004077268715397577, "loss": 0.0661, "theoretical_loss": 3.463494781878388, "tokens_seen": 1967915008 }, { "epoch": 0.19, "learning_rate": 0.0004076867527882532, "loss": 0.0744, "theoretical_loss": 3.4634756461042633, "tokens_seen": 1968046080 }, { "epoch": 0.19, "learning_rate": 0.0004076466340367488, "loss": 0.0703, "theoretical_loss": 3.4634565119613576, "tokens_seen": 1968177152 }, { "epoch": 0.19, "learning_rate": 0.00040760651528524435, "loss": 0.0689, "theoretical_loss": 3.4634373794494224, "tokens_seen": 1968308224 }, { "epoch": 0.19, "learning_rate": 0.00040756639653373987, "loss": 0.0674, "theoretical_loss": 3.463418248568211, "tokens_seen": 1968439296 }, { "epoch": 0.19, "learning_rate": 0.00040752627778223543, "loss": 0.0669, "theoretical_loss": 3.463399119317475, "tokens_seen": 1968570368 }, { "epoch": 0.19, "learning_rate": 0.000407486159030731, "loss": 0.067, "theoretical_loss": 3.463379991696967, "tokens_seen": 1968701440 }, { "epoch": 0.19, "learning_rate": 0.0004074460402792265, "loss": 0.0653, "theoretical_loss": 3.4633608657064396, "tokens_seen": 1968832512 }, { "epoch": 0.19, "learning_rate": 0.00040740592152772203, "loss": 0.0635, "theoretical_loss": 3.463341741345646, "tokens_seen": 1968963584 }, { "epoch": 0.19, "learning_rate": 0.0004073658027762176, "loss": 0.0695, "theoretical_loss": 3.463322618614338, "tokens_seen": 1969094656 }, { "epoch": 0.19, "learning_rate": 0.00040732568402471317, "loss": 0.067, "theoretical_loss": 3.4633034975122694, "tokens_seen": 1969225728 }, { "epoch": 0.19, "learning_rate": 0.0004072855652732087, "loss": 0.0678, "theoretical_loss": 3.4632843780391918, "tokens_seen": 1969356800 }, { "epoch": 0.19, "learning_rate": 0.00040724544652170425, "loss": 0.0718, "theoretical_loss": 3.4632652601948593, "tokens_seen": 1969487872 }, { "epoch": 0.19, "learning_rate": 0.0004072053277701998, "loss": 0.0687, "theoretical_loss": 3.463246143979023, "tokens_seen": 1969618944 }, { "epoch": 0.19, "learning_rate": 0.0004071652090186954, "loss": 0.0713, "theoretical_loss": 3.463227029391437, "tokens_seen": 1969750016 }, { "epoch": 0.19, "learning_rate": 0.0004071250902671909, "loss": 0.069, "theoretical_loss": 3.4632079164318545, "tokens_seen": 1969881088 }, { "epoch": 0.19, "learning_rate": 0.00040708497151568647, "loss": 0.0718, "theoretical_loss": 3.4631888051000272, "tokens_seen": 1970012160 }, { "epoch": 0.19, "learning_rate": 0.000407044852764182, "loss": 0.0698, "theoretical_loss": 3.4631696953957096, "tokens_seen": 1970143232 }, { "epoch": 0.19, "learning_rate": 0.0004070047340126775, "loss": 0.0702, "theoretical_loss": 3.4631505873186548, "tokens_seen": 1970274304 }, { "epoch": 0.19, "learning_rate": 0.00040696461526117307, "loss": 0.0675, "theoretical_loss": 3.463131480868615, "tokens_seen": 1970405376 }, { "epoch": 0.19, "learning_rate": 0.00040692449650966863, "loss": 0.07, "theoretical_loss": 3.463112376045344, "tokens_seen": 1970536448 }, { "epoch": 0.19, "learning_rate": 0.00040688437775816415, "loss": 0.0711, "theoretical_loss": 3.4630932728485955, "tokens_seen": 1970667520 }, { "epoch": 0.19, "learning_rate": 0.0004068442590066597, "loss": 0.0715, "theoretical_loss": 3.463074171278122, "tokens_seen": 1970798592 }, { "epoch": 0.19, "learning_rate": 0.0004068041402551553, "loss": 0.0728, "theoretical_loss": 3.4630550713336774, "tokens_seen": 1970929664 }, { "epoch": 0.19, "objective/train/advantage_avg": 0.0004421295889187604, "objective/train/docs_used": 718798, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4797451496124268, "objective/train/original_loss": 1.4797451496124268, "objective/train/theoretical_loss": 3.4630359730150153, "objective/train/tokens_used": 341585376, "objective/train/value_avg": -0.005031585693359375, "objective/train/value_loss": 0.00014088333409745246, "objective/train/value_max": -7.486343383789062e-05, "objective/train/value_min": -0.27587890625, "objective/train/value_reward_corr": 0.644589989997411, "objective/train/value_std": 0.009521484375, "objective/train/weight_avg": 1.0005078315734863, "objective/train/weighted_lm_loss": 1.4798952341079712, "objective/train/weights_max": 1.2208958864212036, "objective/train/weights_min": 0.3735102415084839, "theoretical_loss": 3.4630359730150153, "tokens_seen": 1971060736 }, { "epoch": 0.19, "learning_rate": 0.00040676402150365085, "loss": 0.0708, "theoretical_loss": 3.4630359730150153, "tokens_seen": 1971060736 }, { "epoch": 0.19, "learning_rate": 0.00040672390275214637, "loss": 0.0718, "theoretical_loss": 3.4630168763218885, "tokens_seen": 1971191808 }, { "epoch": 0.19, "learning_rate": 0.00040668378400064194, "loss": 0.0701, "theoretical_loss": 3.4629977812540518, "tokens_seen": 1971322880 }, { "epoch": 0.19, "learning_rate": 0.00040664366524913745, "loss": 0.0703, "theoretical_loss": 3.4629786878112574, "tokens_seen": 1971453952 }, { "epoch": 0.19, "learning_rate": 0.00040660354649763296, "loss": 0.0698, "theoretical_loss": 3.46295959599326, "tokens_seen": 1971585024 }, { "epoch": 0.2, "learning_rate": 0.00040656342774612853, "loss": 0.0719, "theoretical_loss": 3.4629405057998133, "tokens_seen": 1971716096 }, { "epoch": 0.2, "learning_rate": 0.0004065233089946241, "loss": 0.0732, "theoretical_loss": 3.4629214172306706, "tokens_seen": 1971847168 }, { "epoch": 0.2, "learning_rate": 0.0004064831902431196, "loss": 0.0704, "theoretical_loss": 3.4629023302855857, "tokens_seen": 1971978240 }, { "epoch": 0.2, "learning_rate": 0.0004064430714916152, "loss": 0.0687, "theoretical_loss": 3.462883244964313, "tokens_seen": 1972109312 }, { "epoch": 0.2, "learning_rate": 0.00040640295274011075, "loss": 0.0701, "theoretical_loss": 3.462864161266606, "tokens_seen": 1972240384 }, { "epoch": 0.2, "learning_rate": 0.0004063628339886063, "loss": 0.0683, "theoretical_loss": 3.4628450791922187, "tokens_seen": 1972371456 }, { "epoch": 0.2, "learning_rate": 0.00040632271523710183, "loss": 0.0678, "theoretical_loss": 3.4628259987409056, "tokens_seen": 1972502528 }, { "epoch": 0.2, "learning_rate": 0.0004062825964855974, "loss": 0.0713, "theoretical_loss": 3.4628069199124205, "tokens_seen": 1972633600 }, { "epoch": 0.2, "learning_rate": 0.0004062424777340929, "loss": 0.072, "theoretical_loss": 3.462787842706518, "tokens_seen": 1972764672 }, { "epoch": 0.2, "learning_rate": 0.00040620235898258843, "loss": 0.0697, "theoretical_loss": 3.4627687671229515, "tokens_seen": 1972895744 }, { "epoch": 0.2, "learning_rate": 0.000406162240231084, "loss": 0.0665, "theoretical_loss": 3.4627496931614754, "tokens_seen": 1973026816 }, { "epoch": 0.2, "learning_rate": 0.00040612212147957957, "loss": 0.0692, "theoretical_loss": 3.462730620821845, "tokens_seen": 1973157888 }, { "epoch": 0.2, "learning_rate": 0.0004060820027280751, "loss": 0.0673, "theoretical_loss": 3.4627115501038133, "tokens_seen": 1973288960 }, { "epoch": 0.2, "learning_rate": 0.00040604188397657065, "loss": 0.0736, "theoretical_loss": 3.462692481007136, "tokens_seen": 1973420032 }, { "epoch": 0.2, "learning_rate": 0.0004060017652250662, "loss": 0.0725, "theoretical_loss": 3.4626734135315664, "tokens_seen": 1973551104 }, { "epoch": 0.2, "learning_rate": 0.0004059616464735618, "loss": 0.0732, "theoretical_loss": 3.46265434767686, "tokens_seen": 1973682176 }, { "epoch": 0.2, "learning_rate": 0.0004059215277220573, "loss": 0.0736, "theoretical_loss": 3.4626352834427707, "tokens_seen": 1973813248 }, { "epoch": 0.2, "learning_rate": 0.00040588140897055287, "loss": 0.0671, "theoretical_loss": 3.4626162208290534, "tokens_seen": 1973944320 }, { "epoch": 0.2, "learning_rate": 0.0004058412902190484, "loss": 0.0704, "theoretical_loss": 3.4625971598354632, "tokens_seen": 1974075392 }, { "epoch": 0.2, "learning_rate": 0.0004058011714675439, "loss": 0.072, "theoretical_loss": 3.462578100461754, "tokens_seen": 1974206464 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.0009301617974415421, "objective/train/docs_used": 720061, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.300958275794983, "objective/train/original_loss": 1.3009580373764038, "objective/train/theoretical_loss": 3.4625590427076816, "objective/train/tokens_used": 344862176, "objective/train/value_avg": -0.00923919677734375, "objective/train/value_loss": 0.0001243773294845596, "objective/train/value_max": -8.887052536010742e-05, "objective/train/value_min": -0.283203125, "objective/train/value_reward_corr": 0.7798349534952553, "objective/train/value_std": 0.0142669677734375, "objective/train/weight_avg": 1.0009915828704834, "objective/train/weighted_lm_loss": 1.3019486665725708, "objective/train/weights_max": 1.2115427255630493, "objective/train/weights_min": 0.7184198498725891, "theoretical_loss": 3.4625590427076816, "tokens_seen": 1974337536 }, { "epoch": 0.2, "learning_rate": 0.00040576105271603947, "loss": 0.0684, "theoretical_loss": 3.4625590427076816, "tokens_seen": 1974337536 }, { "epoch": 0.2, "learning_rate": 0.00040572093396453504, "loss": 0.0687, "theoretical_loss": 3.4625399865730007, "tokens_seen": 1974468608 }, { "epoch": 0.2, "learning_rate": 0.00040568081521303055, "loss": 0.0694, "theoretical_loss": 3.462520932057465, "tokens_seen": 1974599680 }, { "epoch": 0.2, "learning_rate": 0.0004056406964615261, "loss": 0.0708, "theoretical_loss": 3.462501879160831, "tokens_seen": 1974730752 }, { "epoch": 0.2, "learning_rate": 0.0004056005777100217, "loss": 0.0679, "theoretical_loss": 3.4624828278828526, "tokens_seen": 1974861824 }, { "epoch": 0.2, "learning_rate": 0.00040556045895851725, "loss": 0.069, "theoretical_loss": 3.462463778223285, "tokens_seen": 1974992896 }, { "epoch": 0.2, "learning_rate": 0.00040552034020701277, "loss": 0.0681, "theoretical_loss": 3.462444730181884, "tokens_seen": 1975123968 }, { "epoch": 0.2, "learning_rate": 0.00040548022145550834, "loss": 0.0675, "theoretical_loss": 3.462425683758404, "tokens_seen": 1975255040 }, { "epoch": 0.2, "learning_rate": 0.00040544010270400385, "loss": 0.0697, "theoretical_loss": 3.4624066389526016, "tokens_seen": 1975386112 }, { "epoch": 0.2, "learning_rate": 0.00040539998395249937, "loss": 0.0685, "theoretical_loss": 3.4623875957642305, "tokens_seen": 1975517184 }, { "epoch": 0.2, "learning_rate": 0.00040535986520099493, "loss": 0.0677, "theoretical_loss": 3.4623685541930467, "tokens_seen": 1975648256 }, { "epoch": 0.2, "learning_rate": 0.0004053197464494905, "loss": 0.0694, "theoretical_loss": 3.462349514238805, "tokens_seen": 1975779328 }, { "epoch": 0.2, "learning_rate": 0.000405279627697986, "loss": 0.0697, "theoretical_loss": 3.462330475901262, "tokens_seen": 1975910400 }, { "epoch": 0.2, "learning_rate": 0.0004052395089464816, "loss": 0.0723, "theoretical_loss": 3.462311439180173, "tokens_seen": 1976041472 }, { "epoch": 0.2, "learning_rate": 0.00040519939019497715, "loss": 0.0698, "theoretical_loss": 3.462292404075292, "tokens_seen": 1976172544 }, { "epoch": 0.2, "learning_rate": 0.0004051592714434727, "loss": 0.0705, "theoretical_loss": 3.4622733705863764, "tokens_seen": 1976303616 }, { "epoch": 0.2, "learning_rate": 0.00040511915269196824, "loss": 0.0696, "theoretical_loss": 3.462254338713181, "tokens_seen": 1976434688 }, { "epoch": 0.2, "learning_rate": 0.0004050790339404638, "loss": 0.0682, "theoretical_loss": 3.462235308455462, "tokens_seen": 1976565760 }, { "epoch": 0.2, "learning_rate": 0.0004050389151889593, "loss": 0.0731, "theoretical_loss": 3.4622162798129743, "tokens_seen": 1976696832 }, { "epoch": 0.2, "learning_rate": 0.00040499879643745483, "loss": 0.0726, "theoretical_loss": 3.4621972527854745, "tokens_seen": 1976827904 }, { "epoch": 0.2, "learning_rate": 0.0004049586776859504, "loss": 0.0658, "theoretical_loss": 3.462178227372718, "tokens_seen": 1976958976 }, { "epoch": 0.2, "learning_rate": 0.00040491855893444597, "loss": 0.068, "theoretical_loss": 3.462159203574461, "tokens_seen": 1977090048 }, { "epoch": 0.2, "learning_rate": 0.0004048784401829415, "loss": 0.0693, "theoretical_loss": 3.4621401813904598, "tokens_seen": 1977221120 }, { "epoch": 0.2, "learning_rate": 0.00040483832143143705, "loss": 0.0703, "theoretical_loss": 3.46212116082047, "tokens_seen": 1977352192 }, { "epoch": 0.2, "learning_rate": 0.0004047982026799326, "loss": 0.0662, "theoretical_loss": 3.4621021418642473, "tokens_seen": 1977483264 }, { "debugging/Compilability": 1.0, "debugging/distinct-1-grams": 0.7831661346709227, "debugging/entropy-1-grams": 4.874902822236777, "debugging/length": 456.75, "debugging/num_segments": 8, "debugging/raw_token_scores_avg": 0.005691220983862877, "debugging/raw_token_scores_std": 0.016982102766633034, "debugging/score": 0.005996972870935459, "debugging/score_std": 0.0029591934987445866, "epoch": 0.2, "objective/train/advantage_avg": 0.0006603390793316066, "objective/train/docs_used": 721216, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3608952760696411, "objective/train/original_loss": 1.3608951568603516, "objective/train/theoretical_loss": 3.4620831245215484, "objective/train/tokens_used": 348138976, "objective/train/value_avg": -0.00634765625, "objective/train/value_loss": 0.00018245252431370318, "objective/train/value_max": -6.401538848876953e-05, "objective/train/value_min": -0.256591796875, "objective/train/value_reward_corr": 0.6072050890376157, "objective/train/value_std": 0.0100555419921875, "objective/train/weight_avg": 1.0007450580596924, "objective/train/weighted_lm_loss": 1.361603021621704, "objective/train/weights_max": 1.22624933719635, "objective/train/weights_min": 0.41920939087867737, "theoretical_loss": 3.4620831245215484, "tokens_seen": 1977614336 }, { "epoch": 0.2, "learning_rate": 0.0004047580839284282, "loss": 0.0673, "theoretical_loss": 3.4620831245215484, "tokens_seen": 1977614336 }, { "epoch": 0.2, "learning_rate": 0.0004047179651769237, "loss": 0.0695, "theoretical_loss": 3.46206410879213, "tokens_seen": 1977745408 }, { "epoch": 0.2, "learning_rate": 0.00040467784642541927, "loss": 0.0697, "theoretical_loss": 3.462045094675747, "tokens_seen": 1977876480 }, { "epoch": 0.2, "learning_rate": 0.00040463772767391484, "loss": 0.0674, "theoretical_loss": 3.462026082172156, "tokens_seen": 1978007552 }, { "epoch": 0.2, "learning_rate": 0.0004045976089224103, "loss": 0.0684, "theoretical_loss": 3.462007071281114, "tokens_seen": 1978138624 }, { "epoch": 0.2, "learning_rate": 0.00040455749017090587, "loss": 0.0695, "theoretical_loss": 3.4619880620023777, "tokens_seen": 1978269696 }, { "epoch": 0.2, "learning_rate": 0.00040451737141940144, "loss": 0.0689, "theoretical_loss": 3.461969054335703, "tokens_seen": 1978400768 }, { "epoch": 0.2, "learning_rate": 0.00040447725266789695, "loss": 0.0663, "theoretical_loss": 3.4619500482808454, "tokens_seen": 1978531840 }, { "epoch": 0.2, "learning_rate": 0.0004044371339163925, "loss": 0.0689, "theoretical_loss": 3.461931043837563, "tokens_seen": 1978662912 }, { "epoch": 0.2, "learning_rate": 0.0004043970151648881, "loss": 0.0693, "theoretical_loss": 3.4619120410056117, "tokens_seen": 1978793984 }, { "epoch": 0.2, "learning_rate": 0.00040435689641338366, "loss": 0.0702, "theoretical_loss": 3.4618930397847487, "tokens_seen": 1978925056 }, { "epoch": 0.2, "learning_rate": 0.00040431677766187917, "loss": 0.069, "theoretical_loss": 3.4618740401747297, "tokens_seen": 1979056128 }, { "epoch": 0.2, "learning_rate": 0.00040427665891037474, "loss": 0.0684, "theoretical_loss": 3.461855042175312, "tokens_seen": 1979187200 }, { "epoch": 0.2, "learning_rate": 0.0004042365401588703, "loss": 0.0666, "theoretical_loss": 3.461836045786253, "tokens_seen": 1979318272 }, { "epoch": 0.2, "learning_rate": 0.00040419642140736577, "loss": 0.0692, "theoretical_loss": 3.4618170510073085, "tokens_seen": 1979449344 }, { "epoch": 0.2, "learning_rate": 0.00040415630265586134, "loss": 0.0674, "theoretical_loss": 3.4617980578382364, "tokens_seen": 1979580416 }, { "epoch": 0.2, "learning_rate": 0.0004041161839043569, "loss": 0.0695, "theoretical_loss": 3.4617790662787935, "tokens_seen": 1979711488 }, { "epoch": 0.2, "learning_rate": 0.00040407606515285247, "loss": 0.0666, "theoretical_loss": 3.461760076328736, "tokens_seen": 1979842560 }, { "epoch": 0.2, "learning_rate": 0.000404035946401348, "loss": 0.0692, "theoretical_loss": 3.4617410879878223, "tokens_seen": 1979973632 }, { "epoch": 0.2, "learning_rate": 0.00040399582764984355, "loss": 0.0669, "theoretical_loss": 3.461722101255808, "tokens_seen": 1980104704 }, { "epoch": 0.2, "learning_rate": 0.0004039557088983391, "loss": 0.0671, "theoretical_loss": 3.461703116132452, "tokens_seen": 1980235776 }, { "epoch": 0.2, "learning_rate": 0.00040391559014683464, "loss": 0.0694, "theoretical_loss": 3.46168413261751, "tokens_seen": 1980366848 }, { "epoch": 0.2, "learning_rate": 0.0004038754713953302, "loss": 0.0697, "theoretical_loss": 3.46166515071074, "tokens_seen": 1980497920 }, { "epoch": 0.2, "learning_rate": 0.0004038353526438258, "loss": 0.0708, "theoretical_loss": 3.4616461704118997, "tokens_seen": 1980628992 }, { "epoch": 0.2, "learning_rate": 0.00040379523389232123, "loss": 0.0698, "theoretical_loss": 3.461627191720745, "tokens_seen": 1980760064 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.001678215223364532, "objective/train/docs_used": 722322, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2240993976593018, "objective/train/original_loss": 1.2240993976593018, "objective/train/theoretical_loss": 3.4616082146370353, "objective/train/tokens_used": 351415776, "objective/train/value_avg": -0.00943756103515625, "objective/train/value_loss": 0.0005006311694160104, "objective/train/value_max": -3.820657730102539e-05, "objective/train/value_min": -0.9921875, "objective/train/value_reward_corr": 0.6553109189706817, "objective/train/value_std": 0.0191192626953125, "objective/train/weight_avg": 1.0018895864486694, "objective/train/weighted_lm_loss": 1.2256762981414795, "objective/train/weights_max": 1.4782650470733643, "objective/train/weights_min": 0.2276872992515564, "theoretical_loss": 3.4616082146370353, "tokens_seen": 1980891136 }, { "epoch": 0.2, "learning_rate": 0.0004037551151408168, "loss": 0.0683, "theoretical_loss": 3.4616082146370353, "tokens_seen": 1980891136 }, { "epoch": 0.2, "learning_rate": 0.00040371499638931237, "loss": 0.0688, "theoretical_loss": 3.461589239160528, "tokens_seen": 1981022208 }, { "epoch": 0.2, "learning_rate": 0.00040367487763780794, "loss": 0.0672, "theoretical_loss": 3.4615702652909786, "tokens_seen": 1981153280 }, { "epoch": 0.2, "learning_rate": 0.00040363475888630345, "loss": 0.0711, "theoretical_loss": 3.4615512930281467, "tokens_seen": 1981284352 }, { "epoch": 0.2, "learning_rate": 0.000403594640134799, "loss": 0.069, "theoretical_loss": 3.461532322371789, "tokens_seen": 1981415424 }, { "epoch": 0.2, "learning_rate": 0.0004035545213832946, "loss": 0.0689, "theoretical_loss": 3.4615133533216635, "tokens_seen": 1981546496 }, { "epoch": 0.2, "learning_rate": 0.0004035144026317901, "loss": 0.0692, "theoretical_loss": 3.4614943858775282, "tokens_seen": 1981677568 }, { "epoch": 0.2, "learning_rate": 0.00040347428388028567, "loss": 0.0682, "theoretical_loss": 3.4614754200391404, "tokens_seen": 1981808640 }, { "epoch": 0.2, "learning_rate": 0.00040343416512878124, "loss": 0.0694, "theoretical_loss": 3.4614564558062586, "tokens_seen": 1981939712 }, { "epoch": 0.2, "learning_rate": 0.0004033940463772767, "loss": 0.0676, "theoretical_loss": 3.4614374931786402, "tokens_seen": 1982070784 }, { "epoch": 0.2, "learning_rate": 0.00040335392762577227, "loss": 0.0713, "theoretical_loss": 3.461418532156043, "tokens_seen": 1982201856 }, { "epoch": 0.2, "learning_rate": 0.00040331380887426784, "loss": 0.0726, "theoretical_loss": 3.461399572738226, "tokens_seen": 1982332928 }, { "epoch": 0.2, "learning_rate": 0.0004032736901227634, "loss": 0.07, "theoretical_loss": 3.4613806149249466, "tokens_seen": 1982464000 }, { "epoch": 0.2, "learning_rate": 0.0004032335713712589, "loss": 0.0675, "theoretical_loss": 3.461361658715963, "tokens_seen": 1982595072 }, { "epoch": 0.2, "learning_rate": 0.0004031934526197545, "loss": 0.0687, "theoretical_loss": 3.4613427041110327, "tokens_seen": 1982726144 }, { "epoch": 0.2, "learning_rate": 0.00040315333386825006, "loss": 0.0667, "theoretical_loss": 3.4613237511099157, "tokens_seen": 1982857216 }, { "epoch": 0.2, "learning_rate": 0.00040311321511674557, "loss": 0.0709, "theoretical_loss": 3.4613047997123685, "tokens_seen": 1982988288 }, { "epoch": 0.2, "learning_rate": 0.00040307309636524114, "loss": 0.0696, "theoretical_loss": 3.4612858499181502, "tokens_seen": 1983119360 }, { "epoch": 0.2, "learning_rate": 0.0004030329776137367, "loss": 0.0725, "theoretical_loss": 3.46126690172702, "tokens_seen": 1983250432 }, { "epoch": 0.2, "learning_rate": 0.00040299285886223217, "loss": 0.0696, "theoretical_loss": 3.4612479551387345, "tokens_seen": 1983381504 }, { "epoch": 0.2, "learning_rate": 0.00040295274011072774, "loss": 0.0689, "theoretical_loss": 3.461229010153054, "tokens_seen": 1983512576 }, { "epoch": 0.2, "learning_rate": 0.0004029126213592233, "loss": 0.0705, "theoretical_loss": 3.461210066769736, "tokens_seen": 1983643648 }, { "epoch": 0.2, "learning_rate": 0.0004028725026077189, "loss": 0.0644, "theoretical_loss": 3.4611911249885394, "tokens_seen": 1983774720 }, { "epoch": 0.2, "learning_rate": 0.0004028323838562144, "loss": 0.0683, "theoretical_loss": 3.4611721848092225, "tokens_seen": 1983905792 }, { "epoch": 0.2, "learning_rate": 0.00040279226510470996, "loss": 0.0728, "theoretical_loss": 3.461153246231545, "tokens_seen": 1984036864 }, { "epoch": 0.2, "objective/train/advantage_avg": -3.694757106131874e-05, "objective/train/docs_used": 723589, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4143598079681396, "objective/train/original_loss": 1.4143598079681396, "objective/train/theoretical_loss": 3.461134309255265, "objective/train/tokens_used": 354692576, "objective/train/value_avg": -0.003997802734375, "objective/train/value_loss": 0.00013982444943394512, "objective/train/value_max": -3.707408905029297e-05, "objective/train/value_min": -0.143798828125, "objective/train/value_reward_corr": 0.6161077975640694, "objective/train/value_std": 0.0070953369140625, "objective/train/weight_avg": 1.0000245571136475, "objective/train/weighted_lm_loss": 1.4143340587615967, "objective/train/weights_max": 1.088674545288086, "objective/train/weights_min": 0.37003564834594727, "theoretical_loss": 3.461134309255265, "tokens_seen": 1984167936 }, { "epoch": 0.2, "learning_rate": 0.0004027521463532055, "loss": 0.0701, "theoretical_loss": 3.461134309255265, "tokens_seen": 1984167936 }, { "epoch": 0.2, "learning_rate": 0.00040271202760170104, "loss": 0.0697, "theoretical_loss": 3.461115373880141, "tokens_seen": 1984299008 }, { "epoch": 0.2, "learning_rate": 0.0004026719088501966, "loss": 0.0677, "theoretical_loss": 3.4610964401059325, "tokens_seen": 1984430080 }, { "epoch": 0.2, "learning_rate": 0.0004026317900986922, "loss": 0.0691, "theoretical_loss": 3.461077507932398, "tokens_seen": 1984561152 }, { "epoch": 0.2, "learning_rate": 0.00040259167134718763, "loss": 0.0657, "theoretical_loss": 3.461058577359297, "tokens_seen": 1984692224 }, { "epoch": 0.2, "learning_rate": 0.0004025515525956832, "loss": 0.0677, "theoretical_loss": 3.4610396483863877, "tokens_seen": 1984823296 }, { "epoch": 0.2, "learning_rate": 0.00040251143384417877, "loss": 0.0699, "theoretical_loss": 3.4610207210134294, "tokens_seen": 1984954368 }, { "epoch": 0.2, "learning_rate": 0.00040247131509267434, "loss": 0.0669, "theoretical_loss": 3.4610017952401817, "tokens_seen": 1985085440 }, { "epoch": 0.2, "learning_rate": 0.00040243119634116985, "loss": 0.0715, "theoretical_loss": 3.4609828710664035, "tokens_seen": 1985216512 }, { "epoch": 0.2, "learning_rate": 0.0004023910775896654, "loss": 0.0651, "theoretical_loss": 3.4609639484918544, "tokens_seen": 1985347584 }, { "epoch": 0.2, "learning_rate": 0.000402350958838161, "loss": 0.0699, "theoretical_loss": 3.460945027516293, "tokens_seen": 1985478656 }, { "epoch": 0.2, "learning_rate": 0.0004023108400866565, "loss": 0.0692, "theoretical_loss": 3.4609261081394793, "tokens_seen": 1985609728 }, { "epoch": 0.2, "learning_rate": 0.0004022707213351521, "loss": 0.0703, "theoretical_loss": 3.460907190361172, "tokens_seen": 1985740800 }, { "epoch": 0.2, "learning_rate": 0.00040223060258364764, "loss": 0.0686, "theoretical_loss": 3.460888274181131, "tokens_seen": 1985871872 }, { "epoch": 0.2, "learning_rate": 0.0004021904838321431, "loss": 0.0703, "theoretical_loss": 3.460869359599116, "tokens_seen": 1986002944 }, { "epoch": 0.2, "learning_rate": 0.00040215036508063867, "loss": 0.0671, "theoretical_loss": 3.460850446614886, "tokens_seen": 1986134016 }, { "epoch": 0.2, "learning_rate": 0.00040211024632913424, "loss": 0.0675, "theoretical_loss": 3.460831535228201, "tokens_seen": 1986265088 }, { "epoch": 0.2, "learning_rate": 0.0004020701275776298, "loss": 0.0688, "theoretical_loss": 3.4608126254388205, "tokens_seen": 1986396160 }, { "epoch": 0.2, "learning_rate": 0.0004020300088261253, "loss": 0.0651, "theoretical_loss": 3.4607937172465046, "tokens_seen": 1986527232 }, { "epoch": 0.2, "learning_rate": 0.0004019898900746209, "loss": 0.0724, "theoretical_loss": 3.460774810651012, "tokens_seen": 1986658304 }, { "epoch": 0.2, "learning_rate": 0.00040194977132311646, "loss": 0.0731, "theoretical_loss": 3.4607559056521033, "tokens_seen": 1986789376 }, { "epoch": 0.2, "learning_rate": 0.00040190965257161197, "loss": 0.0691, "theoretical_loss": 3.4607370022495387, "tokens_seen": 1986920448 }, { "epoch": 0.2, "learning_rate": 0.00040186953382010754, "loss": 0.0699, "theoretical_loss": 3.4607181004430774, "tokens_seen": 1987051520 }, { "epoch": 0.2, "learning_rate": 0.0004018294150686031, "loss": 0.0652, "theoretical_loss": 3.4606992002324795, "tokens_seen": 1987182592 }, { "epoch": 0.2, "learning_rate": 0.00040178929631709857, "loss": 0.0711, "theoretical_loss": 3.460680301617505, "tokens_seen": 1987313664 }, { "epoch": 0.2, "objective/train/advantage_avg": 0.000916296208743006, "objective/train/docs_used": 724791, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4451369047164917, "objective/train/original_loss": 1.4451367855072021, "objective/train/theoretical_loss": 3.4606614045979147, "objective/train/tokens_used": 357969376, "objective/train/value_avg": -0.011566162109375, "objective/train/value_loss": 0.0004834179999306798, "objective/train/value_max": -7.843971252441406e-05, "objective/train/value_min": -0.97705078125, "objective/train/value_reward_corr": 0.7089875466625517, "objective/train/value_std": 0.0228729248046875, "objective/train/weight_avg": 1.0011262893676758, "objective/train/weighted_lm_loss": 1.4457991123199463, "objective/train/weights_max": 1.4699876308441162, "objective/train/weights_min": 0.23660925030708313, "theoretical_loss": 3.4606614045979147, "tokens_seen": 1987444736 }, { "epoch": 0.2, "learning_rate": 0.00040174917756559414, "loss": 0.0656, "theoretical_loss": 3.4606614045979147, "tokens_seen": 1987444736 }, { "epoch": 0.2, "learning_rate": 0.0004017090588140897, "loss": 0.0704, "theoretical_loss": 3.460642509173468, "tokens_seen": 1987575808 }, { "epoch": 0.2, "learning_rate": 0.0004016689400625853, "loss": 0.0662, "theoretical_loss": 3.4606236153439247, "tokens_seen": 1987706880 }, { "epoch": 0.2, "learning_rate": 0.0004016288213110808, "loss": 0.0692, "theoretical_loss": 3.4606047231090455, "tokens_seen": 1987837952 }, { "epoch": 0.2, "learning_rate": 0.00040158870255957636, "loss": 0.0719, "theoretical_loss": 3.460585832468591, "tokens_seen": 1987969024 }, { "epoch": 0.2, "learning_rate": 0.0004015485838080719, "loss": 0.0703, "theoretical_loss": 3.4605669434223216, "tokens_seen": 1988100096 }, { "epoch": 0.21, "learning_rate": 0.00040150846505656744, "loss": 0.0681, "theoretical_loss": 3.460548055969997, "tokens_seen": 1988231168 }, { "epoch": 0.21, "learning_rate": 0.000401468346305063, "loss": 0.0677, "theoretical_loss": 3.4605291701113776, "tokens_seen": 1988362240 }, { "epoch": 0.21, "learning_rate": 0.0004014282275535586, "loss": 0.0665, "theoretical_loss": 3.460510285846224, "tokens_seen": 1988493312 }, { "epoch": 0.21, "learning_rate": 0.0004013881088020541, "loss": 0.0655, "theoretical_loss": 3.4604914031742977, "tokens_seen": 1988624384 }, { "epoch": 0.21, "learning_rate": 0.0004013479900505496, "loss": 0.075, "theoretical_loss": 3.460472522095358, "tokens_seen": 1988755456 }, { "epoch": 0.21, "learning_rate": 0.00040130787129904517, "loss": 0.0674, "theoretical_loss": 3.460453642609166, "tokens_seen": 1988886528 }, { "epoch": 0.21, "learning_rate": 0.00040126775254754074, "loss": 0.0729, "theoretical_loss": 3.4604347647154827, "tokens_seen": 1989017600 }, { "epoch": 0.21, "learning_rate": 0.00040122763379603626, "loss": 0.0666, "theoretical_loss": 3.4604158884140683, "tokens_seen": 1989148672 }, { "epoch": 0.21, "learning_rate": 0.0004011875150445318, "loss": 0.0685, "theoretical_loss": 3.460397013704684, "tokens_seen": 1989279744 }, { "epoch": 0.21, "learning_rate": 0.0004011473962930274, "loss": 0.0679, "theoretical_loss": 3.460378140587091, "tokens_seen": 1989410816 }, { "epoch": 0.21, "learning_rate": 0.0004011072775415229, "loss": 0.071, "theoretical_loss": 3.4603592690610494, "tokens_seen": 1989541888 }, { "epoch": 0.21, "learning_rate": 0.0004010671587900185, "loss": 0.0709, "theoretical_loss": 3.46034039912632, "tokens_seen": 1989672960 }, { "epoch": 0.21, "learning_rate": 0.00040102704003851404, "loss": 0.0675, "theoretical_loss": 3.460321530782665, "tokens_seen": 1989804032 }, { "epoch": 0.21, "learning_rate": 0.00040098692128700956, "loss": 0.07, "theoretical_loss": 3.460302664029844, "tokens_seen": 1989935104 }, { "epoch": 0.21, "learning_rate": 0.00040094680253550507, "loss": 0.0688, "theoretical_loss": 3.4602837988676196, "tokens_seen": 1990066176 }, { "epoch": 0.21, "learning_rate": 0.00040090668378400064, "loss": 0.0694, "theoretical_loss": 3.4602649352957515, "tokens_seen": 1990197248 }, { "epoch": 0.21, "learning_rate": 0.0004008665650324962, "loss": 0.0669, "theoretical_loss": 3.4602460733140017, "tokens_seen": 1990328320 }, { "epoch": 0.21, "learning_rate": 0.0004008264462809917, "loss": 0.0668, "theoretical_loss": 3.460227212922131, "tokens_seen": 1990459392 }, { "epoch": 0.21, "learning_rate": 0.0004007863275294873, "loss": 0.0719, "theoretical_loss": 3.460208354119902, "tokens_seen": 1990590464 }, { "epoch": 0.21, "objective/train/advantage_avg": -0.003994423430413008, "objective/train/docs_used": 726036, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3242079019546509, "objective/train/original_loss": 1.3242079019546509, "objective/train/theoretical_loss": 3.4601894969070743, "objective/train/tokens_used": 361246176, "objective/train/value_avg": -0.013580322265625, "objective/train/value_loss": 0.0005879381205886602, "objective/train/value_max": -2.193450927734375e-05, "objective/train/value_min": -0.3642578125, "objective/train/value_reward_corr": 0.9699578929913172, "objective/train/value_std": 0.04656982421875, "objective/train/weight_avg": 0.9962854981422424, "objective/train/weighted_lm_loss": 1.320798635482788, "objective/train/weights_max": 1.118717074394226, "objective/train/weights_min": 0.5699683427810669, "theoretical_loss": 3.4601894969070743, "tokens_seen": 1990721536 }, { "epoch": 0.21, "learning_rate": 0.00040074620877798286, "loss": 0.0691, "theoretical_loss": 3.4601894969070743, "tokens_seen": 1990721536 }, { "epoch": 0.21, "learning_rate": 0.0004007060900264784, "loss": 0.0713, "theoretical_loss": 3.4601706412834106, "tokens_seen": 1990852608 }, { "epoch": 0.21, "learning_rate": 0.00040066597127497394, "loss": 0.0717, "theoretical_loss": 3.460151787248672, "tokens_seen": 1990983680 }, { "epoch": 0.21, "learning_rate": 0.0004006258525234695, "loss": 0.0719, "theoretical_loss": 3.460132934802619, "tokens_seen": 1991114752 }, { "epoch": 0.21, "learning_rate": 0.000400585733771965, "loss": 0.0676, "theoretical_loss": 3.460114083945015, "tokens_seen": 1991245824 }, { "epoch": 0.21, "learning_rate": 0.00040054561502046054, "loss": 0.0714, "theoretical_loss": 3.46009523467562, "tokens_seen": 1991376896 }, { "epoch": 0.21, "learning_rate": 0.0004005054962689561, "loss": 0.0686, "theoretical_loss": 3.4600763869941966, "tokens_seen": 1991507968 }, { "epoch": 0.21, "learning_rate": 0.0004004653775174517, "loss": 0.0701, "theoretical_loss": 3.4600575409005065, "tokens_seen": 1991639040 }, { "epoch": 0.21, "learning_rate": 0.0004004252587659472, "loss": 0.0711, "theoretical_loss": 3.460038696394311, "tokens_seen": 1991770112 }, { "epoch": 0.21, "learning_rate": 0.00040038514001444276, "loss": 0.0708, "theoretical_loss": 3.460019853475372, "tokens_seen": 1991901184 }, { "epoch": 0.21, "learning_rate": 0.0004003450212629383, "loss": 0.0687, "theoretical_loss": 3.4600010121434517, "tokens_seen": 1992032256 }, { "epoch": 0.21, "learning_rate": 0.00040030490251143384, "loss": 0.0701, "theoretical_loss": 3.4599821723983117, "tokens_seen": 1992163328 }, { "epoch": 0.21, "learning_rate": 0.0004002647837599294, "loss": 0.0703, "theoretical_loss": 3.4599633342397142, "tokens_seen": 1992294400 }, { "epoch": 0.21, "learning_rate": 0.000400224665008425, "loss": 0.0718, "theoretical_loss": 3.459944497667421, "tokens_seen": 1992425472 }, { "epoch": 0.21, "learning_rate": 0.0004001845462569205, "loss": 0.0739, "theoretical_loss": 3.4599256626811945, "tokens_seen": 1992556544 }, { "epoch": 0.21, "learning_rate": 0.000400144427505416, "loss": 0.0695, "theoretical_loss": 3.459906829280797, "tokens_seen": 1992687616 }, { "epoch": 0.21, "learning_rate": 0.0004001043087539116, "loss": 0.068, "theoretical_loss": 3.45988799746599, "tokens_seen": 1992818688 }, { "epoch": 0.21, "learning_rate": 0.00040006419000240714, "loss": 0.0678, "theoretical_loss": 3.459869167236536, "tokens_seen": 1992949760 }, { "epoch": 0.21, "learning_rate": 0.00040002407125090266, "loss": 0.0679, "theoretical_loss": 3.4598503385921977, "tokens_seen": 1993080832 }, { "epoch": 0.21, "learning_rate": 0.0003999839524993982, "loss": 0.0703, "theoretical_loss": 3.4598315115327374, "tokens_seen": 1993211904 }, { "epoch": 0.21, "learning_rate": 0.0003999438337478938, "loss": 0.0764, "theoretical_loss": 3.4598126860579166, "tokens_seen": 1993342976 }, { "epoch": 0.21, "learning_rate": 0.0003999037149963893, "loss": 0.0702, "theoretical_loss": 3.459793862167498, "tokens_seen": 1993474048 }, { "epoch": 0.21, "learning_rate": 0.0003998635962448849, "loss": 0.073, "theoretical_loss": 3.4597750398612455, "tokens_seen": 1993605120 }, { "epoch": 0.21, "learning_rate": 0.00039982347749338044, "loss": 0.0698, "theoretical_loss": 3.45975621913892, "tokens_seen": 1993736192 }, { "epoch": 0.21, "learning_rate": 0.00039978335874187596, "loss": 0.0692, "theoretical_loss": 3.459737400000284, "tokens_seen": 1993867264 }, { "epoch": 0.21, "objective/train/advantage_avg": -0.0006918521248735487, "objective/train/docs_used": 727257, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5075526237487793, "objective/train/original_loss": 1.5075526237487793, "objective/train/theoretical_loss": 3.4597185824451016, "objective/train/tokens_used": 364522976, "objective/train/value_avg": -0.0082550048828125, "objective/train/value_loss": 0.0003900788724422455, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.634765625, "objective/train/value_reward_corr": 0.6972334745239129, "objective/train/value_std": 0.0178680419921875, "objective/train/weight_avg": 0.9994754195213318, "objective/train/weighted_lm_loss": 1.5055025815963745, "objective/train/weights_max": 1.714398980140686, "objective/train/weights_min": 0.2386397421360016, "theoretical_loss": 3.4597185824451016, "tokens_seen": 1993998336 }, { "epoch": 0.21, "learning_rate": 0.00039974323999037147, "loss": 0.0707, "theoretical_loss": 3.4597185824451016, "tokens_seen": 1993998336 }, { "epoch": 0.21, "learning_rate": 0.00039970312123886704, "loss": 0.0696, "theoretical_loss": 3.4596997664731344, "tokens_seen": 1994129408 }, { "epoch": 0.21, "learning_rate": 0.0003996630024873626, "loss": 0.0708, "theoretical_loss": 3.459680952084146, "tokens_seen": 1994260480 }, { "epoch": 0.21, "learning_rate": 0.0003996228837358581, "loss": 0.0716, "theoretical_loss": 3.4596621392778983, "tokens_seen": 1994391552 }, { "epoch": 0.21, "learning_rate": 0.0003995827649843537, "loss": 0.0724, "theoretical_loss": 3.4596433280541543, "tokens_seen": 1994522624 }, { "epoch": 0.21, "learning_rate": 0.00039954264623284926, "loss": 0.0686, "theoretical_loss": 3.459624518412677, "tokens_seen": 1994653696 }, { "epoch": 0.21, "learning_rate": 0.00039950252748134483, "loss": 0.0676, "theoretical_loss": 3.45960571035323, "tokens_seen": 1994784768 }, { "epoch": 0.21, "learning_rate": 0.00039946240872984034, "loss": 0.0697, "theoretical_loss": 3.459586903875575, "tokens_seen": 1994915840 }, { "epoch": 0.21, "learning_rate": 0.0003994222899783359, "loss": 0.0685, "theoretical_loss": 3.459568098979476, "tokens_seen": 1995046912 }, { "epoch": 0.21, "learning_rate": 0.0003993821712268314, "loss": 0.0695, "theoretical_loss": 3.4595492956646963, "tokens_seen": 1995177984 }, { "epoch": 0.21, "learning_rate": 0.00039934205247532694, "loss": 0.0663, "theoretical_loss": 3.4595304939309983, "tokens_seen": 1995309056 }, { "epoch": 0.21, "learning_rate": 0.0003993019337238225, "loss": 0.0714, "theoretical_loss": 3.459511693778146, "tokens_seen": 1995440128 }, { "epoch": 0.21, "learning_rate": 0.0003992618149723181, "loss": 0.0678, "theoretical_loss": 3.4594928952059014, "tokens_seen": 1995571200 }, { "epoch": 0.21, "learning_rate": 0.0003992216962208136, "loss": 0.0694, "theoretical_loss": 3.4594740982140295, "tokens_seen": 1995702272 }, { "epoch": 0.21, "learning_rate": 0.00039918157746930916, "loss": 0.0672, "theoretical_loss": 3.4594553028022927, "tokens_seen": 1995833344 }, { "epoch": 0.21, "learning_rate": 0.00039914145871780473, "loss": 0.0705, "theoretical_loss": 3.459436508970454, "tokens_seen": 1995964416 }, { "epoch": 0.21, "learning_rate": 0.0003991013399663003, "loss": 0.0687, "theoretical_loss": 3.459417716718278, "tokens_seen": 1996095488 }, { "epoch": 0.21, "learning_rate": 0.0003990612212147958, "loss": 0.0709, "theoretical_loss": 3.4593989260455267, "tokens_seen": 1996226560 }, { "epoch": 0.21, "learning_rate": 0.0003990211024632914, "loss": 0.0663, "theoretical_loss": 3.459380136951965, "tokens_seen": 1996357632 }, { "epoch": 0.21, "learning_rate": 0.0003989809837117869, "loss": 0.065, "theoretical_loss": 3.459361349437356, "tokens_seen": 1996488704 }, { "epoch": 0.21, "learning_rate": 0.0003989408649602824, "loss": 0.0679, "theoretical_loss": 3.4593425635014636, "tokens_seen": 1996619776 }, { "epoch": 0.21, "learning_rate": 0.000398900746208778, "loss": 0.0696, "theoretical_loss": 3.459323779144051, "tokens_seen": 1996750848 }, { "epoch": 0.21, "learning_rate": 0.00039886062745727354, "loss": 0.0742, "theoretical_loss": 3.4593049963648825, "tokens_seen": 1996881920 }, { "epoch": 0.21, "learning_rate": 0.00039882050870576906, "loss": 0.0676, "theoretical_loss": 3.4592862151637216, "tokens_seen": 1997012992 }, { "epoch": 0.21, "learning_rate": 0.0003987803899542646, "loss": 0.0701, "theoretical_loss": 3.4592674355403323, "tokens_seen": 1997144064 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.000706835649907589, "objective/train/docs_used": 728467, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.422791600227356, "objective/train/original_loss": 1.422791600227356, "objective/train/theoretical_loss": 3.4592486574944785, "objective/train/tokens_used": 367799776, "objective/train/value_avg": -0.0075836181640625, "objective/train/value_loss": 0.00027689625858329237, "objective/train/value_max": -5.346536636352539e-05, "objective/train/value_min": -0.365234375, "objective/train/value_reward_corr": 0.5822032466044034, "objective/train/value_std": 0.011444091796875, "objective/train/weight_avg": 1.00082266330719, "objective/train/weighted_lm_loss": 1.4235963821411133, "objective/train/weights_max": 1.4195526838302612, "objective/train/weights_min": 0.23035363852977753, "theoretical_loss": 3.4592486574944785, "tokens_seen": 1997275136 }, { "epoch": 0.21, "learning_rate": 0.0003987402712027602, "loss": 0.0714, "theoretical_loss": 3.4592486574944785, "tokens_seen": 1997275136 }, { "epoch": 0.21, "learning_rate": 0.00039870015245125576, "loss": 0.0674, "theoretical_loss": 3.4592298810259243, "tokens_seen": 1997406208 }, { "epoch": 0.21, "learning_rate": 0.0003986600336997513, "loss": 0.0694, "theoretical_loss": 3.4592111061344335, "tokens_seen": 1997537280 }, { "epoch": 0.21, "learning_rate": 0.00039861991494824685, "loss": 0.0682, "theoretical_loss": 3.45919233281977, "tokens_seen": 1997668352 }, { "epoch": 0.21, "learning_rate": 0.00039857979619674236, "loss": 0.072, "theoretical_loss": 3.4591735610816983, "tokens_seen": 1997799424 }, { "epoch": 0.21, "learning_rate": 0.0003985396774452379, "loss": 0.0693, "theoretical_loss": 3.4591547909199827, "tokens_seen": 1997930496 }, { "epoch": 0.21, "learning_rate": 0.00039849955869373344, "loss": 0.0685, "theoretical_loss": 3.459136022334387, "tokens_seen": 1998061568 }, { "epoch": 0.21, "learning_rate": 0.000398459439942229, "loss": 0.0683, "theoretical_loss": 3.4591172553246756, "tokens_seen": 1998192640 }, { "epoch": 0.21, "learning_rate": 0.0003984193211907245, "loss": 0.0686, "theoretical_loss": 3.4590984898906134, "tokens_seen": 1998323712 }, { "epoch": 0.21, "learning_rate": 0.0003983792024392201, "loss": 0.0676, "theoretical_loss": 3.4590797260319635, "tokens_seen": 1998454784 }, { "epoch": 0.21, "learning_rate": 0.00039833908368771566, "loss": 0.0648, "theoretical_loss": 3.4590609637484913, "tokens_seen": 1998585856 }, { "epoch": 0.21, "learning_rate": 0.00039829896493621123, "loss": 0.0693, "theoretical_loss": 3.459042203039961, "tokens_seen": 1998716928 }, { "epoch": 0.21, "learning_rate": 0.00039825884618470674, "loss": 0.0669, "theoretical_loss": 3.459023443906138, "tokens_seen": 1998848000 }, { "epoch": 0.21, "learning_rate": 0.0003982187274332023, "loss": 0.0698, "theoretical_loss": 3.459004686346785, "tokens_seen": 1998979072 }, { "epoch": 0.21, "learning_rate": 0.0003981786086816978, "loss": 0.071, "theoretical_loss": 3.458985930361668, "tokens_seen": 1999110144 }, { "epoch": 0.21, "learning_rate": 0.00039813848993019334, "loss": 0.0703, "theoretical_loss": 3.4589671759505514, "tokens_seen": 1999241216 }, { "epoch": 0.21, "learning_rate": 0.0003980983711786889, "loss": 0.0709, "theoretical_loss": 3.4589484231132, "tokens_seen": 1999372288 }, { "epoch": 0.21, "learning_rate": 0.0003980582524271845, "loss": 0.0675, "theoretical_loss": 3.4589296718493783, "tokens_seen": 1999503360 }, { "epoch": 0.21, "learning_rate": 0.00039801813367568, "loss": 0.0682, "theoretical_loss": 3.4589109221588514, "tokens_seen": 1999634432 }, { "epoch": 0.21, "learning_rate": 0.00039797801492417556, "loss": 0.0685, "theoretical_loss": 3.4588921740413845, "tokens_seen": 1999765504 }, { "epoch": 0.21, "learning_rate": 0.00039793789617267113, "loss": 0.0702, "theoretical_loss": 3.4588734274967416, "tokens_seen": 1999896576 }, { "epoch": 0.21, "learning_rate": 0.0003978977774211667, "loss": 0.0668, "theoretical_loss": 3.458854682524688, "tokens_seen": 2000027648 }, { "epoch": 0.21, "learning_rate": 0.0003978576586696622, "loss": 0.0699, "theoretical_loss": 3.4588359391249894, "tokens_seen": 2000158720 }, { "epoch": 0.21, "learning_rate": 0.0003978175399181578, "loss": 0.0711, "theoretical_loss": 3.45881719729741, "tokens_seen": 2000289792 }, { "epoch": 0.21, "learning_rate": 0.0003977774211666533, "loss": 0.069, "theoretical_loss": 3.458798457041716, "tokens_seen": 2000420864 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0002663837221916765, "objective/train/docs_used": 729542, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.231395959854126, "objective/train/original_loss": 1.231395959854126, "objective/train/theoretical_loss": 3.458779718357672, "objective/train/tokens_used": 371076576, "objective/train/value_avg": -0.0085296630859375, "objective/train/value_loss": 0.00024775616475380957, "objective/train/value_max": -5.1856040954589844e-05, "objective/train/value_min": -0.8916015625, "objective/train/value_reward_corr": 0.7060971718425838, "objective/train/value_std": 0.0157318115234375, "objective/train/weight_avg": 1.000378131866455, "objective/train/weighted_lm_loss": 1.2311949729919434, "objective/train/weights_max": 1.2982099056243896, "objective/train/weights_min": 0.36880120635032654, "theoretical_loss": 3.458779718357672, "tokens_seen": 2000551936 }, { "epoch": 0.21, "learning_rate": 0.0003977373024151488, "loss": 0.0682, "theoretical_loss": 3.458779718357672, "tokens_seen": 2000551936 }, { "epoch": 0.21, "learning_rate": 0.0003976971836636444, "loss": 0.0732, "theoretical_loss": 3.4587609812450424, "tokens_seen": 2000683008 }, { "epoch": 0.21, "learning_rate": 0.00039765706491213994, "loss": 0.0698, "theoretical_loss": 3.4587422457035935, "tokens_seen": 2000814080 }, { "epoch": 0.21, "learning_rate": 0.00039761694616063546, "loss": 0.0724, "theoretical_loss": 3.4587235117330906, "tokens_seen": 2000945152 }, { "epoch": 0.21, "learning_rate": 0.000397576827409131, "loss": 0.0721, "theoretical_loss": 3.458704779333299, "tokens_seen": 2001076224 }, { "epoch": 0.21, "learning_rate": 0.0003975367086576266, "loss": 0.0685, "theoretical_loss": 3.4586860485039836, "tokens_seen": 2001207296 }, { "epoch": 0.21, "learning_rate": 0.00039749658990612216, "loss": 0.0687, "theoretical_loss": 3.4586673192449107, "tokens_seen": 2001338368 }, { "epoch": 0.21, "learning_rate": 0.0003974564711546177, "loss": 0.0729, "theoretical_loss": 3.4586485915558454, "tokens_seen": 2001469440 }, { "epoch": 0.21, "learning_rate": 0.00039741635240311325, "loss": 0.0682, "theoretical_loss": 3.4586298654365533, "tokens_seen": 2001600512 }, { "epoch": 0.21, "learning_rate": 0.00039737623365160876, "loss": 0.0696, "theoretical_loss": 3.4586111408868, "tokens_seen": 2001731584 }, { "epoch": 0.21, "learning_rate": 0.0003973361149001043, "loss": 0.0664, "theoretical_loss": 3.458592417906351, "tokens_seen": 2001862656 }, { "epoch": 0.21, "learning_rate": 0.00039729599614859984, "loss": 0.0665, "theoretical_loss": 3.4585736964949727, "tokens_seen": 2001993728 }, { "epoch": 0.21, "learning_rate": 0.0003972558773970954, "loss": 0.0645, "theoretical_loss": 3.4585549766524304, "tokens_seen": 2002124800 }, { "epoch": 0.21, "learning_rate": 0.0003972157586455909, "loss": 0.0659, "theoretical_loss": 3.45853625837849, "tokens_seen": 2002255872 }, { "epoch": 0.21, "learning_rate": 0.0003971756398940865, "loss": 0.0671, "theoretical_loss": 3.4585175416729177, "tokens_seen": 2002386944 }, { "epoch": 0.21, "learning_rate": 0.00039713552114258206, "loss": 0.0693, "theoretical_loss": 3.458498826535479, "tokens_seen": 2002518016 }, { "epoch": 0.21, "learning_rate": 0.00039709540239107763, "loss": 0.0687, "theoretical_loss": 3.45848011296594, "tokens_seen": 2002649088 }, { "epoch": 0.21, "learning_rate": 0.00039705528363957315, "loss": 0.066, "theoretical_loss": 3.4584614009640666, "tokens_seen": 2002780160 }, { "epoch": 0.21, "learning_rate": 0.0003970151648880687, "loss": 0.0657, "theoretical_loss": 3.4584426905296253, "tokens_seen": 2002911232 }, { "epoch": 0.21, "learning_rate": 0.00039697504613656423, "loss": 0.0695, "theoretical_loss": 3.4584239816623823, "tokens_seen": 2003042304 }, { "epoch": 0.21, "learning_rate": 0.00039693492738505974, "loss": 0.0689, "theoretical_loss": 3.4584052743621028, "tokens_seen": 2003173376 }, { "epoch": 0.21, "learning_rate": 0.0003968948086335553, "loss": 0.0686, "theoretical_loss": 3.4583865686285544, "tokens_seen": 2003304448 }, { "epoch": 0.21, "learning_rate": 0.0003968546898820509, "loss": 0.0715, "theoretical_loss": 3.4583678644615024, "tokens_seen": 2003435520 }, { "epoch": 0.21, "learning_rate": 0.00039681457113054645, "loss": 0.0723, "theoretical_loss": 3.4583491618607134, "tokens_seen": 2003566592 }, { "epoch": 0.21, "learning_rate": 0.00039677445237904196, "loss": 0.0707, "theoretical_loss": 3.4583304608259544, "tokens_seen": 2003697664 }, { "epoch": 0.21, "objective/train/advantage_avg": 0.0012522207107394934, "objective/train/docs_used": 730811, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.34685480594635, "objective/train/original_loss": 1.34685480594635, "objective/train/theoretical_loss": 3.458311761356991, "objective/train/tokens_used": 374353376, "objective/train/value_avg": -0.00775909423828125, "objective/train/value_loss": 0.00022794802498538047, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.262451171875, "objective/train/value_reward_corr": 0.6751305159231684, "objective/train/value_std": 0.01450347900390625, "objective/train/weight_avg": 1.0013549327850342, "objective/train/weighted_lm_loss": 1.348760724067688, "objective/train/weights_max": 1.2672005891799927, "objective/train/weights_min": 0.368155300617218, "theoretical_loss": 3.458311761356991, "tokens_seen": 2003828736 }, { "epoch": 0.21, "learning_rate": 0.00039673433362753753, "loss": 0.0683, "theoretical_loss": 3.458311761356991, "tokens_seen": 2003828736 }, { "epoch": 0.21, "learning_rate": 0.0003966942148760331, "loss": 0.0684, "theoretical_loss": 3.45829306345359, "tokens_seen": 2003959808 }, { "epoch": 0.21, "learning_rate": 0.0003966540961245286, "loss": 0.0726, "theoretical_loss": 3.4582743671155183, "tokens_seen": 2004090880 }, { "epoch": 0.21, "learning_rate": 0.0003966139773730242, "loss": 0.0691, "theoretical_loss": 3.458255672342542, "tokens_seen": 2004221952 }, { "epoch": 0.21, "learning_rate": 0.00039657385862151975, "loss": 0.0703, "theoretical_loss": 3.458236979134428, "tokens_seen": 2004353024 }, { "epoch": 0.21, "learning_rate": 0.0003965337398700152, "loss": 0.0665, "theoretical_loss": 3.4582182874909426, "tokens_seen": 2004484096 }, { "epoch": 0.21, "learning_rate": 0.0003964936211185108, "loss": 0.0697, "theoretical_loss": 3.458199597411853, "tokens_seen": 2004615168 }, { "epoch": 0.22, "learning_rate": 0.00039645350236700635, "loss": 0.0693, "theoretical_loss": 3.4581809088969253, "tokens_seen": 2004746240 }, { "epoch": 0.22, "learning_rate": 0.0003964133836155019, "loss": 0.0676, "theoretical_loss": 3.4581622219459276, "tokens_seen": 2004877312 }, { "epoch": 0.22, "learning_rate": 0.00039637326486399743, "loss": 0.0725, "theoretical_loss": 3.4581435365586257, "tokens_seen": 2005008384 }, { "epoch": 0.22, "learning_rate": 0.000396333146112493, "loss": 0.0695, "theoretical_loss": 3.4581248527347874, "tokens_seen": 2005139456 }, { "epoch": 0.22, "learning_rate": 0.00039629302736098856, "loss": 0.0719, "theoretical_loss": 3.4581061704741787, "tokens_seen": 2005270528 }, { "epoch": 0.22, "learning_rate": 0.0003962529086094841, "loss": 0.0698, "theoretical_loss": 3.458087489776567, "tokens_seen": 2005401600 }, { "epoch": 0.22, "learning_rate": 0.00039621278985797965, "loss": 0.0686, "theoretical_loss": 3.45806881064172, "tokens_seen": 2005532672 }, { "epoch": 0.22, "learning_rate": 0.0003961726711064752, "loss": 0.068, "theoretical_loss": 3.458050133069404, "tokens_seen": 2005663744 }, { "epoch": 0.22, "learning_rate": 0.0003961325523549707, "loss": 0.0703, "theoretical_loss": 3.458031457059387, "tokens_seen": 2005794816 }, { "epoch": 0.22, "learning_rate": 0.00039609243360346624, "loss": 0.0694, "theoretical_loss": 3.4580127826114353, "tokens_seen": 2005925888 }, { "epoch": 0.22, "learning_rate": 0.0003960523148519618, "loss": 0.0686, "theoretical_loss": 3.457994109725317, "tokens_seen": 2006056960 }, { "epoch": 0.22, "learning_rate": 0.0003960121961004574, "loss": 0.0661, "theoretical_loss": 3.457975438400799, "tokens_seen": 2006188032 }, { "epoch": 0.22, "learning_rate": 0.0003959720773489529, "loss": 0.068, "theoretical_loss": 3.457956768637649, "tokens_seen": 2006319104 }, { "epoch": 0.22, "learning_rate": 0.00039593195859744846, "loss": 0.0699, "theoretical_loss": 3.4579381004356344, "tokens_seen": 2006450176 }, { "epoch": 0.22, "learning_rate": 0.00039589183984594403, "loss": 0.0721, "theoretical_loss": 3.457919433794522, "tokens_seen": 2006581248 }, { "epoch": 0.22, "learning_rate": 0.00039585172109443955, "loss": 0.0677, "theoretical_loss": 3.4579007687140804, "tokens_seen": 2006712320 }, { "epoch": 0.22, "learning_rate": 0.0003958116023429351, "loss": 0.0718, "theoretical_loss": 3.4578821051940767, "tokens_seen": 2006843392 }, { "epoch": 0.22, "learning_rate": 0.0003957714835914307, "loss": 0.0725, "theoretical_loss": 3.4578634432342783, "tokens_seen": 2006974464 }, { "epoch": 0.22, "objective/train/advantage_avg": -0.00031143531668931246, "objective/train/docs_used": 732034, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.479048252105713, "objective/train/original_loss": 1.479048252105713, "objective/train/theoretical_loss": 3.4578447828344534, "objective/train/tokens_used": 377630176, "objective/train/value_avg": -0.00620269775390625, "objective/train/value_loss": 0.0003461028391029686, "objective/train/value_max": -5.650520324707031e-05, "objective/train/value_min": -0.38623046875, "objective/train/value_reward_corr": 0.7836644523547175, "objective/train/value_std": 0.015899658203125, "objective/train/weight_avg": 0.9998400807380676, "objective/train/weighted_lm_loss": 1.4797663688659668, "objective/train/weights_max": 1.2969428300857544, "objective/train/weights_min": 0.3681907653808594, "theoretical_loss": 3.4578447828344534, "tokens_seen": 2007105536 }, { "epoch": 0.22, "learning_rate": 0.00039573136483992614, "loss": 0.0677, "theoretical_loss": 3.4578447828344534, "tokens_seen": 2007105536 }, { "epoch": 0.22, "learning_rate": 0.0003956912460884217, "loss": 0.0691, "theoretical_loss": 3.4578261239943693, "tokens_seen": 2007236608 }, { "epoch": 0.22, "learning_rate": 0.0003956511273369173, "loss": 0.0719, "theoretical_loss": 3.457807466713794, "tokens_seen": 2007367680 }, { "epoch": 0.22, "learning_rate": 0.00039561100858541285, "loss": 0.0676, "theoretical_loss": 3.4577888109924952, "tokens_seen": 2007498752 }, { "epoch": 0.22, "learning_rate": 0.00039557088983390836, "loss": 0.0715, "theoretical_loss": 3.4577701568302412, "tokens_seen": 2007629824 }, { "epoch": 0.22, "learning_rate": 0.00039553077108240393, "loss": 0.0686, "theoretical_loss": 3.4577515042267994, "tokens_seen": 2007760896 }, { "epoch": 0.22, "learning_rate": 0.0003954906523308995, "loss": 0.0698, "theoretical_loss": 3.4577328531819385, "tokens_seen": 2007891968 }, { "epoch": 0.22, "learning_rate": 0.000395450533579395, "loss": 0.0714, "theoretical_loss": 3.457714203695425, "tokens_seen": 2008023040 }, { "epoch": 0.22, "learning_rate": 0.0003954104148278906, "loss": 0.0664, "theoretical_loss": 3.457695555767029, "tokens_seen": 2008154112 }, { "epoch": 0.22, "learning_rate": 0.00039537029607638615, "loss": 0.0702, "theoretical_loss": 3.4576769093965174, "tokens_seen": 2008285184 }, { "epoch": 0.22, "learning_rate": 0.0003953301773248816, "loss": 0.0662, "theoretical_loss": 3.4576582645836584, "tokens_seen": 2008416256 }, { "epoch": 0.22, "learning_rate": 0.0003952900585733772, "loss": 0.0712, "theoretical_loss": 3.4576396213282212, "tokens_seen": 2008547328 }, { "epoch": 0.22, "learning_rate": 0.00039524993982187275, "loss": 0.0703, "theoretical_loss": 3.457620979629973, "tokens_seen": 2008678400 }, { "epoch": 0.22, "learning_rate": 0.0003952098210703683, "loss": 0.0682, "theoretical_loss": 3.457602339488682, "tokens_seen": 2008809472 }, { "epoch": 0.22, "learning_rate": 0.00039516970231886383, "loss": 0.0741, "theoretical_loss": 3.457583700904118, "tokens_seen": 2008940544 }, { "epoch": 0.22, "learning_rate": 0.0003951295835673594, "loss": 0.0711, "theoretical_loss": 3.4575650638760482, "tokens_seen": 2009071616 }, { "epoch": 0.22, "learning_rate": 0.00039508946481585497, "loss": 0.0684, "theoretical_loss": 3.457546428404241, "tokens_seen": 2009202688 }, { "epoch": 0.22, "learning_rate": 0.0003950493460643505, "loss": 0.0729, "theoretical_loss": 3.457527794488466, "tokens_seen": 2009333760 }, { "epoch": 0.22, "learning_rate": 0.00039500922731284605, "loss": 0.0734, "theoretical_loss": 3.457509162128491, "tokens_seen": 2009464832 }, { "epoch": 0.22, "learning_rate": 0.0003949691085613416, "loss": 0.0666, "theoretical_loss": 3.457490531324085, "tokens_seen": 2009595904 }, { "epoch": 0.22, "learning_rate": 0.0003949289898098371, "loss": 0.069, "theoretical_loss": 3.4574719020750164, "tokens_seen": 2009726976 }, { "epoch": 0.22, "learning_rate": 0.00039488887105833265, "loss": 0.0665, "theoretical_loss": 3.4574532743810535, "tokens_seen": 2009858048 }, { "epoch": 0.22, "learning_rate": 0.0003948487523068282, "loss": 0.0689, "theoretical_loss": 3.457434648241966, "tokens_seen": 2009989120 }, { "epoch": 0.22, "learning_rate": 0.0003948086335553238, "loss": 0.0694, "theoretical_loss": 3.4574160236575224, "tokens_seen": 2010120192 }, { "epoch": 0.22, "learning_rate": 0.0003947685148038193, "loss": 0.0693, "theoretical_loss": 3.4573974006274915, "tokens_seen": 2010251264 }, { "epoch": 0.22, "objective/train/advantage_avg": 0.0007804845226928592, "objective/train/docs_used": 733267, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.498191237449646, "objective/train/original_loss": 1.4981913566589355, "objective/train/theoretical_loss": 3.457378779151642, "objective/train/tokens_used": 380906976, "objective/train/value_avg": -0.006832122802734375, "objective/train/value_loss": 0.00019269216863904148, "objective/train/value_max": -0.00010150671005249023, "objective/train/value_min": -0.3193359375, "objective/train/value_reward_corr": 0.651482227333709, "objective/train/value_std": 0.0122222900390625, "objective/train/weight_avg": 1.0008684396743774, "objective/train/weighted_lm_loss": 1.4992730617523193, "objective/train/weights_max": 1.2205920219421387, "objective/train/weights_min": 0.39109206199645996, "theoretical_loss": 3.457378779151642, "tokens_seen": 2010382336 }, { "epoch": 0.22, "learning_rate": 0.00039472839605231486, "loss": 0.0681, "theoretical_loss": 3.457378779151642, "tokens_seen": 2010382336 }, { "epoch": 0.22, "learning_rate": 0.00039468827730081043, "loss": 0.0683, "theoretical_loss": 3.457360159229743, "tokens_seen": 2010513408 }, { "epoch": 0.22, "learning_rate": 0.00039464815854930595, "loss": 0.066, "theoretical_loss": 3.457341540861564, "tokens_seen": 2010644480 }, { "epoch": 0.22, "learning_rate": 0.0003946080397978015, "loss": 0.0668, "theoretical_loss": 3.4573229240468737, "tokens_seen": 2010775552 }, { "epoch": 0.22, "learning_rate": 0.0003945679210462971, "loss": 0.0707, "theoretical_loss": 3.4573043087854414, "tokens_seen": 2010906624 }, { "epoch": 0.22, "learning_rate": 0.00039452780229479254, "loss": 0.0676, "theoretical_loss": 3.4572856950770356, "tokens_seen": 2011037696 }, { "epoch": 0.22, "learning_rate": 0.0003944876835432881, "loss": 0.0677, "theoretical_loss": 3.4572670829214265, "tokens_seen": 2011168768 }, { "epoch": 0.22, "learning_rate": 0.0003944475647917837, "loss": 0.0661, "theoretical_loss": 3.4572484723183825, "tokens_seen": 2011299840 }, { "epoch": 0.22, "learning_rate": 0.00039440744604027925, "loss": 0.0711, "theoretical_loss": 3.457229863267674, "tokens_seen": 2011430912 }, { "epoch": 0.22, "learning_rate": 0.00039436732728877476, "loss": 0.0668, "theoretical_loss": 3.457211255769069, "tokens_seen": 2011561984 }, { "epoch": 0.22, "learning_rate": 0.00039432720853727033, "loss": 0.0687, "theoretical_loss": 3.457192649822338, "tokens_seen": 2011693056 }, { "epoch": 0.22, "learning_rate": 0.0003942870897857659, "loss": 0.0676, "theoretical_loss": 3.4571740454272506, "tokens_seen": 2011824128 }, { "epoch": 0.22, "learning_rate": 0.0003942469710342614, "loss": 0.0712, "theoretical_loss": 3.4571554425835753, "tokens_seen": 2011955200 }, { "epoch": 0.22, "learning_rate": 0.000394206852282757, "loss": 0.0686, "theoretical_loss": 3.4571368412910823, "tokens_seen": 2012086272 }, { "epoch": 0.22, "learning_rate": 0.00039416673353125255, "loss": 0.0682, "theoretical_loss": 3.457118241549541, "tokens_seen": 2012217344 }, { "epoch": 0.22, "learning_rate": 0.00039412661477974807, "loss": 0.0694, "theoretical_loss": 3.4570996433587218, "tokens_seen": 2012348416 }, { "epoch": 0.22, "learning_rate": 0.0003940864960282436, "loss": 0.0692, "theoretical_loss": 3.4570810467183932, "tokens_seen": 2012479488 }, { "epoch": 0.22, "learning_rate": 0.00039404637727673915, "loss": 0.0723, "theoretical_loss": 3.457062451628326, "tokens_seen": 2012610560 }, { "epoch": 0.22, "learning_rate": 0.0003940062585252347, "loss": 0.0663, "theoretical_loss": 3.45704385808829, "tokens_seen": 2012741632 }, { "epoch": 0.22, "learning_rate": 0.00039396613977373023, "loss": 0.0692, "theoretical_loss": 3.4570252660980536, "tokens_seen": 2012872704 }, { "epoch": 0.22, "learning_rate": 0.0003939260210222258, "loss": 0.0684, "theoretical_loss": 3.4570066756573885, "tokens_seen": 2013003776 }, { "epoch": 0.22, "learning_rate": 0.00039388590227072137, "loss": 0.0705, "theoretical_loss": 3.4569880867660636, "tokens_seen": 2013134848 }, { "epoch": 0.22, "learning_rate": 0.0003938457835192169, "loss": 0.0705, "theoretical_loss": 3.45696949942385, "tokens_seen": 2013265920 }, { "epoch": 0.22, "learning_rate": 0.00039380566476771245, "loss": 0.0706, "theoretical_loss": 3.456950913630516, "tokens_seen": 2013396992 }, { "epoch": 0.22, "learning_rate": 0.000393765546016208, "loss": 0.0682, "theoretical_loss": 3.4569323293858334, "tokens_seen": 2013528064 }, { "epoch": 0.22, "objective/train/advantage_avg": -0.00040655815973877907, "objective/train/docs_used": 734578, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4802377223968506, "objective/train/original_loss": 1.4802377223968506, "objective/train/theoretical_loss": 3.4569137466895716, "objective/train/tokens_used": 384183776, "objective/train/value_avg": -0.006103515625, "objective/train/value_loss": 0.00019952132424805313, "objective/train/value_max": -4.756450653076172e-05, "objective/train/value_min": -0.33837890625, "objective/train/value_reward_corr": 0.6021176762567626, "objective/train/value_std": 0.0099029541015625, "objective/train/weight_avg": 0.9996809959411621, "objective/train/weighted_lm_loss": 1.4791508913040161, "objective/train/weights_max": 1.103164792060852, "objective/train/weights_min": 0.3687976598739624, "theoretical_loss": 3.4569137466895716, "tokens_seen": 2013659136 }, { "epoch": 0.22, "learning_rate": 0.00039372542726470353, "loss": 0.0715, "theoretical_loss": 3.4569137466895716, "tokens_seen": 2013659136 }, { "epoch": 0.22, "learning_rate": 0.00039368530851319905, "loss": 0.0692, "theoretical_loss": 3.4568951655415017, "tokens_seen": 2013790208 }, { "epoch": 0.22, "learning_rate": 0.0003936451897616946, "loss": 0.0715, "theoretical_loss": 3.456876585941392, "tokens_seen": 2013921280 }, { "epoch": 0.22, "learning_rate": 0.0003936050710101902, "loss": 0.0666, "theoretical_loss": 3.4568580078890143, "tokens_seen": 2014052352 }, { "epoch": 0.22, "learning_rate": 0.0003935649522586857, "loss": 0.0673, "theoretical_loss": 3.456839431384139, "tokens_seen": 2014183424 }, { "epoch": 0.22, "learning_rate": 0.00039352483350718127, "loss": 0.071, "theoretical_loss": 3.4568208564265364, "tokens_seen": 2014314496 }, { "epoch": 0.22, "learning_rate": 0.00039348471475567683, "loss": 0.0679, "theoretical_loss": 3.456802283015976, "tokens_seen": 2014445568 }, { "epoch": 0.22, "learning_rate": 0.00039344459600417235, "loss": 0.0676, "theoretical_loss": 3.4567837111522293, "tokens_seen": 2014576640 }, { "epoch": 0.22, "learning_rate": 0.0003934044772526679, "loss": 0.0669, "theoretical_loss": 3.456765140835067, "tokens_seen": 2014707712 }, { "epoch": 0.22, "learning_rate": 0.0003933643585011635, "loss": 0.0675, "theoretical_loss": 3.456746572064259, "tokens_seen": 2014838784 }, { "epoch": 0.22, "learning_rate": 0.000393324239749659, "loss": 0.0668, "theoretical_loss": 3.456728004839576, "tokens_seen": 2014969856 }, { "epoch": 0.22, "learning_rate": 0.0003932841209981545, "loss": 0.065, "theoretical_loss": 3.456709439160789, "tokens_seen": 2015100928 }, { "epoch": 0.22, "learning_rate": 0.0003932440022466501, "loss": 0.068, "theoretical_loss": 3.456690875027669, "tokens_seen": 2015232000 }, { "epoch": 0.22, "learning_rate": 0.00039320388349514565, "loss": 0.0719, "theoretical_loss": 3.456672312439986, "tokens_seen": 2015363072 }, { "epoch": 0.22, "learning_rate": 0.00039316376474364116, "loss": 0.0709, "theoretical_loss": 3.4566537513975115, "tokens_seen": 2015494144 }, { "epoch": 0.22, "learning_rate": 0.00039312364599213673, "loss": 0.0679, "theoretical_loss": 3.4566351919000167, "tokens_seen": 2015625216 }, { "epoch": 0.22, "learning_rate": 0.0003930835272406323, "loss": 0.0699, "theoretical_loss": 3.456616633947272, "tokens_seen": 2015756288 }, { "epoch": 0.22, "learning_rate": 0.0003930434084891278, "loss": 0.0669, "theoretical_loss": 3.4565980775390477, "tokens_seen": 2015887360 }, { "epoch": 0.22, "learning_rate": 0.0003930032897376234, "loss": 0.0638, "theoretical_loss": 3.456579522675116, "tokens_seen": 2016018432 }, { "epoch": 0.22, "learning_rate": 0.00039296317098611895, "loss": 0.0699, "theoretical_loss": 3.456560969355248, "tokens_seen": 2016149504 }, { "epoch": 0.22, "learning_rate": 0.00039292305223461447, "loss": 0.0701, "theoretical_loss": 3.4565424175792137, "tokens_seen": 2016280576 }, { "epoch": 0.22, "learning_rate": 0.00039288293348311, "loss": 0.0669, "theoretical_loss": 3.456523867346786, "tokens_seen": 2016411648 }, { "epoch": 0.22, "learning_rate": 0.00039284281473160555, "loss": 0.0682, "theoretical_loss": 3.4565053186577344, "tokens_seen": 2016542720 }, { "epoch": 0.22, "learning_rate": 0.0003928026959801011, "loss": 0.0703, "theoretical_loss": 3.4564867715118313, "tokens_seen": 2016673792 }, { "epoch": 0.22, "learning_rate": 0.00039276257722859663, "loss": 0.0684, "theoretical_loss": 3.4564682259088473, "tokens_seen": 2016804864 }, { "epoch": 0.22, "objective/train/advantage_avg": -7.201087282737717e-05, "objective/train/docs_used": 735776, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3625469207763672, "objective/train/original_loss": 1.3625469207763672, "objective/train/theoretical_loss": 3.4564496818485546, "objective/train/tokens_used": 387460576, "objective/train/value_avg": -0.009307861328125, "objective/train/value_loss": 0.0003534069692250341, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.97802734375, "objective/train/value_reward_corr": 0.7166380593340999, "objective/train/value_std": 0.0193634033203125, "objective/train/weight_avg": 1.000083327293396, "objective/train/weighted_lm_loss": 1.363316535949707, "objective/train/weights_max": 1.5864492654800415, "objective/train/weights_min": 0.36819812655448914, "theoretical_loss": 3.4564496818485546, "tokens_seen": 2016935936 }, { "epoch": 0.22, "learning_rate": 0.0003927224584770922, "loss": 0.0734, "theoretical_loss": 3.4564496818485546, "tokens_seen": 2016935936 }, { "epoch": 0.22, "learning_rate": 0.00039268233972558777, "loss": 0.067, "theoretical_loss": 3.456431139330724, "tokens_seen": 2017067008 }, { "epoch": 0.22, "learning_rate": 0.0003926422209740833, "loss": 0.067, "theoretical_loss": 3.4564125983551275, "tokens_seen": 2017198080 }, { "epoch": 0.22, "learning_rate": 0.00039260210222257885, "loss": 0.0707, "theoretical_loss": 3.456394058921536, "tokens_seen": 2017329152 }, { "epoch": 0.22, "learning_rate": 0.0003925619834710744, "loss": 0.0687, "theoretical_loss": 3.4563755210297216, "tokens_seen": 2017460224 }, { "epoch": 0.22, "learning_rate": 0.00039252186471956993, "loss": 0.0643, "theoretical_loss": 3.4563569846794557, "tokens_seen": 2017591296 }, { "epoch": 0.22, "learning_rate": 0.00039248174596806545, "loss": 0.068, "theoretical_loss": 3.4563384498705094, "tokens_seen": 2017722368 }, { "epoch": 0.22, "learning_rate": 0.000392441627216561, "loss": 0.0708, "theoretical_loss": 3.4563199166026557, "tokens_seen": 2017853440 }, { "epoch": 0.22, "learning_rate": 0.0003924015084650566, "loss": 0.0689, "theoretical_loss": 3.456301384875666, "tokens_seen": 2017984512 }, { "epoch": 0.22, "learning_rate": 0.0003923613897135521, "loss": 0.0702, "theoretical_loss": 3.456282854689311, "tokens_seen": 2018115584 }, { "epoch": 0.22, "learning_rate": 0.00039232127096204767, "loss": 0.0653, "theoretical_loss": 3.456264326043364, "tokens_seen": 2018246656 }, { "epoch": 0.22, "learning_rate": 0.00039228115221054324, "loss": 0.07, "theoretical_loss": 3.456245798937596, "tokens_seen": 2018377728 }, { "epoch": 0.22, "learning_rate": 0.00039224103345903875, "loss": 0.0742, "theoretical_loss": 3.4562272733717796, "tokens_seen": 2018508800 }, { "epoch": 0.22, "learning_rate": 0.0003922009147075343, "loss": 0.0725, "theoretical_loss": 3.456208749345686, "tokens_seen": 2018639872 }, { "epoch": 0.22, "learning_rate": 0.0003921607959560299, "loss": 0.0689, "theoretical_loss": 3.4561902268590883, "tokens_seen": 2018770944 }, { "epoch": 0.22, "learning_rate": 0.0003921206772045254, "loss": 0.0701, "theoretical_loss": 3.456171705911758, "tokens_seen": 2018902016 }, { "epoch": 0.22, "learning_rate": 0.0003920805584530209, "loss": 0.0697, "theoretical_loss": 3.4561531865034665, "tokens_seen": 2019033088 }, { "epoch": 0.22, "learning_rate": 0.0003920404397015165, "loss": 0.0677, "theoretical_loss": 3.4561346686339878, "tokens_seen": 2019164160 }, { "epoch": 0.22, "learning_rate": 0.00039200032095001205, "loss": 0.0669, "theoretical_loss": 3.4561161523030925, "tokens_seen": 2019295232 }, { "epoch": 0.22, "learning_rate": 0.00039196020219850757, "loss": 0.0728, "theoretical_loss": 3.4560976375105543, "tokens_seen": 2019426304 }, { "epoch": 0.22, "learning_rate": 0.00039192008344700313, "loss": 0.0667, "theoretical_loss": 3.456079124256145, "tokens_seen": 2019557376 }, { "epoch": 0.22, "learning_rate": 0.0003918799646954987, "loss": 0.0669, "theoretical_loss": 3.4560606125396363, "tokens_seen": 2019688448 }, { "epoch": 0.22, "learning_rate": 0.00039183984594399427, "loss": 0.0684, "theoretical_loss": 3.4560421023608012, "tokens_seen": 2019819520 }, { "epoch": 0.22, "learning_rate": 0.0003917997271924898, "loss": 0.0726, "theoretical_loss": 3.4560235937194124, "tokens_seen": 2019950592 }, { "epoch": 0.22, "learning_rate": 0.00039175960844098535, "loss": 0.0699, "theoretical_loss": 3.4560050866152423, "tokens_seen": 2020081664 }, { "epoch": 0.22, "objective/train/advantage_avg": -0.00011535571684362367, "objective/train/docs_used": 736878, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2965919971466064, "objective/train/original_loss": 1.296591877937317, "objective/train/theoretical_loss": 3.455986581048064, "objective/train/tokens_used": 390737376, "objective/train/value_avg": -0.00823211669921875, "objective/train/value_loss": 0.00020732979464810342, "objective/train/value_max": -3.147125244140625e-05, "objective/train/value_min": -0.309326171875, "objective/train/value_reward_corr": 0.7815543066597761, "objective/train/value_std": 0.0174407958984375, "objective/train/weight_avg": 0.9999861717224121, "objective/train/weighted_lm_loss": 1.2955617904663086, "objective/train/weights_max": 1.138555884361267, "objective/train/weights_min": 0.6131280660629272, "theoretical_loss": 3.455986581048064, "tokens_seen": 2020212736 }, { "epoch": 0.22, "learning_rate": 0.00039171948968948087, "loss": 0.0664, "theoretical_loss": 3.455986581048064, "tokens_seen": 2020212736 }, { "epoch": 0.22, "learning_rate": 0.0003916793709379764, "loss": 0.0673, "theoretical_loss": 3.455968077017649, "tokens_seen": 2020343808 }, { "epoch": 0.22, "learning_rate": 0.00039163925218647195, "loss": 0.0692, "theoretical_loss": 3.4559495745237707, "tokens_seen": 2020474880 }, { "epoch": 0.22, "learning_rate": 0.0003915991334349675, "loss": 0.0694, "theoretical_loss": 3.455931073566202, "tokens_seen": 2020605952 }, { "epoch": 0.22, "learning_rate": 0.00039155901468346303, "loss": 0.0687, "theoretical_loss": 3.4559125741447154, "tokens_seen": 2020737024 }, { "epoch": 0.22, "learning_rate": 0.0003915188959319586, "loss": 0.0731, "theoretical_loss": 3.4558940762590837, "tokens_seen": 2020868096 }, { "epoch": 0.22, "learning_rate": 0.00039147877718045417, "loss": 0.0715, "theoretical_loss": 3.4558755799090797, "tokens_seen": 2020999168 }, { "epoch": 0.22, "learning_rate": 0.00039143865842894974, "loss": 0.0656, "theoretical_loss": 3.455857085094477, "tokens_seen": 2021130240 }, { "epoch": 0.23, "learning_rate": 0.00039139853967744525, "loss": 0.0725, "theoretical_loss": 3.455838591815048, "tokens_seen": 2021261312 }, { "epoch": 0.23, "learning_rate": 0.0003913584209259408, "loss": 0.0674, "theoretical_loss": 3.4558201000705653, "tokens_seen": 2021392384 }, { "epoch": 0.23, "learning_rate": 0.00039131830217443633, "loss": 0.0679, "theoretical_loss": 3.4558016098608033, "tokens_seen": 2021523456 }, { "epoch": 0.23, "learning_rate": 0.00039127818342293185, "loss": 0.0712, "theoretical_loss": 3.455783121185534, "tokens_seen": 2021654528 }, { "epoch": 0.23, "learning_rate": 0.0003912380646714274, "loss": 0.0686, "theoretical_loss": 3.455764634044531, "tokens_seen": 2021785600 }, { "epoch": 0.23, "learning_rate": 0.000391197945919923, "loss": 0.0686, "theoretical_loss": 3.4557461484375676, "tokens_seen": 2021916672 }, { "epoch": 0.23, "learning_rate": 0.0003911578271684185, "loss": 0.0694, "theoretical_loss": 3.4557276643644164, "tokens_seen": 2022047744 }, { "epoch": 0.23, "learning_rate": 0.00039111770841691407, "loss": 0.0672, "theoretical_loss": 3.4557091818248518, "tokens_seen": 2022178816 }, { "epoch": 0.23, "learning_rate": 0.00039107758966540964, "loss": 0.072, "theoretical_loss": 3.4556907008186464, "tokens_seen": 2022309888 }, { "epoch": 0.23, "learning_rate": 0.0003910374709139052, "loss": 0.0674, "theoretical_loss": 3.4556722213455737, "tokens_seen": 2022440960 }, { "epoch": 0.23, "learning_rate": 0.0003909973521624007, "loss": 0.0677, "theoretical_loss": 3.4556537434054073, "tokens_seen": 2022572032 }, { "epoch": 0.23, "learning_rate": 0.0003909572334108963, "loss": 0.0679, "theoretical_loss": 3.455635266997921, "tokens_seen": 2022703104 }, { "epoch": 0.23, "learning_rate": 0.0003909171146593918, "loss": 0.0689, "theoretical_loss": 3.455616792122888, "tokens_seen": 2022834176 }, { "epoch": 0.23, "learning_rate": 0.0003908769959078873, "loss": 0.0707, "theoretical_loss": 3.4555983187800825, "tokens_seen": 2022965248 }, { "epoch": 0.23, "learning_rate": 0.0003908368771563829, "loss": 0.0704, "theoretical_loss": 3.455579846969277, "tokens_seen": 2023096320 }, { "epoch": 0.23, "learning_rate": 0.00039079675840487845, "loss": 0.0652, "theoretical_loss": 3.455561376690246, "tokens_seen": 2023227392 }, { "epoch": 0.23, "learning_rate": 0.00039075663965337397, "loss": 0.069, "theoretical_loss": 3.455542907942763, "tokens_seen": 2023358464 }, { "epoch": 0.23, "objective/train/advantage_avg": -0.0004598307714331895, "objective/train/docs_used": 738179, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2018293142318726, "objective/train/original_loss": 1.2018293142318726, "objective/train/theoretical_loss": 3.4555244407266024, "objective/train/tokens_used": 394014176, "objective/train/value_avg": -0.00861358642578125, "objective/train/value_loss": 0.0002880470419768244, "objective/train/value_max": -6.204843521118164e-05, "objective/train/value_min": -0.861328125, "objective/train/value_reward_corr": 0.7286260290471078, "objective/train/value_std": 0.0179901123046875, "objective/train/weight_avg": 0.9996785521507263, "objective/train/weighted_lm_loss": 1.2015498876571655, "objective/train/weights_max": 2.2311220169067383, "objective/train/weights_min": 0.42799967527389526, "theoretical_loss": 3.4555244407266024, "tokens_seen": 2023489536 }, { "epoch": 0.23, "learning_rate": 0.00039071652090186954, "loss": 0.0659, "theoretical_loss": 3.4555244407266024, "tokens_seen": 2023489536 }, { "epoch": 0.23, "learning_rate": 0.0003906764021503651, "loss": 0.0659, "theoretical_loss": 3.4555059750415373, "tokens_seen": 2023620608 }, { "epoch": 0.23, "learning_rate": 0.00039063628339886067, "loss": 0.0686, "theoretical_loss": 3.455487510887342, "tokens_seen": 2023751680 }, { "epoch": 0.23, "learning_rate": 0.0003905961646473562, "loss": 0.0727, "theoretical_loss": 3.45546904826379, "tokens_seen": 2023882752 }, { "epoch": 0.23, "learning_rate": 0.00039055604589585175, "loss": 0.0679, "theoretical_loss": 3.455450587170656, "tokens_seen": 2024013824 }, { "epoch": 0.23, "learning_rate": 0.00039051592714434727, "loss": 0.072, "theoretical_loss": 3.455432127607714, "tokens_seen": 2024144896 }, { "epoch": 0.23, "learning_rate": 0.0003904758083928428, "loss": 0.0691, "theoretical_loss": 3.455413669574737, "tokens_seen": 2024275968 }, { "epoch": 0.23, "learning_rate": 0.00039043568964133835, "loss": 0.0655, "theoretical_loss": 3.4553952130715, "tokens_seen": 2024407040 }, { "epoch": 0.23, "learning_rate": 0.0003903955708898339, "loss": 0.07, "theoretical_loss": 3.4553767580977777, "tokens_seen": 2024538112 }, { "epoch": 0.23, "learning_rate": 0.00039035545213832943, "loss": 0.0672, "theoretical_loss": 3.455358304653344, "tokens_seen": 2024669184 }, { "epoch": 0.23, "learning_rate": 0.000390315333386825, "loss": 0.0721, "theoretical_loss": 3.4553398527379717, "tokens_seen": 2024800256 }, { "epoch": 0.23, "learning_rate": 0.00039027521463532057, "loss": 0.0694, "theoretical_loss": 3.455321402351437, "tokens_seen": 2024931328 }, { "epoch": 0.23, "learning_rate": 0.00039023509588381614, "loss": 0.0681, "theoretical_loss": 3.4553029534935136, "tokens_seen": 2025062400 }, { "epoch": 0.23, "learning_rate": 0.00039019497713231165, "loss": 0.0671, "theoretical_loss": 3.4552845061639763, "tokens_seen": 2025193472 }, { "epoch": 0.23, "learning_rate": 0.0003901548583808072, "loss": 0.0665, "theoretical_loss": 3.4552660603625984, "tokens_seen": 2025324544 }, { "epoch": 0.23, "learning_rate": 0.00039011473962930274, "loss": 0.0674, "theoretical_loss": 3.455247616089156, "tokens_seen": 2025455616 }, { "epoch": 0.23, "learning_rate": 0.00039007462087779825, "loss": 0.0702, "theoretical_loss": 3.455229173343423, "tokens_seen": 2025586688 }, { "epoch": 0.23, "learning_rate": 0.0003900345021262938, "loss": 0.0678, "theoretical_loss": 3.4552107321251735, "tokens_seen": 2025717760 }, { "epoch": 0.23, "learning_rate": 0.0003899943833747894, "loss": 0.0703, "theoretical_loss": 3.455192292434183, "tokens_seen": 2025848832 }, { "epoch": 0.23, "learning_rate": 0.0003899542646232849, "loss": 0.0673, "theoretical_loss": 3.4551738542702255, "tokens_seen": 2025979904 }, { "epoch": 0.23, "learning_rate": 0.00038991414587178047, "loss": 0.0721, "theoretical_loss": 3.455155417633076, "tokens_seen": 2026110976 }, { "epoch": 0.23, "learning_rate": 0.00038987402712027604, "loss": 0.0648, "theoretical_loss": 3.455136982522509, "tokens_seen": 2026242048 }, { "epoch": 0.23, "learning_rate": 0.0003898339083687716, "loss": 0.0689, "theoretical_loss": 3.4551185489383007, "tokens_seen": 2026373120 }, { "epoch": 0.23, "learning_rate": 0.0003897937896172671, "loss": 0.0689, "theoretical_loss": 3.455100116880225, "tokens_seen": 2026504192 }, { "epoch": 0.23, "learning_rate": 0.0003897536708657627, "loss": 0.0669, "theoretical_loss": 3.4550816863480565, "tokens_seen": 2026635264 }, { "epoch": 0.23, "objective/train/advantage_avg": 4.118962897337042e-05, "objective/train/docs_used": 739363, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3518918752670288, "objective/train/original_loss": 1.3518917560577393, "objective/train/theoretical_loss": 3.455063257341571, "objective/train/tokens_used": 397290976, "objective/train/value_avg": -0.005977630615234375, "objective/train/value_loss": 0.00046748219756409526, "objective/train/value_max": -3.0219554901123047e-05, "objective/train/value_min": -0.51904296875, "objective/train/value_reward_corr": 0.5898396660692646, "objective/train/value_std": 0.01372528076171875, "objective/train/weight_avg": 1.0002379417419434, "objective/train/weighted_lm_loss": 1.3517839908599854, "objective/train/weights_max": 1.2407280206680298, "objective/train/weights_min": 0.3684791922569275, "theoretical_loss": 3.455063257341571, "tokens_seen": 2026766336 }, { "epoch": 0.23, "learning_rate": 0.0003897135521142582, "loss": 0.0663, "theoretical_loss": 3.455063257341571, "tokens_seen": 2026766336 }, { "epoch": 0.23, "learning_rate": 0.0003896734333627537, "loss": 0.0714, "theoretical_loss": 3.455044829860543, "tokens_seen": 2026897408 }, { "epoch": 0.23, "learning_rate": 0.0003896333146112493, "loss": 0.0692, "theoretical_loss": 3.455026403904747, "tokens_seen": 2027028480 }, { "epoch": 0.23, "learning_rate": 0.00038959319585974485, "loss": 0.0715, "theoretical_loss": 3.45500797947396, "tokens_seen": 2027159552 }, { "epoch": 0.23, "learning_rate": 0.00038955307710824037, "loss": 0.0703, "theoretical_loss": 3.4549895565679556, "tokens_seen": 2027290624 }, { "epoch": 0.23, "learning_rate": 0.00038951295835673594, "loss": 0.0693, "theoretical_loss": 3.45497113518651, "tokens_seen": 2027421696 }, { "epoch": 0.23, "learning_rate": 0.0003894728396052315, "loss": 0.0686, "theoretical_loss": 3.4549527153293975, "tokens_seen": 2027552768 }, { "epoch": 0.23, "learning_rate": 0.0003894327208537271, "loss": 0.0718, "theoretical_loss": 3.4549342969963943, "tokens_seen": 2027683840 }, { "epoch": 0.23, "learning_rate": 0.0003893926021022226, "loss": 0.0685, "theoretical_loss": 3.4549158801872757, "tokens_seen": 2027814912 }, { "epoch": 0.23, "learning_rate": 0.00038935248335071816, "loss": 0.0669, "theoretical_loss": 3.4548974649018165, "tokens_seen": 2027945984 }, { "epoch": 0.23, "learning_rate": 0.00038931236459921367, "loss": 0.0718, "theoretical_loss": 3.4548790511397924, "tokens_seen": 2028077056 }, { "epoch": 0.23, "learning_rate": 0.0003892722458477092, "loss": 0.0691, "theoretical_loss": 3.4548606389009793, "tokens_seen": 2028208128 }, { "epoch": 0.23, "learning_rate": 0.00038923212709620475, "loss": 0.0686, "theoretical_loss": 3.4548422281851527, "tokens_seen": 2028339200 }, { "epoch": 0.23, "learning_rate": 0.0003891920083447003, "loss": 0.0691, "theoretical_loss": 3.4548238189920877, "tokens_seen": 2028470272 }, { "epoch": 0.23, "learning_rate": 0.0003891518895931959, "loss": 0.0716, "theoretical_loss": 3.4548054113215607, "tokens_seen": 2028601344 }, { "epoch": 0.23, "learning_rate": 0.0003891117708416914, "loss": 0.07, "theoretical_loss": 3.4547870051733467, "tokens_seen": 2028732416 }, { "epoch": 0.23, "learning_rate": 0.00038907165209018697, "loss": 0.0672, "theoretical_loss": 3.4547686005472222, "tokens_seen": 2028863488 }, { "epoch": 0.23, "learning_rate": 0.00038903153333868254, "loss": 0.0663, "theoretical_loss": 3.4547501974429626, "tokens_seen": 2028994560 }, { "epoch": 0.23, "learning_rate": 0.00038899141458717805, "loss": 0.0678, "theoretical_loss": 3.4547317958603436, "tokens_seen": 2029125632 }, { "epoch": 0.23, "learning_rate": 0.0003889512958356736, "loss": 0.0678, "theoretical_loss": 3.454713395799142, "tokens_seen": 2029256704 }, { "epoch": 0.23, "learning_rate": 0.00038891117708416914, "loss": 0.0672, "theoretical_loss": 3.454694997259132, "tokens_seen": 2029387776 }, { "epoch": 0.23, "learning_rate": 0.00038887105833266465, "loss": 0.0688, "theoretical_loss": 3.4546766002400915, "tokens_seen": 2029518848 }, { "epoch": 0.23, "learning_rate": 0.0003888309395811602, "loss": 0.0705, "theoretical_loss": 3.4546582047417953, "tokens_seen": 2029649920 }, { "epoch": 0.23, "learning_rate": 0.0003887908208296558, "loss": 0.0713, "theoretical_loss": 3.4546398107640197, "tokens_seen": 2029780992 }, { "epoch": 0.23, "learning_rate": 0.00038875070207815136, "loss": 0.0661, "theoretical_loss": 3.454621418306542, "tokens_seen": 2029912064 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0004923826199956238, "objective/train/docs_used": 740596, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4202457666397095, "objective/train/original_loss": 1.4202457666397095, "objective/train/theoretical_loss": 3.4546030273691364, "objective/train/tokens_used": 400567776, "objective/train/value_avg": -0.006244659423828125, "objective/train/value_loss": 0.00020335345470812172, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.2391357421875, "objective/train/value_reward_corr": 0.5976162539693206, "objective/train/value_std": 0.011016845703125, "objective/train/weight_avg": 1.0005829334259033, "objective/train/weighted_lm_loss": 1.420710563659668, "objective/train/weights_max": 1.1978663206100464, "objective/train/weights_min": 0.375049352645874, "theoretical_loss": 3.4546030273691364, "tokens_seen": 2030043136 }, { "epoch": 0.23, "learning_rate": 0.00038871058332664687, "loss": 0.0685, "theoretical_loss": 3.4546030273691364, "tokens_seen": 2030043136 }, { "epoch": 0.23, "learning_rate": 0.00038867046457514244, "loss": 0.0688, "theoretical_loss": 3.4545846379515806, "tokens_seen": 2030174208 }, { "epoch": 0.23, "learning_rate": 0.000388630345823638, "loss": 0.0697, "theoretical_loss": 3.4545662500536505, "tokens_seen": 2030305280 }, { "epoch": 0.23, "learning_rate": 0.0003885902270721335, "loss": 0.0679, "theoretical_loss": 3.4545478636751223, "tokens_seen": 2030436352 }, { "epoch": 0.23, "learning_rate": 0.0003885501083206291, "loss": 0.0687, "theoretical_loss": 3.4545294788157728, "tokens_seen": 2030567424 }, { "epoch": 0.23, "learning_rate": 0.00038850998956912466, "loss": 0.0669, "theoretical_loss": 3.4545110954753775, "tokens_seen": 2030698496 }, { "epoch": 0.23, "learning_rate": 0.0003884698708176201, "loss": 0.0691, "theoretical_loss": 3.454492713653714, "tokens_seen": 2030829568 }, { "epoch": 0.23, "learning_rate": 0.0003884297520661157, "loss": 0.0686, "theoretical_loss": 3.454474333350558, "tokens_seen": 2030960640 }, { "epoch": 0.23, "learning_rate": 0.00038838963331461125, "loss": 0.0692, "theoretical_loss": 3.4544559545656863, "tokens_seen": 2031091712 }, { "epoch": 0.23, "learning_rate": 0.0003883495145631068, "loss": 0.0721, "theoretical_loss": 3.454437577298876, "tokens_seen": 2031222784 }, { "epoch": 0.23, "learning_rate": 0.00038830939581160234, "loss": 0.0712, "theoretical_loss": 3.4544192015499027, "tokens_seen": 2031353856 }, { "epoch": 0.23, "learning_rate": 0.0003882692770600979, "loss": 0.0664, "theoretical_loss": 3.4544008273185445, "tokens_seen": 2031484928 }, { "epoch": 0.23, "learning_rate": 0.0003882291583085935, "loss": 0.072, "theoretical_loss": 3.454382454604577, "tokens_seen": 2031616000 }, { "epoch": 0.23, "learning_rate": 0.000388189039557089, "loss": 0.0681, "theoretical_loss": 3.4543640834077776, "tokens_seen": 2031747072 }, { "epoch": 0.23, "learning_rate": 0.00038814892080558456, "loss": 0.0699, "theoretical_loss": 3.454345713727923, "tokens_seen": 2031878144 }, { "epoch": 0.23, "learning_rate": 0.0003881088020540801, "loss": 0.0694, "theoretical_loss": 3.45432734556479, "tokens_seen": 2032009216 }, { "epoch": 0.23, "learning_rate": 0.0003880686833025756, "loss": 0.0701, "theoretical_loss": 3.4543089789181556, "tokens_seen": 2032140288 }, { "epoch": 0.23, "learning_rate": 0.00038802856455107115, "loss": 0.0689, "theoretical_loss": 3.454290613787797, "tokens_seen": 2032271360 }, { "epoch": 0.23, "learning_rate": 0.0003879884457995667, "loss": 0.0644, "theoretical_loss": 3.4542722501734904, "tokens_seen": 2032402432 }, { "epoch": 0.23, "learning_rate": 0.0003879483270480623, "loss": 0.0684, "theoretical_loss": 3.454253888075014, "tokens_seen": 2032533504 }, { "epoch": 0.23, "learning_rate": 0.0003879082082965578, "loss": 0.0683, "theoretical_loss": 3.454235527492145, "tokens_seen": 2032664576 }, { "epoch": 0.23, "learning_rate": 0.00038786808954505337, "loss": 0.0654, "theoretical_loss": 3.4542171684246594, "tokens_seen": 2032795648 }, { "epoch": 0.23, "learning_rate": 0.00038782797079354894, "loss": 0.0665, "theoretical_loss": 3.4541988108723354, "tokens_seen": 2032926720 }, { "epoch": 0.23, "learning_rate": 0.00038778785204204446, "loss": 0.0683, "theoretical_loss": 3.45418045483495, "tokens_seen": 2033057792 }, { "epoch": 0.23, "learning_rate": 0.00038774773329054, "loss": 0.0713, "theoretical_loss": 3.4541621003122804, "tokens_seen": 2033188864 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0001826168445404619, "objective/train/docs_used": 741743, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4240305423736572, "objective/train/original_loss": 1.4240307807922363, "objective/train/theoretical_loss": 3.454143747304104, "objective/train/tokens_used": 403844576, "objective/train/value_avg": -0.006572723388671875, "objective/train/value_loss": 0.0001325407502008602, "objective/train/value_max": -7.253885269165039e-05, "objective/train/value_min": -0.2454833984375, "objective/train/value_reward_corr": 0.7629041734820269, "objective/train/value_std": 0.01139068603515625, "objective/train/weight_avg": 1.0002474784851074, "objective/train/weighted_lm_loss": 1.4241974353790283, "objective/train/weights_max": 1.097815990447998, "objective/train/weights_min": 0.6203081011772156, "theoretical_loss": 3.454143747304104, "tokens_seen": 2033319936 }, { "epoch": 0.23, "learning_rate": 0.0003877076145390356, "loss": 0.0708, "theoretical_loss": 3.454143747304104, "tokens_seen": 2033319936 }, { "epoch": 0.23, "learning_rate": 0.00038766749578753105, "loss": 0.069, "theoretical_loss": 3.4541253958101983, "tokens_seen": 2033451008 }, { "epoch": 0.23, "learning_rate": 0.0003876273770360266, "loss": 0.0668, "theoretical_loss": 3.454107045830341, "tokens_seen": 2033582080 }, { "epoch": 0.23, "learning_rate": 0.0003875872582845222, "loss": 0.0711, "theoretical_loss": 3.454088697364309, "tokens_seen": 2033713152 }, { "epoch": 0.23, "learning_rate": 0.00038754713953301776, "loss": 0.0664, "theoretical_loss": 3.4540703504118806, "tokens_seen": 2033844224 }, { "epoch": 0.23, "learning_rate": 0.00038750702078151327, "loss": 0.0656, "theoretical_loss": 3.454052004972833, "tokens_seen": 2033975296 }, { "epoch": 0.23, "learning_rate": 0.00038746690203000884, "loss": 0.0707, "theoretical_loss": 3.454033661046944, "tokens_seen": 2034106368 }, { "epoch": 0.23, "learning_rate": 0.0003874267832785044, "loss": 0.0709, "theoretical_loss": 3.4540153186339912, "tokens_seen": 2034237440 }, { "epoch": 0.23, "learning_rate": 0.0003873866645269999, "loss": 0.0675, "theoretical_loss": 3.453996977733752, "tokens_seen": 2034368512 }, { "epoch": 0.23, "learning_rate": 0.0003873465457754955, "loss": 0.0679, "theoretical_loss": 3.4539786383460047, "tokens_seen": 2034499584 }, { "epoch": 0.23, "learning_rate": 0.00038730642702399106, "loss": 0.0713, "theoretical_loss": 3.453960300470527, "tokens_seen": 2034630656 }, { "epoch": 0.23, "learning_rate": 0.0003872663082724865, "loss": 0.0677, "theoretical_loss": 3.4539419641070968, "tokens_seen": 2034761728 }, { "epoch": 0.23, "learning_rate": 0.0003872261895209821, "loss": 0.0677, "theoretical_loss": 3.453923629255492, "tokens_seen": 2034892800 }, { "epoch": 0.23, "learning_rate": 0.00038718607076947766, "loss": 0.0688, "theoretical_loss": 3.4539052959154906, "tokens_seen": 2035023872 }, { "epoch": 0.23, "learning_rate": 0.0003871459520179732, "loss": 0.0656, "theoretical_loss": 3.4538869640868706, "tokens_seen": 2035154944 }, { "epoch": 0.23, "learning_rate": 0.00038710583326646874, "loss": 0.0707, "theoretical_loss": 3.4538686337694102, "tokens_seen": 2035286016 }, { "epoch": 0.23, "learning_rate": 0.0003870657145149643, "loss": 0.0672, "theoretical_loss": 3.453850304962887, "tokens_seen": 2035417088 }, { "epoch": 0.23, "learning_rate": 0.0003870255957634599, "loss": 0.0686, "theoretical_loss": 3.45383197766708, "tokens_seen": 2035548160 }, { "epoch": 0.23, "learning_rate": 0.0003869854770119554, "loss": 0.0712, "theoretical_loss": 3.453813651881767, "tokens_seen": 2035679232 }, { "epoch": 0.23, "learning_rate": 0.00038694535826045096, "loss": 0.0685, "theoretical_loss": 3.453795327606726, "tokens_seen": 2035810304 }, { "epoch": 0.23, "learning_rate": 0.0003869052395089465, "loss": 0.0699, "theoretical_loss": 3.453777004841736, "tokens_seen": 2035941376 }, { "epoch": 0.23, "learning_rate": 0.000386865120757442, "loss": 0.0684, "theoretical_loss": 3.4537586835865746, "tokens_seen": 2036072448 }, { "epoch": 0.23, "learning_rate": 0.00038682500200593755, "loss": 0.0689, "theoretical_loss": 3.4537403638410207, "tokens_seen": 2036203520 }, { "epoch": 0.23, "learning_rate": 0.0003867848832544331, "loss": 0.0685, "theoretical_loss": 3.4537220456048523, "tokens_seen": 2036334592 }, { "epoch": 0.23, "learning_rate": 0.0003867447645029287, "loss": 0.0718, "theoretical_loss": 3.4537037288778487, "tokens_seen": 2036465664 }, { "epoch": 0.23, "objective/train/advantage_avg": 0.0010665758745744824, "objective/train/docs_used": 742794, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.451891541481018, "objective/train/original_loss": 1.4518916606903076, "objective/train/theoretical_loss": 3.453685413659788, "objective/train/tokens_used": 407121376, "objective/train/value_avg": -0.00504302978515625, "objective/train/value_loss": 9.951597166946158e-05, "objective/train/value_max": -4.1961669921875e-05, "objective/train/value_min": -0.27294921875, "objective/train/value_reward_corr": 0.648999231433149, "objective/train/value_std": 0.00887298583984375, "objective/train/weight_avg": 1.0011121034622192, "objective/train/weighted_lm_loss": 1.4536211490631104, "objective/train/weights_max": 1.2527363300323486, "objective/train/weights_min": 0.3691045045852661, "theoretical_loss": 3.453685413659788, "tokens_seen": 2036596736 }, { "epoch": 0.23, "learning_rate": 0.0003867046457514242, "loss": 0.0672, "theoretical_loss": 3.453685413659788, "tokens_seen": 2036596736 }, { "epoch": 0.23, "learning_rate": 0.0003866645269999198, "loss": 0.0665, "theoretical_loss": 3.453667099950448, "tokens_seen": 2036727808 }, { "epoch": 0.23, "learning_rate": 0.00038662440824841534, "loss": 0.0701, "theoretical_loss": 3.4536487877496085, "tokens_seen": 2036858880 }, { "epoch": 0.23, "learning_rate": 0.00038658428949691086, "loss": 0.0668, "theoretical_loss": 3.4536304770570476, "tokens_seen": 2036989952 }, { "epoch": 0.23, "learning_rate": 0.0003865441707454064, "loss": 0.0717, "theoretical_loss": 3.4536121678725444, "tokens_seen": 2037121024 }, { "epoch": 0.23, "learning_rate": 0.000386504051993902, "loss": 0.07, "theoretical_loss": 3.4535938601958773, "tokens_seen": 2037252096 }, { "epoch": 0.23, "learning_rate": 0.0003864639332423975, "loss": 0.069, "theoretical_loss": 3.453575554026825, "tokens_seen": 2037383168 }, { "epoch": 0.23, "learning_rate": 0.000386423814490893, "loss": 0.0684, "theoretical_loss": 3.4535572493651676, "tokens_seen": 2037514240 }, { "epoch": 0.23, "learning_rate": 0.0003863836957393886, "loss": 0.0686, "theoretical_loss": 3.4535389462106822, "tokens_seen": 2037645312 }, { "epoch": 0.24, "learning_rate": 0.00038634357698788416, "loss": 0.0705, "theoretical_loss": 3.453520644563149, "tokens_seen": 2037776384 }, { "epoch": 0.24, "learning_rate": 0.00038630345823637967, "loss": 0.0673, "theoretical_loss": 3.4535023444223465, "tokens_seen": 2037907456 }, { "epoch": 0.24, "learning_rate": 0.00038626333948487524, "loss": 0.0701, "theoretical_loss": 3.4534840457880542, "tokens_seen": 2038038528 }, { "epoch": 0.24, "learning_rate": 0.0003862232207333708, "loss": 0.0667, "theoretical_loss": 3.453465748660051, "tokens_seen": 2038169600 }, { "epoch": 0.24, "learning_rate": 0.0003861831019818663, "loss": 0.0685, "theoretical_loss": 3.453447453038115, "tokens_seen": 2038300672 }, { "epoch": 0.24, "learning_rate": 0.0003861429832303619, "loss": 0.0682, "theoretical_loss": 3.4534291589220274, "tokens_seen": 2038431744 }, { "epoch": 0.24, "learning_rate": 0.00038610286447885746, "loss": 0.0708, "theoretical_loss": 3.453410866311566, "tokens_seen": 2038562816 }, { "epoch": 0.24, "learning_rate": 0.000386062745727353, "loss": 0.0693, "theoretical_loss": 3.4533925752065104, "tokens_seen": 2038693888 }, { "epoch": 0.24, "learning_rate": 0.0003860226269758485, "loss": 0.0695, "theoretical_loss": 3.4533742856066403, "tokens_seen": 2038824960 }, { "epoch": 0.24, "learning_rate": 0.00038598250822434406, "loss": 0.0673, "theoretical_loss": 3.4533559975117347, "tokens_seen": 2038956032 }, { "epoch": 0.24, "learning_rate": 0.0003859423894728396, "loss": 0.0681, "theoretical_loss": 3.453337710921573, "tokens_seen": 2039087104 }, { "epoch": 0.24, "learning_rate": 0.00038590227072133514, "loss": 0.0691, "theoretical_loss": 3.453319425835935, "tokens_seen": 2039218176 }, { "epoch": 0.24, "learning_rate": 0.0003858621519698307, "loss": 0.0681, "theoretical_loss": 3.4533011422546, "tokens_seen": 2039349248 }, { "epoch": 0.24, "learning_rate": 0.0003858220332183263, "loss": 0.0715, "theoretical_loss": 3.4532828601773478, "tokens_seen": 2039480320 }, { "epoch": 0.24, "learning_rate": 0.0003857819144668218, "loss": 0.0687, "theoretical_loss": 3.4532645796039576, "tokens_seen": 2039611392 }, { "epoch": 0.24, "learning_rate": 0.00038574179571531736, "loss": 0.0661, "theoretical_loss": 3.453246300534209, "tokens_seen": 2039742464 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.00018867237668018788, "objective/train/docs_used": 744036, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4881644248962402, "objective/train/original_loss": 1.4881644248962402, "objective/train/theoretical_loss": 3.453228022967883, "objective/train/tokens_used": 410398176, "objective/train/value_avg": -0.005931854248046875, "objective/train/value_loss": 0.00012204200902488083, "objective/train/value_max": -3.510713577270508e-05, "objective/train/value_min": -0.181640625, "objective/train/value_reward_corr": 0.6900020011022656, "objective/train/value_std": 0.009490966796875, "objective/train/weight_avg": 1.000244379043579, "objective/train/weighted_lm_loss": 1.4889687299728394, "objective/train/weights_max": 1.1562033891677856, "objective/train/weights_min": 0.3757195472717285, "theoretical_loss": 3.453228022967883, "tokens_seen": 2039873536 }, { "epoch": 0.24, "learning_rate": 0.00038570167696381293, "loss": 0.0694, "theoretical_loss": 3.453228022967883, "tokens_seen": 2039873536 }, { "epoch": 0.24, "learning_rate": 0.00038566155821230844, "loss": 0.07, "theoretical_loss": 3.4532097469047573, "tokens_seen": 2040004608 }, { "epoch": 0.24, "learning_rate": 0.00038562143946080396, "loss": 0.0678, "theoretical_loss": 3.4531914723446135, "tokens_seen": 2040135680 }, { "epoch": 0.24, "learning_rate": 0.0003855813207092995, "loss": 0.0698, "theoretical_loss": 3.4531731992872303, "tokens_seen": 2040266752 }, { "epoch": 0.24, "learning_rate": 0.0003855412019577951, "loss": 0.0712, "theoretical_loss": 3.4531549277323883, "tokens_seen": 2040397824 }, { "epoch": 0.24, "learning_rate": 0.0003855010832062906, "loss": 0.0635, "theoretical_loss": 3.4531366576798668, "tokens_seen": 2040528896 }, { "epoch": 0.24, "learning_rate": 0.0003854609644547862, "loss": 0.0686, "theoretical_loss": 3.4531183891294464, "tokens_seen": 2040659968 }, { "epoch": 0.24, "learning_rate": 0.00038542084570328174, "loss": 0.0664, "theoretical_loss": 3.4531001220809068, "tokens_seen": 2040791040 }, { "epoch": 0.24, "learning_rate": 0.00038538072695177726, "loss": 0.0686, "theoretical_loss": 3.453081856534028, "tokens_seen": 2040922112 }, { "epoch": 0.24, "learning_rate": 0.0003853406082002728, "loss": 0.0677, "theoretical_loss": 3.453063592488591, "tokens_seen": 2041053184 }, { "epoch": 0.24, "learning_rate": 0.0003853004894487684, "loss": 0.073, "theoretical_loss": 3.4530453299443744, "tokens_seen": 2041184256 }, { "epoch": 0.24, "learning_rate": 0.0003852603706972639, "loss": 0.0697, "theoretical_loss": 3.4530270689011595, "tokens_seen": 2041315328 }, { "epoch": 0.24, "learning_rate": 0.0003852202519457594, "loss": 0.0655, "theoretical_loss": 3.4530088093587263, "tokens_seen": 2041446400 }, { "epoch": 0.24, "learning_rate": 0.000385180133194255, "loss": 0.0673, "theoretical_loss": 3.4529905513168555, "tokens_seen": 2041577472 }, { "epoch": 0.24, "learning_rate": 0.00038514001444275056, "loss": 0.0692, "theoretical_loss": 3.4529722947753267, "tokens_seen": 2041708544 }, { "epoch": 0.24, "learning_rate": 0.0003850998956912461, "loss": 0.0689, "theoretical_loss": 3.452954039733921, "tokens_seen": 2041839616 }, { "epoch": 0.24, "learning_rate": 0.00038505977693974164, "loss": 0.0694, "theoretical_loss": 3.452935786192419, "tokens_seen": 2041970688 }, { "epoch": 0.24, "learning_rate": 0.0003850196581882372, "loss": 0.068, "theoretical_loss": 3.4529175341505995, "tokens_seen": 2042101760 }, { "epoch": 0.24, "learning_rate": 0.0003849795394367327, "loss": 0.0711, "theoretical_loss": 3.4528992836082453, "tokens_seen": 2042232832 }, { "epoch": 0.24, "learning_rate": 0.0003849394206852283, "loss": 0.0673, "theoretical_loss": 3.4528810345651357, "tokens_seen": 2042363904 }, { "epoch": 0.24, "learning_rate": 0.00038489930193372386, "loss": 0.0702, "theoretical_loss": 3.4528627870210515, "tokens_seen": 2042494976 }, { "epoch": 0.24, "learning_rate": 0.0003848591831822194, "loss": 0.0683, "theoretical_loss": 3.4528445409757738, "tokens_seen": 2042626048 }, { "epoch": 0.24, "learning_rate": 0.0003848190644307149, "loss": 0.0684, "theoretical_loss": 3.452826296429083, "tokens_seen": 2042757120 }, { "epoch": 0.24, "learning_rate": 0.00038477894567921046, "loss": 0.0656, "theoretical_loss": 3.45280805338076, "tokens_seen": 2042888192 }, { "epoch": 0.24, "learning_rate": 0.000384738826927706, "loss": 0.0707, "theoretical_loss": 3.452789811830585, "tokens_seen": 2043019264 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.00033412178163416684, "objective/train/docs_used": 745253, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3693361282348633, "objective/train/original_loss": 1.3693358898162842, "objective/train/theoretical_loss": 3.45277157177834, "objective/train/tokens_used": 413674976, "objective/train/value_avg": -0.006221771240234375, "objective/train/value_loss": 0.0002278939646203071, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.75927734375, "objective/train/value_reward_corr": 0.5099760198421093, "objective/train/value_std": 0.0114288330078125, "objective/train/weight_avg": 1.0004394054412842, "objective/train/weighted_lm_loss": 1.3689289093017578, "objective/train/weights_max": 2.0643069744110107, "objective/train/weights_min": 0.37368983030319214, "theoretical_loss": 3.45277157177834, "tokens_seen": 2043150336 }, { "epoch": 0.24, "learning_rate": 0.00038469870817620154, "loss": 0.0705, "theoretical_loss": 3.45277157177834, "tokens_seen": 2043150336 }, { "epoch": 0.24, "learning_rate": 0.0003846585894246971, "loss": 0.0723, "theoretical_loss": 3.452753333223805, "tokens_seen": 2043281408 }, { "epoch": 0.24, "learning_rate": 0.0003846184706731927, "loss": 0.0677, "theoretical_loss": 3.4527350961667613, "tokens_seen": 2043412480 }, { "epoch": 0.24, "learning_rate": 0.0003845783519216882, "loss": 0.067, "theoretical_loss": 3.45271686060699, "tokens_seen": 2043543552 }, { "epoch": 0.24, "learning_rate": 0.00038453823317018376, "loss": 0.0689, "theoretical_loss": 3.452698626544272, "tokens_seen": 2043674624 }, { "epoch": 0.24, "learning_rate": 0.00038449811441867933, "loss": 0.0687, "theoretical_loss": 3.4526803939783886, "tokens_seen": 2043805696 }, { "epoch": 0.24, "learning_rate": 0.00038445799566717484, "loss": 0.0733, "theoretical_loss": 3.45266216290912, "tokens_seen": 2043936768 }, { "epoch": 0.24, "learning_rate": 0.00038441787691567036, "loss": 0.0677, "theoretical_loss": 3.4526439333362493, "tokens_seen": 2044067840 }, { "epoch": 0.24, "learning_rate": 0.0003843777581641659, "loss": 0.0707, "theoretical_loss": 3.452625705259556, "tokens_seen": 2044198912 }, { "epoch": 0.24, "learning_rate": 0.0003843376394126615, "loss": 0.0663, "theoretical_loss": 3.4526074786788215, "tokens_seen": 2044329984 }, { "epoch": 0.24, "learning_rate": 0.000384297520661157, "loss": 0.0688, "theoretical_loss": 3.4525892535938283, "tokens_seen": 2044461056 }, { "epoch": 0.24, "learning_rate": 0.0003842574019096526, "loss": 0.0676, "theoretical_loss": 3.452571030004357, "tokens_seen": 2044592128 }, { "epoch": 0.24, "learning_rate": 0.00038421728315814814, "loss": 0.0703, "theoretical_loss": 3.4525528079101884, "tokens_seen": 2044723200 }, { "epoch": 0.24, "learning_rate": 0.0003841771644066437, "loss": 0.0708, "theoretical_loss": 3.4525345873111055, "tokens_seen": 2044854272 }, { "epoch": 0.24, "learning_rate": 0.0003841370456551392, "loss": 0.0705, "theoretical_loss": 3.4525163682068882, "tokens_seen": 2044985344 }, { "epoch": 0.24, "learning_rate": 0.0003840969269036348, "loss": 0.0714, "theoretical_loss": 3.45249815059732, "tokens_seen": 2045116416 }, { "epoch": 0.24, "learning_rate": 0.0003840568081521303, "loss": 0.0679, "theoretical_loss": 3.45247993448218, "tokens_seen": 2045247488 }, { "epoch": 0.24, "learning_rate": 0.0003840166894006258, "loss": 0.0664, "theoretical_loss": 3.452461719861252, "tokens_seen": 2045378560 }, { "epoch": 0.24, "learning_rate": 0.0003839765706491214, "loss": 0.069, "theoretical_loss": 3.4524435067343164, "tokens_seen": 2045509632 }, { "epoch": 0.24, "learning_rate": 0.00038393645189761696, "loss": 0.0638, "theoretical_loss": 3.452425295101156, "tokens_seen": 2045640704 }, { "epoch": 0.24, "learning_rate": 0.0003838963331461125, "loss": 0.0712, "theoretical_loss": 3.452407084961551, "tokens_seen": 2045771776 }, { "epoch": 0.24, "learning_rate": 0.00038385621439460804, "loss": 0.0692, "theoretical_loss": 3.4523888763152852, "tokens_seen": 2045902848 }, { "epoch": 0.24, "learning_rate": 0.0003838160956431036, "loss": 0.0656, "theoretical_loss": 3.452370669162139, "tokens_seen": 2046033920 }, { "epoch": 0.24, "learning_rate": 0.0003837759768915992, "loss": 0.0723, "theoretical_loss": 3.452352463501895, "tokens_seen": 2046164992 }, { "epoch": 0.24, "learning_rate": 0.0003837358581400947, "loss": 0.0653, "theoretical_loss": 3.452334259334335, "tokens_seen": 2046296064 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.00040395648102276027, "objective/train/docs_used": 746480, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3045461177825928, "objective/train/original_loss": 1.3045461177825928, "objective/train/theoretical_loss": 3.4523160566592406, "objective/train/tokens_used": 416951776, "objective/train/value_avg": -0.00965118408203125, "objective/train/value_loss": 0.00019918604812119156, "objective/train/value_max": -4.947185516357422e-05, "objective/train/value_min": -0.436279296875, "objective/train/value_reward_corr": 0.7336048167183327, "objective/train/value_std": 0.014373779296875, "objective/train/weight_avg": 1.0004984140396118, "objective/train/weighted_lm_loss": 1.3058470487594604, "objective/train/weights_max": 1.3410087823867798, "objective/train/weights_min": 0.3732965886592865, "theoretical_loss": 3.4523160566592406, "tokens_seen": 2046427136 }, { "epoch": 0.24, "learning_rate": 0.00038369573938859026, "loss": 0.0685, "theoretical_loss": 3.4523160566592406, "tokens_seen": 2046427136 }, { "epoch": 0.24, "learning_rate": 0.0003836556206370858, "loss": 0.0684, "theoretical_loss": 3.452297855476395, "tokens_seen": 2046558208 }, { "epoch": 0.24, "learning_rate": 0.0003836155018855813, "loss": 0.0658, "theoretical_loss": 3.4522796557855786, "tokens_seen": 2046689280 }, { "epoch": 0.24, "learning_rate": 0.00038357538313407686, "loss": 0.0698, "theoretical_loss": 3.4522614575865753, "tokens_seen": 2046820352 }, { "epoch": 0.24, "learning_rate": 0.00038353526438257243, "loss": 0.0701, "theoretical_loss": 3.4522432608791664, "tokens_seen": 2046951424 }, { "epoch": 0.24, "learning_rate": 0.00038349514563106794, "loss": 0.0683, "theoretical_loss": 3.452225065663134, "tokens_seen": 2047082496 }, { "epoch": 0.24, "learning_rate": 0.0003834550268795635, "loss": 0.0746, "theoretical_loss": 3.452206871938261, "tokens_seen": 2047213568 }, { "epoch": 0.24, "learning_rate": 0.0003834149081280591, "loss": 0.0717, "theoretical_loss": 3.4521886797043293, "tokens_seen": 2047344640 }, { "epoch": 0.24, "learning_rate": 0.00038337478937655465, "loss": 0.0687, "theoretical_loss": 3.4521704889611216, "tokens_seen": 2047475712 }, { "epoch": 0.24, "learning_rate": 0.00038333467062505016, "loss": 0.066, "theoretical_loss": 3.4521522997084197, "tokens_seen": 2047606784 }, { "epoch": 0.24, "learning_rate": 0.00038329455187354573, "loss": 0.0706, "theoretical_loss": 3.4521341119460067, "tokens_seen": 2047737856 }, { "epoch": 0.24, "learning_rate": 0.00038325443312204124, "loss": 0.0664, "theoretical_loss": 3.452115925673665, "tokens_seen": 2047868928 }, { "epoch": 0.24, "learning_rate": 0.00038321431437053676, "loss": 0.0674, "theoretical_loss": 3.4520977408911766, "tokens_seen": 2048000000 }, { "epoch": 0.24, "learning_rate": 0.0003831741956190323, "loss": 0.0734, "theoretical_loss": 3.4520795575983247, "tokens_seen": 2048131072 }, { "epoch": 0.24, "learning_rate": 0.0003831340768675279, "loss": 0.0696, "theoretical_loss": 3.4520613757948917, "tokens_seen": 2048262144 }, { "epoch": 0.24, "learning_rate": 0.0003830939581160234, "loss": 0.0676, "theoretical_loss": 3.4520431954806607, "tokens_seen": 2048393216 }, { "epoch": 0.24, "learning_rate": 0.000383053839364519, "loss": 0.0681, "theoretical_loss": 3.4520250166554147, "tokens_seen": 2048524288 }, { "epoch": 0.24, "learning_rate": 0.00038301372061301455, "loss": 0.0659, "theoretical_loss": 3.452006839318935, "tokens_seen": 2048655360 }, { "epoch": 0.24, "learning_rate": 0.0003829736018615101, "loss": 0.0723, "theoretical_loss": 3.451988663471006, "tokens_seen": 2048786432 }, { "epoch": 0.24, "learning_rate": 0.00038293348311000563, "loss": 0.0652, "theoretical_loss": 3.45197048911141, "tokens_seen": 2048917504 }, { "epoch": 0.24, "learning_rate": 0.0003828933643585012, "loss": 0.0716, "theoretical_loss": 3.4519523162399297, "tokens_seen": 2049048576 }, { "epoch": 0.24, "learning_rate": 0.0003828532456069967, "loss": 0.0674, "theoretical_loss": 3.451934144856348, "tokens_seen": 2049179648 }, { "epoch": 0.24, "learning_rate": 0.0003828131268554922, "loss": 0.0686, "theoretical_loss": 3.4519159749604484, "tokens_seen": 2049310720 }, { "epoch": 0.24, "learning_rate": 0.0003827730081039878, "loss": 0.0677, "theoretical_loss": 3.451897806552014, "tokens_seen": 2049441792 }, { "epoch": 0.24, "learning_rate": 0.00038273288935248336, "loss": 0.0687, "theoretical_loss": 3.4518796396308273, "tokens_seen": 2049572864 }, { "epoch": 0.24, "objective/train/advantage_avg": 0.0002215143176726997, "objective/train/docs_used": 747702, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.433394432067871, "objective/train/original_loss": 1.433394193649292, "objective/train/theoretical_loss": 3.451861474196672, "objective/train/tokens_used": 420228576, "objective/train/value_avg": -0.01090240478515625, "objective/train/value_loss": 0.00027676441823132336, "objective/train/value_max": -2.4139881134033203e-05, "objective/train/value_min": -0.658203125, "objective/train/value_reward_corr": 0.790962317766713, "objective/train/value_std": 0.021270751953125, "objective/train/weight_avg": 1.0003517866134644, "objective/train/weighted_lm_loss": 1.4332689046859741, "objective/train/weights_max": 1.2624177932739258, "objective/train/weights_min": 0.37432897090911865, "theoretical_loss": 3.451861474196672, "tokens_seen": 2049703936 }, { "epoch": 0.24, "learning_rate": 0.0003826927706009789, "loss": 0.069, "theoretical_loss": 3.451861474196672, "tokens_seen": 2049703936 }, { "epoch": 0.24, "learning_rate": 0.00038265265184947444, "loss": 0.0683, "theoretical_loss": 3.4518433102493313, "tokens_seen": 2049835008 }, { "epoch": 0.24, "learning_rate": 0.00038261253309797, "loss": 0.0712, "theoretical_loss": 3.4518251477885884, "tokens_seen": 2049966080 }, { "epoch": 0.24, "learning_rate": 0.0003825724143464656, "loss": 0.0699, "theoretical_loss": 3.4518069868142263, "tokens_seen": 2050097152 }, { "epoch": 0.24, "learning_rate": 0.0003825322955949611, "loss": 0.067, "theoretical_loss": 3.4517888273260287, "tokens_seen": 2050228224 }, { "epoch": 0.24, "learning_rate": 0.00038249217684345666, "loss": 0.0689, "theoretical_loss": 3.4517706693237784, "tokens_seen": 2050359296 }, { "epoch": 0.24, "learning_rate": 0.0003824520580919522, "loss": 0.0658, "theoretical_loss": 3.4517525128072593, "tokens_seen": 2050490368 }, { "epoch": 0.24, "learning_rate": 0.0003824119393404477, "loss": 0.0677, "theoretical_loss": 3.451734357776255, "tokens_seen": 2050621440 }, { "epoch": 0.24, "learning_rate": 0.00038237182058894326, "loss": 0.0659, "theoretical_loss": 3.4517162042305483, "tokens_seen": 2050752512 }, { "epoch": 0.24, "learning_rate": 0.00038233170183743883, "loss": 0.0671, "theoretical_loss": 3.451698052169924, "tokens_seen": 2050883584 }, { "epoch": 0.24, "learning_rate": 0.00038229158308593434, "loss": 0.0683, "theoretical_loss": 3.4516799015941646, "tokens_seen": 2051014656 }, { "epoch": 0.24, "learning_rate": 0.0003822514643344299, "loss": 0.0705, "theoretical_loss": 3.451661752503054, "tokens_seen": 2051145728 }, { "epoch": 0.24, "learning_rate": 0.0003822113455829255, "loss": 0.0693, "theoretical_loss": 3.451643604896377, "tokens_seen": 2051276800 }, { "epoch": 0.24, "learning_rate": 0.00038217122683142105, "loss": 0.0664, "theoretical_loss": 3.4516254587739157, "tokens_seen": 2051407872 }, { "epoch": 0.24, "learning_rate": 0.00038213110807991656, "loss": 0.0685, "theoretical_loss": 3.4516073141354546, "tokens_seen": 2051538944 }, { "epoch": 0.24, "learning_rate": 0.00038209098932841213, "loss": 0.0685, "theoretical_loss": 3.4515891709807773, "tokens_seen": 2051670016 }, { "epoch": 0.24, "learning_rate": 0.00038205087057690764, "loss": 0.0714, "theoretical_loss": 3.451571029309668, "tokens_seen": 2051801088 }, { "epoch": 0.24, "learning_rate": 0.00038201075182540316, "loss": 0.0675, "theoretical_loss": 3.4515528891219107, "tokens_seen": 2051932160 }, { "epoch": 0.24, "learning_rate": 0.00038197063307389873, "loss": 0.0699, "theoretical_loss": 3.4515347504172893, "tokens_seen": 2052063232 }, { "epoch": 0.24, "learning_rate": 0.0003819305143223943, "loss": 0.0677, "theoretical_loss": 3.4515166131955874, "tokens_seen": 2052194304 }, { "epoch": 0.24, "learning_rate": 0.0003818903955708898, "loss": 0.0655, "theoretical_loss": 3.4514984774565898, "tokens_seen": 2052325376 }, { "epoch": 0.24, "learning_rate": 0.0003818502768193854, "loss": 0.0677, "theoretical_loss": 3.45148034320008, "tokens_seen": 2052456448 }, { "epoch": 0.24, "learning_rate": 0.00038181015806788095, "loss": 0.0673, "theoretical_loss": 3.4514622104258423, "tokens_seen": 2052587520 }, { "epoch": 0.24, "learning_rate": 0.0003817700393163765, "loss": 0.0683, "theoretical_loss": 3.451444079133661, "tokens_seen": 2052718592 }, { "epoch": 0.24, "learning_rate": 0.00038172992056487203, "loss": 0.0717, "theoretical_loss": 3.451425949323321, "tokens_seen": 2052849664 }, { "epoch": 0.24, "objective/train/advantage_avg": -0.0018435309175401926, "objective/train/docs_used": 748910, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4042625427246094, "objective/train/original_loss": 1.4042625427246094, "objective/train/theoretical_loss": 3.4514078209946044, "objective/train/tokens_used": 423505376, "objective/train/value_avg": -0.0082550048828125, "objective/train/value_loss": 0.0001957119966391474, "objective/train/value_max": -2.6881694793701172e-05, "objective/train/value_min": -0.984375, "objective/train/value_reward_corr": 0.8414892906569198, "objective/train/value_std": 0.0201263427734375, "objective/train/weight_avg": 0.9982486367225647, "objective/train/weighted_lm_loss": 1.4026358127593994, "objective/train/weights_max": 1.217265248298645, "objective/train/weights_min": 0.36803728342056274, "theoretical_loss": 3.4514078209946044, "tokens_seen": 2052980736 }, { "epoch": 0.24, "learning_rate": 0.0003816898018133676, "loss": 0.0684, "theoretical_loss": 3.4514078209946044, "tokens_seen": 2052980736 }, { "epoch": 0.24, "learning_rate": 0.0003816496830618631, "loss": 0.068, "theoretical_loss": 3.451389694147298, "tokens_seen": 2053111808 }, { "epoch": 0.24, "learning_rate": 0.0003816095643103586, "loss": 0.0684, "theoretical_loss": 3.4513715687811852, "tokens_seen": 2053242880 }, { "epoch": 0.24, "learning_rate": 0.0003815694455588542, "loss": 0.0676, "theoretical_loss": 3.45135344489605, "tokens_seen": 2053373952 }, { "epoch": 0.24, "learning_rate": 0.00038152932680734976, "loss": 0.0696, "theoretical_loss": 3.451335322491678, "tokens_seen": 2053505024 }, { "epoch": 0.24, "learning_rate": 0.00038148920805584533, "loss": 0.0714, "theoretical_loss": 3.4513172015678526, "tokens_seen": 2053636096 }, { "epoch": 0.24, "learning_rate": 0.00038144908930434085, "loss": 0.0724, "theoretical_loss": 3.4512990821243585, "tokens_seen": 2053767168 }, { "epoch": 0.24, "learning_rate": 0.0003814089705528364, "loss": 0.0711, "theoretical_loss": 3.451280964160981, "tokens_seen": 2053898240 }, { "epoch": 0.24, "learning_rate": 0.000381368851801332, "loss": 0.0678, "theoretical_loss": 3.451262847677504, "tokens_seen": 2054029312 }, { "epoch": 0.24, "learning_rate": 0.0003813287330498275, "loss": 0.0686, "theoretical_loss": 3.451244732673713, "tokens_seen": 2054160384 }, { "epoch": 0.25, "learning_rate": 0.00038128861429832306, "loss": 0.0735, "theoretical_loss": 3.451226619149392, "tokens_seen": 2054291456 }, { "epoch": 0.25, "learning_rate": 0.0003812484955468186, "loss": 0.0702, "theoretical_loss": 3.451208507104326, "tokens_seen": 2054422528 }, { "epoch": 0.25, "learning_rate": 0.0003812083767953141, "loss": 0.0658, "theoretical_loss": 3.4511903965382995, "tokens_seen": 2054553600 }, { "epoch": 0.25, "learning_rate": 0.00038116825804380966, "loss": 0.0682, "theoretical_loss": 3.451172287451098, "tokens_seen": 2054684672 }, { "epoch": 0.25, "learning_rate": 0.00038112813929230523, "loss": 0.0707, "theoretical_loss": 3.451154179842507, "tokens_seen": 2054815744 }, { "epoch": 0.25, "learning_rate": 0.0003810880205408008, "loss": 0.0691, "theoretical_loss": 3.45113607371231, "tokens_seen": 2054946816 }, { "epoch": 0.25, "learning_rate": 0.0003810479017892963, "loss": 0.0689, "theoretical_loss": 3.4511179690602924, "tokens_seen": 2055077888 }, { "epoch": 0.25, "learning_rate": 0.0003810077830377919, "loss": 0.0732, "theoretical_loss": 3.4510998658862397, "tokens_seen": 2055208960 }, { "epoch": 0.25, "learning_rate": 0.00038096766428628745, "loss": 0.0701, "theoretical_loss": 3.4510817641899365, "tokens_seen": 2055340032 }, { "epoch": 0.25, "learning_rate": 0.00038092754553478296, "loss": 0.0672, "theoretical_loss": 3.451063663971169, "tokens_seen": 2055471104 }, { "epoch": 0.25, "learning_rate": 0.00038088742678327853, "loss": 0.0686, "theoretical_loss": 3.4510455652297214, "tokens_seen": 2055602176 }, { "epoch": 0.25, "learning_rate": 0.0003808473080317741, "loss": 0.0665, "theoretical_loss": 3.4510274679653787, "tokens_seen": 2055733248 }, { "epoch": 0.25, "learning_rate": 0.00038080718928026956, "loss": 0.0658, "theoretical_loss": 3.451009372177927, "tokens_seen": 2055864320 }, { "epoch": 0.25, "learning_rate": 0.00038076707052876513, "loss": 0.0668, "theoretical_loss": 3.4509912778671517, "tokens_seen": 2055995392 }, { "epoch": 0.25, "learning_rate": 0.0003807269517772607, "loss": 0.0665, "theoretical_loss": 3.4509731850328373, "tokens_seen": 2056126464 }, { "epoch": 0.25, "objective/train/advantage_avg": -0.0022307720500975847, "objective/train/docs_used": 750166, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2988632917404175, "objective/train/original_loss": 1.298863410949707, "objective/train/theoretical_loss": 3.4509550936747697, "objective/train/tokens_used": 426782176, "objective/train/value_avg": -0.008087158203125, "objective/train/value_loss": 0.00032781774643808603, "objective/train/value_max": -3.218650817871094e-05, "objective/train/value_min": -0.41650390625, "objective/train/value_reward_corr": 0.7855276184636695, "objective/train/value_std": 0.01415252685546875, "objective/train/weight_avg": 0.9979264736175537, "objective/train/weighted_lm_loss": 1.2962883710861206, "objective/train/weights_max": 1.5166499614715576, "objective/train/weights_min": 0.37208810448646545, "theoretical_loss": 3.4509550936747697, "tokens_seen": 2056257536 }, { "epoch": 0.25, "learning_rate": 0.00038068683302575627, "loss": 0.0686, "theoretical_loss": 3.4509550936747697, "tokens_seen": 2056257536 }, { "epoch": 0.25, "learning_rate": 0.0003806467142742518, "loss": 0.0703, "theoretical_loss": 3.4509370037927343, "tokens_seen": 2056388608 }, { "epoch": 0.25, "learning_rate": 0.00038060659552274735, "loss": 0.0677, "theoretical_loss": 3.4509189153865165, "tokens_seen": 2056519680 }, { "epoch": 0.25, "learning_rate": 0.0003805664767712429, "loss": 0.0691, "theoretical_loss": 3.450900828455902, "tokens_seen": 2056650752 }, { "epoch": 0.25, "learning_rate": 0.00038052635801973843, "loss": 0.0741, "theoretical_loss": 3.450882743000677, "tokens_seen": 2056781824 }, { "epoch": 0.25, "learning_rate": 0.000380486239268234, "loss": 0.0686, "theoretical_loss": 3.450864659020626, "tokens_seen": 2056912896 }, { "epoch": 0.25, "learning_rate": 0.00038044612051672957, "loss": 0.0718, "theoretical_loss": 3.450846576515535, "tokens_seen": 2057043968 }, { "epoch": 0.25, "learning_rate": 0.00038040600176522503, "loss": 0.0667, "theoretical_loss": 3.45082849548519, "tokens_seen": 2057175040 }, { "epoch": 0.25, "learning_rate": 0.0003803658830137206, "loss": 0.0655, "theoretical_loss": 3.4508104159293773, "tokens_seen": 2057306112 }, { "epoch": 0.25, "learning_rate": 0.00038032576426221616, "loss": 0.0673, "theoretical_loss": 3.450792337847882, "tokens_seen": 2057437184 }, { "epoch": 0.25, "learning_rate": 0.00038028564551071173, "loss": 0.0735, "theoretical_loss": 3.4507742612404897, "tokens_seen": 2057568256 }, { "epoch": 0.25, "learning_rate": 0.00038024552675920725, "loss": 0.0729, "theoretical_loss": 3.450756186106987, "tokens_seen": 2057699328 }, { "epoch": 0.25, "learning_rate": 0.0003802054080077028, "loss": 0.0713, "theoretical_loss": 3.4507381124471594, "tokens_seen": 2057830400 }, { "epoch": 0.25, "learning_rate": 0.0003801652892561984, "loss": 0.0687, "theoretical_loss": 3.4507200402607925, "tokens_seen": 2057961472 }, { "epoch": 0.25, "learning_rate": 0.0003801251705046939, "loss": 0.0662, "theoretical_loss": 3.4507019695476737, "tokens_seen": 2058092544 }, { "epoch": 0.25, "learning_rate": 0.00038008505175318947, "loss": 0.0689, "theoretical_loss": 3.450683900307588, "tokens_seen": 2058223616 }, { "epoch": 0.25, "learning_rate": 0.00038004493300168503, "loss": 0.0695, "theoretical_loss": 3.450665832540322, "tokens_seen": 2058354688 }, { "epoch": 0.25, "learning_rate": 0.0003800048142501805, "loss": 0.0687, "theoretical_loss": 3.4506477662456616, "tokens_seen": 2058485760 }, { "epoch": 0.25, "learning_rate": 0.00037996469549867606, "loss": 0.0704, "theoretical_loss": 3.450629701423393, "tokens_seen": 2058616832 }, { "epoch": 0.25, "learning_rate": 0.00037992457674717163, "loss": 0.0709, "theoretical_loss": 3.4506116380733025, "tokens_seen": 2058747904 }, { "epoch": 0.25, "learning_rate": 0.0003798844579956672, "loss": 0.0726, "theoretical_loss": 3.4505935761951765, "tokens_seen": 2058878976 }, { "epoch": 0.25, "learning_rate": 0.0003798443392441627, "loss": 0.0687, "theoretical_loss": 3.4505755157888016, "tokens_seen": 2059010048 }, { "epoch": 0.25, "learning_rate": 0.0003798042204926583, "loss": 0.0685, "theoretical_loss": 3.4505574568539634, "tokens_seen": 2059141120 }, { "epoch": 0.25, "learning_rate": 0.00037976410174115385, "loss": 0.0705, "theoretical_loss": 3.450539399390449, "tokens_seen": 2059272192 }, { "epoch": 0.25, "learning_rate": 0.00037972398298964936, "loss": 0.0734, "theoretical_loss": 3.4505213433980453, "tokens_seen": 2059403264 }, { "epoch": 0.25, "objective/train/advantage_avg": -0.0013541670050472021, "objective/train/docs_used": 751330, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2888433933258057, "objective/train/original_loss": 1.2888433933258057, "objective/train/theoretical_loss": 3.450503288876538, "objective/train/tokens_used": 430058976, "objective/train/value_avg": -0.0184783935546875, "objective/train/value_loss": 0.0004620641120709479, "objective/train/value_max": -5.3882598876953125e-05, "objective/train/value_min": -0.50048828125, "objective/train/value_reward_corr": 0.9523936641224368, "objective/train/value_std": 0.06219482421875, "objective/train/weight_avg": 0.9988586902618408, "objective/train/weighted_lm_loss": 1.2885040044784546, "objective/train/weights_max": 1.1792951822280884, "objective/train/weights_min": 0.368646502494812, "theoretical_loss": 3.450503288876538, "tokens_seen": 2059534336 }, { "epoch": 0.25, "learning_rate": 0.00037968386423814493, "loss": 0.0673, "theoretical_loss": 3.450503288876538, "tokens_seen": 2059534336 }, { "epoch": 0.25, "learning_rate": 0.0003796437454866405, "loss": 0.0721, "theoretical_loss": 3.450485235825714, "tokens_seen": 2059665408 }, { "epoch": 0.25, "learning_rate": 0.00037960362673513596, "loss": 0.0681, "theoretical_loss": 3.4504671842453596, "tokens_seen": 2059796480 }, { "epoch": 0.25, "learning_rate": 0.00037956350798363153, "loss": 0.0651, "theoretical_loss": 3.450449134135262, "tokens_seen": 2059927552 }, { "epoch": 0.25, "learning_rate": 0.0003795233892321271, "loss": 0.0686, "theoretical_loss": 3.4504310854952074, "tokens_seen": 2060058624 }, { "epoch": 0.25, "learning_rate": 0.00037948327048062267, "loss": 0.0681, "theoretical_loss": 3.4504130383249834, "tokens_seen": 2060189696 }, { "epoch": 0.25, "learning_rate": 0.0003794431517291182, "loss": 0.0667, "theoretical_loss": 3.4503949926243758, "tokens_seen": 2060320768 }, { "epoch": 0.25, "learning_rate": 0.00037940303297761375, "loss": 0.0721, "theoretical_loss": 3.4503769483931723, "tokens_seen": 2060451840 }, { "epoch": 0.25, "learning_rate": 0.0003793629142261093, "loss": 0.0672, "theoretical_loss": 3.450358905631159, "tokens_seen": 2060582912 }, { "epoch": 0.25, "learning_rate": 0.00037932279547460483, "loss": 0.0661, "theoretical_loss": 3.4503408643381235, "tokens_seen": 2060713984 }, { "epoch": 0.25, "learning_rate": 0.0003792826767231004, "loss": 0.0718, "theoretical_loss": 3.4503228245138517, "tokens_seen": 2060845056 }, { "epoch": 0.25, "learning_rate": 0.00037924255797159597, "loss": 0.0712, "theoretical_loss": 3.4503047861581324, "tokens_seen": 2060976128 }, { "epoch": 0.25, "learning_rate": 0.00037920243922009143, "loss": 0.0691, "theoretical_loss": 3.450286749270751, "tokens_seen": 2061107200 }, { "epoch": 0.25, "learning_rate": 0.000379162320468587, "loss": 0.0666, "theoretical_loss": 3.4502687138514956, "tokens_seen": 2061238272 }, { "epoch": 0.25, "learning_rate": 0.00037912220171708257, "loss": 0.071, "theoretical_loss": 3.4502506799001527, "tokens_seen": 2061369344 }, { "epoch": 0.25, "learning_rate": 0.00037908208296557813, "loss": 0.0703, "theoretical_loss": 3.4502326474165104, "tokens_seen": 2061500416 }, { "epoch": 0.25, "learning_rate": 0.00037904196421407365, "loss": 0.0692, "theoretical_loss": 3.4502146164003547, "tokens_seen": 2061631488 }, { "epoch": 0.25, "learning_rate": 0.0003790018454625692, "loss": 0.0661, "theoretical_loss": 3.450196586851474, "tokens_seen": 2061762560 }, { "epoch": 0.25, "learning_rate": 0.0003789617267110648, "loss": 0.0692, "theoretical_loss": 3.4501785587696547, "tokens_seen": 2061893632 }, { "epoch": 0.25, "learning_rate": 0.0003789216079595603, "loss": 0.0685, "theoretical_loss": 3.450160532154685, "tokens_seen": 2062024704 }, { "epoch": 0.25, "learning_rate": 0.00037888148920805587, "loss": 0.0718, "theoretical_loss": 3.4501425070063525, "tokens_seen": 2062155776 }, { "epoch": 0.25, "learning_rate": 0.00037884137045655144, "loss": 0.0675, "theoretical_loss": 3.4501244833244438, "tokens_seen": 2062286848 }, { "epoch": 0.25, "learning_rate": 0.00037880125170504695, "loss": 0.0701, "theoretical_loss": 3.4501064611087466, "tokens_seen": 2062417920 }, { "epoch": 0.25, "learning_rate": 0.00037876113295354246, "loss": 0.071, "theoretical_loss": 3.450088440359049, "tokens_seen": 2062548992 }, { "epoch": 0.25, "learning_rate": 0.00037872101420203803, "loss": 0.069, "theoretical_loss": 3.4500704210751376, "tokens_seen": 2062680064 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.0007980787777341902, "objective/train/docs_used": 752534, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3225648403167725, "objective/train/original_loss": 1.3225650787353516, "objective/train/theoretical_loss": 3.450052403256801, "objective/train/tokens_used": 433335776, "objective/train/value_avg": -0.0100860595703125, "objective/train/value_loss": 0.00039157646824605763, "objective/train/value_max": -6.973743438720703e-05, "objective/train/value_min": -0.7490234375, "objective/train/value_reward_corr": 0.7804201738462571, "objective/train/value_std": 0.0255279541015625, "objective/train/weight_avg": 1.0009784698486328, "objective/train/weighted_lm_loss": 1.3230760097503662, "objective/train/weights_max": 1.5002918243408203, "objective/train/weights_min": 0.3692460358142853, "theoretical_loss": 3.450052403256801, "tokens_seen": 2062811136 }, { "epoch": 0.25, "learning_rate": 0.0003786808954505336, "loss": 0.0695, "theoretical_loss": 3.450052403256801, "tokens_seen": 2062811136 }, { "epoch": 0.25, "learning_rate": 0.0003786407766990291, "loss": 0.0719, "theoretical_loss": 3.450034386903827, "tokens_seen": 2062942208 }, { "epoch": 0.25, "learning_rate": 0.0003786006579475247, "loss": 0.0704, "theoretical_loss": 3.450016372016002, "tokens_seen": 2063073280 }, { "epoch": 0.25, "learning_rate": 0.00037856053919602025, "loss": 0.0699, "theoretical_loss": 3.449998358593115, "tokens_seen": 2063204352 }, { "epoch": 0.25, "learning_rate": 0.00037852042044451577, "loss": 0.0698, "theoretical_loss": 3.4499803466349537, "tokens_seen": 2063335424 }, { "epoch": 0.25, "learning_rate": 0.00037848030169301133, "loss": 0.069, "theoretical_loss": 3.449962336141306, "tokens_seen": 2063466496 }, { "epoch": 0.25, "learning_rate": 0.0003784401829415069, "loss": 0.067, "theoretical_loss": 3.449944327111959, "tokens_seen": 2063597568 }, { "epoch": 0.25, "learning_rate": 0.0003784000641900024, "loss": 0.07, "theoretical_loss": 3.4499263195467016, "tokens_seen": 2063728640 }, { "epoch": 0.25, "learning_rate": 0.00037835994543849793, "loss": 0.0684, "theoretical_loss": 3.449908313445321, "tokens_seen": 2063859712 }, { "epoch": 0.25, "learning_rate": 0.0003783198266869935, "loss": 0.0685, "theoretical_loss": 3.449890308807606, "tokens_seen": 2063990784 }, { "epoch": 0.25, "learning_rate": 0.00037827970793548907, "loss": 0.0696, "theoretical_loss": 3.4498723056333445, "tokens_seen": 2064121856 }, { "epoch": 0.25, "learning_rate": 0.0003782395891839846, "loss": 0.0689, "theoretical_loss": 3.449854303922324, "tokens_seen": 2064252928 }, { "epoch": 0.25, "learning_rate": 0.00037819947043248015, "loss": 0.0668, "theoretical_loss": 3.4498363036743336, "tokens_seen": 2064384000 }, { "epoch": 0.25, "learning_rate": 0.0003781593516809757, "loss": 0.0759, "theoretical_loss": 3.4498183048891606, "tokens_seen": 2064515072 }, { "epoch": 0.25, "learning_rate": 0.00037811923292947123, "loss": 0.0711, "theoretical_loss": 3.449800307566594, "tokens_seen": 2064646144 }, { "epoch": 0.25, "learning_rate": 0.0003780791141779668, "loss": 0.0652, "theoretical_loss": 3.4497823117064215, "tokens_seen": 2064777216 }, { "epoch": 0.25, "learning_rate": 0.00037803899542646237, "loss": 0.0709, "theoretical_loss": 3.449764317308432, "tokens_seen": 2064908288 }, { "epoch": 0.25, "learning_rate": 0.0003779988766749579, "loss": 0.0644, "theoretical_loss": 3.4497463243724136, "tokens_seen": 2065039360 }, { "epoch": 0.25, "learning_rate": 0.0003779587579234534, "loss": 0.0698, "theoretical_loss": 3.449728332898155, "tokens_seen": 2065170432 }, { "epoch": 0.25, "learning_rate": 0.00037791863917194897, "loss": 0.068, "theoretical_loss": 3.4497103428854445, "tokens_seen": 2065301504 }, { "epoch": 0.25, "learning_rate": 0.00037787852042044453, "loss": 0.0705, "theoretical_loss": 3.4496923543340703, "tokens_seen": 2065432576 }, { "epoch": 0.25, "learning_rate": 0.00037783840166894005, "loss": 0.0696, "theoretical_loss": 3.449674367243822, "tokens_seen": 2065563648 }, { "epoch": 0.25, "learning_rate": 0.0003777982829174356, "loss": 0.0714, "theoretical_loss": 3.4496563816144867, "tokens_seen": 2065694720 }, { "epoch": 0.25, "learning_rate": 0.0003777581641659312, "loss": 0.0677, "theoretical_loss": 3.449638397445854, "tokens_seen": 2065825792 }, { "epoch": 0.25, "learning_rate": 0.0003777180454144267, "loss": 0.0723, "theoretical_loss": 3.449620414737713, "tokens_seen": 2065956864 }, { "epoch": 0.25, "objective/train/advantage_avg": 0.00031781819416210055, "objective/train/docs_used": 753515, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3063762187957764, "objective/train/original_loss": 1.3063762187957764, "objective/train/theoretical_loss": 3.449602433489851, "objective/train/tokens_used": 436612576, "objective/train/value_avg": -0.00408172607421875, "objective/train/value_loss": 5.410406811279245e-05, "objective/train/value_max": -2.9802322387695312e-05, "objective/train/value_min": -0.15771484375, "objective/train/value_reward_corr": 0.6691391245937439, "objective/train/value_std": 0.006710052490234375, "objective/train/weight_avg": 1.0003446340560913, "objective/train/weighted_lm_loss": 1.307616114616394, "objective/train/weights_max": 1.1434959173202515, "objective/train/weights_min": 0.8551669716835022, "theoretical_loss": 3.449602433489851, "tokens_seen": 2066087936 }, { "epoch": 0.25, "learning_rate": 0.00037767792666292227, "loss": 0.0656, "theoretical_loss": 3.449602433489851, "tokens_seen": 2066087936 }, { "epoch": 0.25, "learning_rate": 0.00037763780791141784, "loss": 0.0712, "theoretical_loss": 3.449584453702059, "tokens_seen": 2066219008 }, { "epoch": 0.25, "learning_rate": 0.00037759768915991335, "loss": 0.0645, "theoretical_loss": 3.449566475374123, "tokens_seen": 2066350080 }, { "epoch": 0.25, "learning_rate": 0.00037755757040840886, "loss": 0.0679, "theoretical_loss": 3.449548498505834, "tokens_seen": 2066481152 }, { "epoch": 0.25, "learning_rate": 0.00037751745165690443, "loss": 0.069, "theoretical_loss": 3.449530523096981, "tokens_seen": 2066612224 }, { "epoch": 0.25, "learning_rate": 0.0003774773329054, "loss": 0.0675, "theoretical_loss": 3.4495125491473515, "tokens_seen": 2066743296 }, { "epoch": 0.25, "learning_rate": 0.0003774372141538955, "loss": 0.0703, "theoretical_loss": 3.4494945766567358, "tokens_seen": 2066874368 }, { "epoch": 0.25, "learning_rate": 0.0003773970954023911, "loss": 0.0736, "theoretical_loss": 3.449476605624922, "tokens_seen": 2067005440 }, { "epoch": 0.25, "learning_rate": 0.00037735697665088665, "loss": 0.0698, "theoretical_loss": 3.4494586360517, "tokens_seen": 2067136512 }, { "epoch": 0.25, "learning_rate": 0.00037731685789938217, "loss": 0.0662, "theoretical_loss": 3.4494406679368583, "tokens_seen": 2067267584 }, { "epoch": 0.25, "learning_rate": 0.00037727673914787774, "loss": 0.0723, "theoretical_loss": 3.4494227012801866, "tokens_seen": 2067398656 }, { "epoch": 0.25, "learning_rate": 0.0003772366203963733, "loss": 0.0705, "theoretical_loss": 3.449404736081474, "tokens_seen": 2067529728 }, { "epoch": 0.25, "learning_rate": 0.0003771965016448688, "loss": 0.0685, "theoretical_loss": 3.4493867723405094, "tokens_seen": 2067660800 }, { "epoch": 0.25, "learning_rate": 0.00037715638289336433, "loss": 0.0689, "theoretical_loss": 3.4493688100570825, "tokens_seen": 2067791872 }, { "epoch": 0.25, "learning_rate": 0.0003771162641418599, "loss": 0.0723, "theoretical_loss": 3.4493508492309823, "tokens_seen": 2067922944 }, { "epoch": 0.25, "learning_rate": 0.00037707614539035547, "loss": 0.0691, "theoretical_loss": 3.4493328898619993, "tokens_seen": 2068054016 }, { "epoch": 0.25, "learning_rate": 0.000377036026638851, "loss": 0.0715, "theoretical_loss": 3.449314931949921, "tokens_seen": 2068185088 }, { "epoch": 0.25, "learning_rate": 0.00037699590788734655, "loss": 0.0697, "theoretical_loss": 3.449296975494539, "tokens_seen": 2068316160 }, { "epoch": 0.25, "learning_rate": 0.0003769557891358421, "loss": 0.0703, "theoretical_loss": 3.4492790204956414, "tokens_seen": 2068447232 }, { "epoch": 0.25, "learning_rate": 0.0003769156703843377, "loss": 0.0691, "theoretical_loss": 3.449261066953018, "tokens_seen": 2068578304 }, { "epoch": 0.25, "learning_rate": 0.0003768755516328332, "loss": 0.0704, "theoretical_loss": 3.4492431148664586, "tokens_seen": 2068709376 }, { "epoch": 0.25, "learning_rate": 0.00037683543288132877, "loss": 0.0653, "theoretical_loss": 3.4492251642357536, "tokens_seen": 2068840448 }, { "epoch": 0.25, "learning_rate": 0.0003767953141298243, "loss": 0.0693, "theoretical_loss": 3.4492072150606914, "tokens_seen": 2068971520 }, { "epoch": 0.25, "learning_rate": 0.0003767551953783198, "loss": 0.0693, "theoretical_loss": 3.4491892673410627, "tokens_seen": 2069102592 }, { "epoch": 0.25, "learning_rate": 0.00037671507662681537, "loss": 0.0709, "theoretical_loss": 3.4491713210766566, "tokens_seen": 2069233664 }, { "epoch": 0.25, "objective/train/advantage_avg": -0.0012716982746496797, "objective/train/docs_used": 754612, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4899065494537354, "objective/train/original_loss": 1.4899067878723145, "objective/train/theoretical_loss": 3.449153376267264, "objective/train/tokens_used": 439889376, "objective/train/value_avg": -0.0118865966796875, "objective/train/value_loss": 0.0004828266683034599, "objective/train/value_max": -9.918212890625e-05, "objective/train/value_min": -0.7119140625, "objective/train/value_reward_corr": 0.743757740566982, "objective/train/value_std": 0.0223846435546875, "objective/train/weight_avg": 0.9989442229270935, "objective/train/weighted_lm_loss": 1.4878995418548584, "objective/train/weights_max": 1.4750205278396606, "objective/train/weights_min": 0.3730488717556, "theoretical_loss": 3.449153376267264, "tokens_seen": 2069364736 }, { "epoch": 0.25, "learning_rate": 0.00037667495787531094, "loss": 0.0715, "theoretical_loss": 3.449153376267264, "tokens_seen": 2069364736 }, { "epoch": 0.25, "learning_rate": 0.00037663483912380645, "loss": 0.0665, "theoretical_loss": 3.4491354329126738, "tokens_seen": 2069495808 }, { "epoch": 0.25, "learning_rate": 0.000376594720372302, "loss": 0.0657, "theoretical_loss": 3.449117491012676, "tokens_seen": 2069626880 }, { "epoch": 0.25, "learning_rate": 0.0003765546016207976, "loss": 0.0677, "theoretical_loss": 3.4490995505670607, "tokens_seen": 2069757952 }, { "epoch": 0.25, "learning_rate": 0.00037651448286929315, "loss": 0.0712, "theoretical_loss": 3.449081611575618, "tokens_seen": 2069889024 }, { "epoch": 0.25, "learning_rate": 0.00037647436411778867, "loss": 0.066, "theoretical_loss": 3.449063674038138, "tokens_seen": 2070020096 }, { "epoch": 0.25, "learning_rate": 0.00037643424536628424, "loss": 0.0673, "theoretical_loss": 3.4490457379544113, "tokens_seen": 2070151168 }, { "epoch": 0.25, "learning_rate": 0.00037639412661477975, "loss": 0.071, "theoretical_loss": 3.4490278033242268, "tokens_seen": 2070282240 }, { "epoch": 0.25, "learning_rate": 0.00037635400786327527, "loss": 0.0684, "theoretical_loss": 3.4490098701473757, "tokens_seen": 2070413312 }, { "epoch": 0.25, "learning_rate": 0.00037631388911177083, "loss": 0.0734, "theoretical_loss": 3.4489919384236485, "tokens_seen": 2070544384 }, { "epoch": 0.25, "learning_rate": 0.0003762737703602664, "loss": 0.0701, "theoretical_loss": 3.448974008152834, "tokens_seen": 2070675456 }, { "epoch": 0.26, "learning_rate": 0.0003762336516087619, "loss": 0.0719, "theoretical_loss": 3.448956079334724, "tokens_seen": 2070806528 }, { "epoch": 0.26, "learning_rate": 0.0003761935328572575, "loss": 0.0691, "theoretical_loss": 3.4489381519691085, "tokens_seen": 2070937600 }, { "epoch": 0.26, "learning_rate": 0.00037615341410575305, "loss": 0.0673, "theoretical_loss": 3.448920226055777, "tokens_seen": 2071068672 }, { "epoch": 0.26, "learning_rate": 0.0003761132953542486, "loss": 0.0677, "theoretical_loss": 3.4489023015945213, "tokens_seen": 2071199744 }, { "epoch": 0.26, "learning_rate": 0.00037607317660274414, "loss": 0.0719, "theoretical_loss": 3.448884378585131, "tokens_seen": 2071330816 }, { "epoch": 0.26, "learning_rate": 0.0003760330578512397, "loss": 0.0708, "theoretical_loss": 3.448866457027397, "tokens_seen": 2071461888 }, { "epoch": 0.26, "learning_rate": 0.0003759929390997352, "loss": 0.0671, "theoretical_loss": 3.4488485369211093, "tokens_seen": 2071592960 }, { "epoch": 0.26, "learning_rate": 0.00037595282034823073, "loss": 0.0715, "theoretical_loss": 3.4488306182660597, "tokens_seen": 2071724032 }, { "epoch": 0.26, "learning_rate": 0.0003759127015967263, "loss": 0.0665, "theoretical_loss": 3.448812701062038, "tokens_seen": 2071855104 }, { "epoch": 0.26, "learning_rate": 0.00037587258284522187, "loss": 0.0701, "theoretical_loss": 3.448794785308835, "tokens_seen": 2071986176 }, { "epoch": 0.26, "learning_rate": 0.0003758324640937174, "loss": 0.0721, "theoretical_loss": 3.4487768710062414, "tokens_seen": 2072117248 }, { "epoch": 0.26, "learning_rate": 0.00037579234534221295, "loss": 0.0701, "theoretical_loss": 3.4487589581540483, "tokens_seen": 2072248320 }, { "epoch": 0.26, "learning_rate": 0.0003757522265907085, "loss": 0.0695, "theoretical_loss": 3.4487410467520463, "tokens_seen": 2072379392 }, { "epoch": 0.26, "learning_rate": 0.0003757121078392041, "loss": 0.0678, "theoretical_loss": 3.4487231368000266, "tokens_seen": 2072510464 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0015559203457087278, "objective/train/docs_used": 755744, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3379839658737183, "objective/train/original_loss": 1.3379839658737183, "objective/train/theoretical_loss": 3.4487052282977797, "objective/train/tokens_used": 443166176, "objective/train/value_avg": -0.006626129150390625, "objective/train/value_loss": 0.00010183933045482263, "objective/train/value_max": -0.00013768672943115234, "objective/train/value_min": -0.2237548828125, "objective/train/value_reward_corr": 0.690925835631694, "objective/train/value_std": 0.009490966796875, "objective/train/weight_avg": 1.0016027688980103, "objective/train/weighted_lm_loss": 1.3402736186981201, "objective/train/weights_max": 1.1563444137573242, "objective/train/weights_min": 0.37196043133735657, "theoretical_loss": 3.4487052282977797, "tokens_seen": 2072641536 }, { "epoch": 0.26, "learning_rate": 0.0003756719890876996, "loss": 0.068, "theoretical_loss": 3.4487052282977797, "tokens_seen": 2072641536 }, { "epoch": 0.26, "learning_rate": 0.00037563187033619517, "loss": 0.0683, "theoretical_loss": 3.448687321245097, "tokens_seen": 2072772608 }, { "epoch": 0.26, "learning_rate": 0.0003755917515846907, "loss": 0.0668, "theoretical_loss": 3.4486694156417688, "tokens_seen": 2072903680 }, { "epoch": 0.26, "learning_rate": 0.0003755516328331862, "loss": 0.0681, "theoretical_loss": 3.4486515114875873, "tokens_seen": 2073034752 }, { "epoch": 0.26, "learning_rate": 0.00037551151408168177, "loss": 0.0721, "theoretical_loss": 3.4486336087823424, "tokens_seen": 2073165824 }, { "epoch": 0.26, "learning_rate": 0.00037547139533017734, "loss": 0.071, "theoretical_loss": 3.448615707525826, "tokens_seen": 2073296896 }, { "epoch": 0.26, "learning_rate": 0.00037543127657867285, "loss": 0.0659, "theoretical_loss": 3.4485978077178294, "tokens_seen": 2073427968 }, { "epoch": 0.26, "learning_rate": 0.0003753911578271684, "loss": 0.0714, "theoretical_loss": 3.448579909358143, "tokens_seen": 2073559040 }, { "epoch": 0.26, "learning_rate": 0.000375351039075664, "loss": 0.0679, "theoretical_loss": 3.4485620124465592, "tokens_seen": 2073690112 }, { "epoch": 0.26, "learning_rate": 0.00037531092032415956, "loss": 0.0746, "theoretical_loss": 3.448544116982868, "tokens_seen": 2073821184 }, { "epoch": 0.26, "learning_rate": 0.00037527080157265507, "loss": 0.0741, "theoretical_loss": 3.448526222966862, "tokens_seen": 2073952256 }, { "epoch": 0.26, "learning_rate": 0.00037523068282115064, "loss": 0.0673, "theoretical_loss": 3.448508330398332, "tokens_seen": 2074083328 }, { "epoch": 0.26, "learning_rate": 0.00037519056406964615, "loss": 0.0699, "theoretical_loss": 3.4484904392770694, "tokens_seen": 2074214400 }, { "epoch": 0.26, "learning_rate": 0.00037515044531814167, "loss": 0.0676, "theoretical_loss": 3.448472549602866, "tokens_seen": 2074345472 }, { "epoch": 0.26, "learning_rate": 0.00037511032656663724, "loss": 0.0684, "theoretical_loss": 3.4484546613755134, "tokens_seen": 2074476544 }, { "epoch": 0.26, "learning_rate": 0.0003750702078151328, "loss": 0.0716, "theoretical_loss": 3.4484367745948026, "tokens_seen": 2074607616 }, { "epoch": 0.26, "learning_rate": 0.0003750300890636283, "loss": 0.0665, "theoretical_loss": 3.448418889260525, "tokens_seen": 2074738688 }, { "epoch": 0.26, "learning_rate": 0.0003749899703121239, "loss": 0.0661, "theoretical_loss": 3.4484010053724736, "tokens_seen": 2074869760 }, { "epoch": 0.26, "learning_rate": 0.00037494985156061945, "loss": 0.0703, "theoretical_loss": 3.448383122930439, "tokens_seen": 2075000832 }, { "epoch": 0.26, "learning_rate": 0.000374909732809115, "loss": 0.0673, "theoretical_loss": 3.448365241934214, "tokens_seen": 2075131904 }, { "epoch": 0.26, "learning_rate": 0.00037486961405761054, "loss": 0.0704, "theoretical_loss": 3.448347362383589, "tokens_seen": 2075262976 }, { "epoch": 0.26, "learning_rate": 0.0003748294953061061, "loss": 0.0691, "theoretical_loss": 3.4483294842783563, "tokens_seen": 2075394048 }, { "epoch": 0.26, "learning_rate": 0.0003747893765546016, "loss": 0.0687, "theoretical_loss": 3.4483116076183085, "tokens_seen": 2075525120 }, { "epoch": 0.26, "learning_rate": 0.00037474925780309713, "loss": 0.0645, "theoretical_loss": 3.4482937324032368, "tokens_seen": 2075656192 }, { "epoch": 0.26, "learning_rate": 0.0003747091390515927, "loss": 0.0693, "theoretical_loss": 3.4482758586329334, "tokens_seen": 2075787264 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.00077469227835536, "objective/train/docs_used": 756913, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3792953491210938, "objective/train/original_loss": 1.3792953491210938, "objective/train/theoretical_loss": 3.4482579863071905, "objective/train/tokens_used": 446442976, "objective/train/value_avg": -0.005222320556640625, "objective/train/value_loss": 0.0003163651272188872, "objective/train/value_max": -9.608268737792969e-05, "objective/train/value_min": -0.410400390625, "objective/train/value_reward_corr": 0.43615886951447186, "objective/train/value_std": 0.0093231201171875, "objective/train/weight_avg": 1.0008985996246338, "objective/train/weighted_lm_loss": 1.3812917470932007, "objective/train/weights_max": 1.2136479616165161, "objective/train/weights_min": 0.22625350952148438, "theoretical_loss": 3.4482579863071905, "tokens_seen": 2075918336 }, { "epoch": 0.26, "learning_rate": 0.00037466902030008827, "loss": 0.0693, "theoretical_loss": 3.4482579863071905, "tokens_seen": 2075918336 }, { "epoch": 0.26, "learning_rate": 0.0003746289015485838, "loss": 0.0663, "theoretical_loss": 3.4482401154258, "tokens_seen": 2076049408 }, { "epoch": 0.26, "learning_rate": 0.00037458878279707935, "loss": 0.0687, "theoretical_loss": 3.4482222459885534, "tokens_seen": 2076180480 }, { "epoch": 0.26, "learning_rate": 0.0003745486640455749, "loss": 0.0669, "theoretical_loss": 3.4482043779952436, "tokens_seen": 2076311552 }, { "epoch": 0.26, "learning_rate": 0.0003745085452940705, "loss": 0.0705, "theoretical_loss": 3.4481865114456625, "tokens_seen": 2076442624 }, { "epoch": 0.26, "learning_rate": 0.000374468426542566, "loss": 0.0691, "theoretical_loss": 3.448168646339603, "tokens_seen": 2076573696 }, { "epoch": 0.26, "learning_rate": 0.00037442830779106157, "loss": 0.0683, "theoretical_loss": 3.448150782676856, "tokens_seen": 2076704768 }, { "epoch": 0.26, "learning_rate": 0.0003743881890395571, "loss": 0.0659, "theoretical_loss": 3.448132920457215, "tokens_seen": 2076835840 }, { "epoch": 0.26, "learning_rate": 0.0003743480702880526, "loss": 0.0657, "theoretical_loss": 3.4481150596804717, "tokens_seen": 2076966912 }, { "epoch": 0.26, "learning_rate": 0.00037430795153654817, "loss": 0.0692, "theoretical_loss": 3.448097200346419, "tokens_seen": 2077097984 }, { "epoch": 0.26, "learning_rate": 0.00037426783278504374, "loss": 0.0661, "theoretical_loss": 3.4480793424548493, "tokens_seen": 2077229056 }, { "epoch": 0.26, "learning_rate": 0.0003742277140335393, "loss": 0.0666, "theoretical_loss": 3.448061486005554, "tokens_seen": 2077360128 }, { "epoch": 0.26, "learning_rate": 0.0003741875952820348, "loss": 0.0678, "theoretical_loss": 3.4480436309983276, "tokens_seen": 2077491200 }, { "epoch": 0.26, "learning_rate": 0.0003741474765305304, "loss": 0.0694, "theoretical_loss": 3.448025777432961, "tokens_seen": 2077622272 }, { "epoch": 0.26, "learning_rate": 0.00037410735777902596, "loss": 0.0684, "theoretical_loss": 3.448007925309247, "tokens_seen": 2077753344 }, { "epoch": 0.26, "learning_rate": 0.00037406723902752147, "loss": 0.0741, "theoretical_loss": 3.4479900746269787, "tokens_seen": 2077884416 }, { "epoch": 0.26, "learning_rate": 0.00037402712027601704, "loss": 0.0701, "theoretical_loss": 3.447972225385949, "tokens_seen": 2078015488 }, { "epoch": 0.26, "learning_rate": 0.00037398700152451255, "loss": 0.0677, "theoretical_loss": 3.4479543775859502, "tokens_seen": 2078146560 }, { "epoch": 0.26, "learning_rate": 0.00037394688277300807, "loss": 0.0649, "theoretical_loss": 3.447936531226776, "tokens_seen": 2078277632 }, { "epoch": 0.26, "learning_rate": 0.00037390676402150364, "loss": 0.0714, "theoretical_loss": 3.447918686308217, "tokens_seen": 2078408704 }, { "epoch": 0.26, "learning_rate": 0.0003738666452699992, "loss": 0.068, "theoretical_loss": 3.4479008428300686, "tokens_seen": 2078539776 }, { "epoch": 0.26, "learning_rate": 0.0003738265265184948, "loss": 0.0682, "theoretical_loss": 3.447883000792122, "tokens_seen": 2078670848 }, { "epoch": 0.26, "learning_rate": 0.0003737864077669903, "loss": 0.0687, "theoretical_loss": 3.447865160194171, "tokens_seen": 2078801920 }, { "epoch": 0.26, "learning_rate": 0.00037374628901548586, "loss": 0.0694, "theoretical_loss": 3.4478473210360088, "tokens_seen": 2078932992 }, { "epoch": 0.26, "learning_rate": 0.0003737061702639814, "loss": 0.0675, "theoretical_loss": 3.447829483317428, "tokens_seen": 2079064064 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0005164333852007985, "objective/train/docs_used": 758165, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4010387659072876, "objective/train/original_loss": 1.4010388851165771, "objective/train/theoretical_loss": 3.447811647038221, "objective/train/tokens_used": 449719776, "objective/train/value_avg": -0.0099029541015625, "objective/train/value_loss": 0.0004460581112653017, "objective/train/value_max": -4.649162292480469e-05, "objective/train/value_min": -0.70556640625, "objective/train/value_reward_corr": 0.6760893441416981, "objective/train/value_std": 0.02227783203125, "objective/train/weight_avg": 1.0007216930389404, "objective/train/weighted_lm_loss": 1.4016330242156982, "objective/train/weights_max": 1.660217523574829, "objective/train/weights_min": 0.37631624937057495, "theoretical_loss": 3.447811647038221, "tokens_seen": 2079195136 }, { "epoch": 0.26, "learning_rate": 0.00037366605151247694, "loss": 0.0708, "theoretical_loss": 3.447811647038221, "tokens_seen": 2079195136 }, { "epoch": 0.26, "learning_rate": 0.0003736259327609725, "loss": 0.0653, "theoretical_loss": 3.4477938121981824, "tokens_seen": 2079326208 }, { "epoch": 0.26, "learning_rate": 0.000373585814009468, "loss": 0.0658, "theoretical_loss": 3.4477759787971043, "tokens_seen": 2079457280 }, { "epoch": 0.26, "learning_rate": 0.00037354569525796354, "loss": 0.0686, "theoretical_loss": 3.44775814683478, "tokens_seen": 2079588352 }, { "epoch": 0.26, "learning_rate": 0.0003735055765064591, "loss": 0.0662, "theoretical_loss": 3.4477403163110036, "tokens_seen": 2079719424 }, { "epoch": 0.26, "learning_rate": 0.00037346545775495467, "loss": 0.0742, "theoretical_loss": 3.447722487225567, "tokens_seen": 2079850496 }, { "epoch": 0.26, "learning_rate": 0.00037342533900345024, "loss": 0.0693, "theoretical_loss": 3.4477046595782648, "tokens_seen": 2079981568 }, { "epoch": 0.26, "learning_rate": 0.00037338522025194575, "loss": 0.0636, "theoretical_loss": 3.4476868333688904, "tokens_seen": 2080112640 }, { "epoch": 0.26, "learning_rate": 0.0003733451015004413, "loss": 0.0699, "theoretical_loss": 3.4476690085972357, "tokens_seen": 2080243712 }, { "epoch": 0.26, "learning_rate": 0.0003733049827489369, "loss": 0.0686, "theoretical_loss": 3.447651185263096, "tokens_seen": 2080374784 }, { "epoch": 0.26, "learning_rate": 0.0003732648639974324, "loss": 0.067, "theoretical_loss": 3.4476333633662635, "tokens_seen": 2080505856 }, { "epoch": 0.26, "learning_rate": 0.000373224745245928, "loss": 0.0664, "theoretical_loss": 3.447615542906532, "tokens_seen": 2080636928 }, { "epoch": 0.26, "learning_rate": 0.0003731846264944235, "loss": 0.0665, "theoretical_loss": 3.447597723883696, "tokens_seen": 2080768000 }, { "epoch": 0.26, "learning_rate": 0.000373144507742919, "loss": 0.0707, "theoretical_loss": 3.4475799062975483, "tokens_seen": 2080899072 }, { "epoch": 0.26, "learning_rate": 0.00037310438899141457, "loss": 0.0701, "theoretical_loss": 3.4475620901478825, "tokens_seen": 2081030144 }, { "epoch": 0.26, "learning_rate": 0.00037306427023991014, "loss": 0.0712, "theoretical_loss": 3.4475442754344927, "tokens_seen": 2081161216 }, { "epoch": 0.26, "learning_rate": 0.0003730241514884057, "loss": 0.0718, "theoretical_loss": 3.447526462157173, "tokens_seen": 2081292288 }, { "epoch": 0.26, "learning_rate": 0.0003729840327369012, "loss": 0.0658, "theoretical_loss": 3.447508650315716, "tokens_seen": 2081423360 }, { "epoch": 0.26, "learning_rate": 0.0003729439139853968, "loss": 0.0725, "theoretical_loss": 3.447490839909917, "tokens_seen": 2081554432 }, { "epoch": 0.26, "learning_rate": 0.00037290379523389236, "loss": 0.0668, "theoretical_loss": 3.4474730309395687, "tokens_seen": 2081685504 }, { "epoch": 0.26, "learning_rate": 0.00037286367648238787, "loss": 0.068, "theoretical_loss": 3.447455223404466, "tokens_seen": 2081816576 }, { "epoch": 0.26, "learning_rate": 0.00037282355773088344, "loss": 0.0684, "theoretical_loss": 3.4474374173044025, "tokens_seen": 2081947648 }, { "epoch": 0.26, "learning_rate": 0.000372783438979379, "loss": 0.0662, "theoretical_loss": 3.4474196126391714, "tokens_seen": 2082078720 }, { "epoch": 0.26, "learning_rate": 0.00037274332022787447, "loss": 0.0687, "theoretical_loss": 3.4474018094085683, "tokens_seen": 2082209792 }, { "epoch": 0.26, "learning_rate": 0.00037270320147637004, "loss": 0.0695, "theoretical_loss": 3.4473840076123863, "tokens_seen": 2082340864 }, { "epoch": 0.26, "objective/train/advantage_avg": 0.0006520000752061605, "objective/train/docs_used": 759184, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.303635835647583, "objective/train/original_loss": 1.303635835647583, "objective/train/theoretical_loss": 3.4473662072504196, "objective/train/tokens_used": 452996576, "objective/train/value_avg": -0.004886627197265625, "objective/train/value_loss": 0.00011258190352236852, "objective/train/value_max": -2.4139881134033203e-05, "objective/train/value_min": -0.1822509765625, "objective/train/value_reward_corr": 0.6200367030503149, "objective/train/value_std": 0.0086212158203125, "objective/train/weight_avg": 1.000706434249878, "objective/train/weighted_lm_loss": 1.3047555685043335, "objective/train/weights_max": 1.1645605564117432, "objective/train/weights_min": 0.5289608836174011, "theoretical_loss": 3.4473662072504196, "tokens_seen": 2082471936 }, { "epoch": 0.26, "learning_rate": 0.0003726630827248656, "loss": 0.0672, "theoretical_loss": 3.4473662072504196, "tokens_seen": 2082471936 }, { "epoch": 0.26, "learning_rate": 0.0003726229639733612, "loss": 0.0717, "theoretical_loss": 3.447348408322463, "tokens_seen": 2082603008 }, { "epoch": 0.26, "learning_rate": 0.0003725828452218567, "loss": 0.0683, "theoretical_loss": 3.4473306108283097, "tokens_seen": 2082734080 }, { "epoch": 0.26, "learning_rate": 0.00037254272647035226, "loss": 0.0713, "theoretical_loss": 3.4473128147677548, "tokens_seen": 2082865152 }, { "epoch": 0.26, "learning_rate": 0.0003725026077188478, "loss": 0.0671, "theoretical_loss": 3.4472950201405923, "tokens_seen": 2082996224 }, { "epoch": 0.26, "learning_rate": 0.00037246248896734334, "loss": 0.0668, "theoretical_loss": 3.4472772269466168, "tokens_seen": 2083127296 }, { "epoch": 0.26, "learning_rate": 0.0003724223702158389, "loss": 0.0664, "theoretical_loss": 3.4472594351856225, "tokens_seen": 2083258368 }, { "epoch": 0.26, "learning_rate": 0.0003723822514643345, "loss": 0.0675, "theoretical_loss": 3.447241644857404, "tokens_seen": 2083389440 }, { "epoch": 0.26, "learning_rate": 0.00037234213271282994, "loss": 0.0701, "theoretical_loss": 3.447223855961756, "tokens_seen": 2083520512 }, { "epoch": 0.26, "learning_rate": 0.0003723020139613255, "loss": 0.069, "theoretical_loss": 3.447206068498472, "tokens_seen": 2083651584 }, { "epoch": 0.26, "learning_rate": 0.0003722618952098211, "loss": 0.0671, "theoretical_loss": 3.447188282467348, "tokens_seen": 2083782656 }, { "epoch": 0.26, "learning_rate": 0.00037222177645831664, "loss": 0.069, "theoretical_loss": 3.4471704978681776, "tokens_seen": 2083913728 }, { "epoch": 0.26, "learning_rate": 0.00037218165770681216, "loss": 0.0686, "theoretical_loss": 3.4471527147007555, "tokens_seen": 2084044800 }, { "epoch": 0.26, "learning_rate": 0.0003721415389553077, "loss": 0.0712, "theoretical_loss": 3.447134932964877, "tokens_seen": 2084175872 }, { "epoch": 0.26, "learning_rate": 0.0003721014202038033, "loss": 0.0712, "theoretical_loss": 3.447117152660337, "tokens_seen": 2084306944 }, { "epoch": 0.26, "learning_rate": 0.0003720613014522988, "loss": 0.07, "theoretical_loss": 3.447099373786929, "tokens_seen": 2084438016 }, { "epoch": 0.26, "learning_rate": 0.0003720211827007944, "loss": 0.0682, "theoretical_loss": 3.447081596344449, "tokens_seen": 2084569088 }, { "epoch": 0.26, "learning_rate": 0.00037198106394928994, "loss": 0.0655, "theoretical_loss": 3.4470638203326915, "tokens_seen": 2084700160 }, { "epoch": 0.26, "learning_rate": 0.0003719409451977854, "loss": 0.0676, "theoretical_loss": 3.447046045751451, "tokens_seen": 2084831232 }, { "epoch": 0.26, "learning_rate": 0.00037190082644628097, "loss": 0.062, "theoretical_loss": 3.4470282726005235, "tokens_seen": 2084962304 }, { "epoch": 0.26, "learning_rate": 0.00037186070769477654, "loss": 0.0724, "theoretical_loss": 3.447010500879703, "tokens_seen": 2085093376 }, { "epoch": 0.26, "learning_rate": 0.0003718205889432721, "loss": 0.0736, "theoretical_loss": 3.446992730588785, "tokens_seen": 2085224448 }, { "epoch": 0.26, "learning_rate": 0.0003717804701917676, "loss": 0.0698, "theoretical_loss": 3.4469749617275642, "tokens_seen": 2085355520 }, { "epoch": 0.26, "learning_rate": 0.0003717403514402632, "loss": 0.07, "theoretical_loss": 3.4469571942958366, "tokens_seen": 2085486592 }, { "epoch": 0.26, "learning_rate": 0.00037170023268875876, "loss": 0.0695, "theoretical_loss": 3.4469394282933967, "tokens_seen": 2085617664 }, { "epoch": 0.26, "objective/train/advantage_avg": -0.0005239701131358743, "objective/train/docs_used": 760445, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4388078451156616, "objective/train/original_loss": 1.438807725906372, "objective/train/theoretical_loss": 3.4469216637200395, "objective/train/tokens_used": 456273376, "objective/train/value_avg": -0.00567626953125, "objective/train/value_loss": 0.0001230546913575381, "objective/train/value_max": -4.5418739318847656e-05, "objective/train/value_min": -0.223388671875, "objective/train/value_reward_corr": 0.7061940361195937, "objective/train/value_std": 0.00942230224609375, "objective/train/weight_avg": 0.99953293800354, "objective/train/weighted_lm_loss": 1.4384214878082275, "objective/train/weights_max": 1.1131106615066528, "objective/train/weights_min": 0.3683748245239258, "theoretical_loss": 3.4469216637200395, "tokens_seen": 2085748736 }, { "epoch": 0.26, "learning_rate": 0.0003716601139372543, "loss": 0.0699, "theoretical_loss": 3.4469216637200395, "tokens_seen": 2085748736 }, { "epoch": 0.26, "learning_rate": 0.00037161999518574984, "loss": 0.0686, "theoretical_loss": 3.4469039005755606, "tokens_seen": 2085879808 }, { "epoch": 0.26, "learning_rate": 0.0003715798764342454, "loss": 0.0706, "theoretical_loss": 3.4468861388597554, "tokens_seen": 2086010880 }, { "epoch": 0.26, "learning_rate": 0.0003715397576827409, "loss": 0.067, "theoretical_loss": 3.446868378572419, "tokens_seen": 2086141952 }, { "epoch": 0.26, "learning_rate": 0.00037149963893123644, "loss": 0.0651, "theoretical_loss": 3.446850619713347, "tokens_seen": 2086273024 }, { "epoch": 0.26, "learning_rate": 0.000371459520179732, "loss": 0.0669, "theoretical_loss": 3.446832862282334, "tokens_seen": 2086404096 }, { "epoch": 0.26, "learning_rate": 0.0003714194014282276, "loss": 0.0656, "theoretical_loss": 3.4468151062791765, "tokens_seen": 2086535168 }, { "epoch": 0.26, "learning_rate": 0.0003713792826767231, "loss": 0.0723, "theoretical_loss": 3.44679735170367, "tokens_seen": 2086666240 }, { "epoch": 0.26, "learning_rate": 0.00037133916392521866, "loss": 0.066, "theoretical_loss": 3.446779598555609, "tokens_seen": 2086797312 }, { "epoch": 0.26, "learning_rate": 0.0003712990451737142, "loss": 0.0674, "theoretical_loss": 3.4467618468347903, "tokens_seen": 2086928384 }, { "epoch": 0.26, "learning_rate": 0.00037125892642220974, "loss": 0.0713, "theoretical_loss": 3.4467440965410088, "tokens_seen": 2087059456 }, { "epoch": 0.26, "learning_rate": 0.0003712188076707053, "loss": 0.0717, "theoretical_loss": 3.44672634767406, "tokens_seen": 2087190528 }, { "epoch": 0.27, "learning_rate": 0.0003711786889192009, "loss": 0.0694, "theoretical_loss": 3.446708600233741, "tokens_seen": 2087321600 }, { "epoch": 0.27, "learning_rate": 0.0003711385701676964, "loss": 0.0668, "theoretical_loss": 3.4466908542198453, "tokens_seen": 2087452672 }, { "epoch": 0.27, "learning_rate": 0.0003710984514161919, "loss": 0.0701, "theoretical_loss": 3.4466731096321706, "tokens_seen": 2087583744 }, { "epoch": 0.27, "learning_rate": 0.0003710583326646875, "loss": 0.0676, "theoretical_loss": 3.4466553664705124, "tokens_seen": 2087714816 }, { "epoch": 0.27, "learning_rate": 0.00037101821391318304, "loss": 0.0647, "theoretical_loss": 3.4466376247346657, "tokens_seen": 2087845888 }, { "epoch": 0.27, "learning_rate": 0.00037097809516167856, "loss": 0.0685, "theoretical_loss": 3.446619884424427, "tokens_seen": 2087976960 }, { "epoch": 0.27, "learning_rate": 0.0003709379764101741, "loss": 0.0703, "theoretical_loss": 3.4466021455395923, "tokens_seen": 2088108032 }, { "epoch": 0.27, "learning_rate": 0.0003708978576586697, "loss": 0.0656, "theoretical_loss": 3.446584408079958, "tokens_seen": 2088239104 }, { "epoch": 0.27, "learning_rate": 0.0003708577389071652, "loss": 0.07, "theoretical_loss": 3.446566672045319, "tokens_seen": 2088370176 }, { "epoch": 0.27, "learning_rate": 0.0003708176201556608, "loss": 0.0711, "theoretical_loss": 3.4465489374354727, "tokens_seen": 2088501248 }, { "epoch": 0.27, "learning_rate": 0.00037077750140415634, "loss": 0.066, "theoretical_loss": 3.4465312042502143, "tokens_seen": 2088632320 }, { "epoch": 0.27, "learning_rate": 0.00037073738265265186, "loss": 0.0696, "theoretical_loss": 3.4465134724893405, "tokens_seen": 2088763392 }, { "epoch": 0.27, "learning_rate": 0.00037069726390114737, "loss": 0.0681, "theoretical_loss": 3.446495742152647, "tokens_seen": 2088894464 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0007204703870229423, "objective/train/docs_used": 761616, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2865874767303467, "objective/train/original_loss": 1.2865874767303467, "objective/train/theoretical_loss": 3.446478013239931, "objective/train/tokens_used": 459550176, "objective/train/value_avg": -0.0085601806640625, "objective/train/value_loss": 0.00021204305812716484, "objective/train/value_max": -7.724761962890625e-05, "objective/train/value_min": -0.6162109375, "objective/train/value_reward_corr": 0.6969658744125964, "objective/train/value_std": 0.0143585205078125, "objective/train/weight_avg": 1.0008164644241333, "objective/train/weighted_lm_loss": 1.2873796224594116, "objective/train/weights_max": 1.118717074394226, "objective/train/weights_min": 0.36836710572242737, "theoretical_loss": 3.446478013239931, "tokens_seen": 2089025536 }, { "epoch": 0.27, "learning_rate": 0.00037065714514964294, "loss": 0.0667, "theoretical_loss": 3.446478013239931, "tokens_seen": 2089025536 }, { "epoch": 0.27, "learning_rate": 0.0003706170263981385, "loss": 0.0696, "theoretical_loss": 3.4464602857509874, "tokens_seen": 2089156608 }, { "epoch": 0.27, "learning_rate": 0.000370576907646634, "loss": 0.0626, "theoretical_loss": 3.4464425596856136, "tokens_seen": 2089287680 }, { "epoch": 0.27, "learning_rate": 0.0003705367888951296, "loss": 0.0714, "theoretical_loss": 3.446424835043606, "tokens_seen": 2089418752 }, { "epoch": 0.27, "learning_rate": 0.00037049667014362516, "loss": 0.068, "theoretical_loss": 3.446407111824761, "tokens_seen": 2089549824 }, { "epoch": 0.27, "learning_rate": 0.0003704565513921207, "loss": 0.0743, "theoretical_loss": 3.446389390028874, "tokens_seen": 2089680896 }, { "epoch": 0.27, "learning_rate": 0.00037041643264061624, "loss": 0.0647, "theoretical_loss": 3.4463716696557425, "tokens_seen": 2089811968 }, { "epoch": 0.27, "learning_rate": 0.0003703763138891118, "loss": 0.0685, "theoretical_loss": 3.4463539507051633, "tokens_seen": 2089943040 }, { "epoch": 0.27, "learning_rate": 0.0003703361951376073, "loss": 0.0714, "theoretical_loss": 3.4463362331769325, "tokens_seen": 2090074112 }, { "epoch": 0.27, "learning_rate": 0.00037029607638610284, "loss": 0.0687, "theoretical_loss": 3.4463185170708464, "tokens_seen": 2090205184 }, { "epoch": 0.27, "learning_rate": 0.0003702559576345984, "loss": 0.0692, "theoretical_loss": 3.4463008023867028, "tokens_seen": 2090336256 }, { "epoch": 0.27, "learning_rate": 0.000370215838883094, "loss": 0.0688, "theoretical_loss": 3.446283089124297, "tokens_seen": 2090467328 }, { "epoch": 0.27, "learning_rate": 0.0003701757201315895, "loss": 0.0699, "theoretical_loss": 3.4462653772834266, "tokens_seen": 2090598400 }, { "epoch": 0.27, "learning_rate": 0.00037013560138008506, "loss": 0.0707, "theoretical_loss": 3.4462476668638886, "tokens_seen": 2090729472 }, { "epoch": 0.27, "learning_rate": 0.00037009548262858063, "loss": 0.0693, "theoretical_loss": 3.446229957865479, "tokens_seen": 2090860544 }, { "epoch": 0.27, "learning_rate": 0.00037005536387707614, "loss": 0.0687, "theoretical_loss": 3.4462122502879957, "tokens_seen": 2090991616 }, { "epoch": 0.27, "learning_rate": 0.0003700152451255717, "loss": 0.0691, "theoretical_loss": 3.4461945441312354, "tokens_seen": 2091122688 }, { "epoch": 0.27, "learning_rate": 0.0003699751263740673, "loss": 0.0724, "theoretical_loss": 3.4461768393949943, "tokens_seen": 2091253760 }, { "epoch": 0.27, "learning_rate": 0.0003699350076225628, "loss": 0.0692, "theoretical_loss": 3.4461591360790704, "tokens_seen": 2091384832 }, { "epoch": 0.27, "learning_rate": 0.0003698948888710583, "loss": 0.0692, "theoretical_loss": 3.44614143418326, "tokens_seen": 2091515904 }, { "epoch": 0.27, "learning_rate": 0.0003698547701195539, "loss": 0.068, "theoretical_loss": 3.44612373370736, "tokens_seen": 2091646976 }, { "epoch": 0.27, "learning_rate": 0.00036981465136804944, "loss": 0.0689, "theoretical_loss": 3.4461060346511685, "tokens_seen": 2091778048 }, { "epoch": 0.27, "learning_rate": 0.00036977453261654496, "loss": 0.0693, "theoretical_loss": 3.446088337014482, "tokens_seen": 2091909120 }, { "epoch": 0.27, "learning_rate": 0.0003697344138650405, "loss": 0.0688, "theoretical_loss": 3.4460706407970982, "tokens_seen": 2092040192 }, { "epoch": 0.27, "learning_rate": 0.0003696942951135361, "loss": 0.0675, "theoretical_loss": 3.446052945998814, "tokens_seen": 2092171264 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0012236007023602724, "objective/train/docs_used": 762773, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.486613392829895, "objective/train/original_loss": 1.4866135120391846, "objective/train/theoretical_loss": 3.446035252619427, "objective/train/tokens_used": 462826976, "objective/train/value_avg": -0.009521484375, "objective/train/value_loss": 0.00021660851780325174, "objective/train/value_max": -8.887052536010742e-05, "objective/train/value_min": -0.5751953125, "objective/train/value_reward_corr": 0.6955922489067047, "objective/train/value_std": 0.01465606689453125, "objective/train/weight_avg": 1.0013253688812256, "objective/train/weighted_lm_loss": 1.4885613918304443, "objective/train/weights_max": 1.2346844673156738, "objective/train/weights_min": 0.3840547502040863, "theoretical_loss": 3.446035252619427, "tokens_seen": 2092302336 }, { "epoch": 0.27, "learning_rate": 0.0003696541763620316, "loss": 0.0701, "theoretical_loss": 3.446035252619427, "tokens_seen": 2092302336 }, { "epoch": 0.27, "learning_rate": 0.0003696140576105272, "loss": 0.0698, "theoretical_loss": 3.446017560658734, "tokens_seen": 2092433408 }, { "epoch": 0.27, "learning_rate": 0.00036957393885902275, "loss": 0.0676, "theoretical_loss": 3.4459998701165326, "tokens_seen": 2092564480 }, { "epoch": 0.27, "learning_rate": 0.00036953382010751826, "loss": 0.0688, "theoretical_loss": 3.4459821809926208, "tokens_seen": 2092695552 }, { "epoch": 0.27, "learning_rate": 0.0003694937013560138, "loss": 0.0654, "theoretical_loss": 3.4459644932867954, "tokens_seen": 2092826624 }, { "epoch": 0.27, "learning_rate": 0.00036945358260450934, "loss": 0.0663, "theoretical_loss": 3.4459468069988537, "tokens_seen": 2092957696 }, { "epoch": 0.27, "learning_rate": 0.0003694134638530049, "loss": 0.0705, "theoretical_loss": 3.445929122128594, "tokens_seen": 2093088768 }, { "epoch": 0.27, "learning_rate": 0.0003693733451015004, "loss": 0.0701, "theoretical_loss": 3.445911438675814, "tokens_seen": 2093219840 }, { "epoch": 0.27, "learning_rate": 0.000369333226349996, "loss": 0.067, "theoretical_loss": 3.4458937566403103, "tokens_seen": 2093350912 }, { "epoch": 0.27, "learning_rate": 0.00036929310759849156, "loss": 0.0689, "theoretical_loss": 3.445876076021882, "tokens_seen": 2093481984 }, { "epoch": 0.27, "learning_rate": 0.00036925298884698713, "loss": 0.0711, "theoretical_loss": 3.4458583968203254, "tokens_seen": 2093613056 }, { "epoch": 0.27, "learning_rate": 0.00036921287009548264, "loss": 0.0708, "theoretical_loss": 3.4458407190354388, "tokens_seen": 2093744128 }, { "epoch": 0.27, "learning_rate": 0.0003691727513439782, "loss": 0.0675, "theoretical_loss": 3.4458230426670204, "tokens_seen": 2093875200 }, { "epoch": 0.27, "learning_rate": 0.0003691326325924737, "loss": 0.0628, "theoretical_loss": 3.445805367714868, "tokens_seen": 2094006272 }, { "epoch": 0.27, "learning_rate": 0.00036909251384096924, "loss": 0.0714, "theoretical_loss": 3.4457876941787786, "tokens_seen": 2094137344 }, { "epoch": 0.27, "learning_rate": 0.0003690523950894648, "loss": 0.0716, "theoretical_loss": 3.445770022058551, "tokens_seen": 2094268416 }, { "epoch": 0.27, "learning_rate": 0.0003690122763379604, "loss": 0.0721, "theoretical_loss": 3.445752351353983, "tokens_seen": 2094399488 }, { "epoch": 0.27, "learning_rate": 0.0003689721575864559, "loss": 0.0716, "theoretical_loss": 3.445734682064873, "tokens_seen": 2094530560 }, { "epoch": 0.27, "learning_rate": 0.00036893203883495146, "loss": 0.0664, "theoretical_loss": 3.445717014191018, "tokens_seen": 2094661632 }, { "epoch": 0.27, "learning_rate": 0.00036889192008344703, "loss": 0.0715, "theoretical_loss": 3.4456993477322166, "tokens_seen": 2094792704 }, { "epoch": 0.27, "learning_rate": 0.0003688518013319426, "loss": 0.0646, "theoretical_loss": 3.4456816826882672, "tokens_seen": 2094923776 }, { "epoch": 0.27, "learning_rate": 0.0003688116825804381, "loss": 0.0708, "theoretical_loss": 3.4456640190589676, "tokens_seen": 2095054848 }, { "epoch": 0.27, "learning_rate": 0.0003687715638289337, "loss": 0.0674, "theoretical_loss": 3.4456463568441165, "tokens_seen": 2095185920 }, { "epoch": 0.27, "learning_rate": 0.0003687314450774292, "loss": 0.0655, "theoretical_loss": 3.445628696043512, "tokens_seen": 2095316992 }, { "epoch": 0.27, "learning_rate": 0.0003686913263259247, "loss": 0.065, "theoretical_loss": 3.4456110366569517, "tokens_seen": 2095448064 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.00015580607578158379, "objective/train/docs_used": 764034, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2378027439117432, "objective/train/original_loss": 1.2378026247024536, "objective/train/theoretical_loss": 3.4455933786842348, "objective/train/tokens_used": 466103776, "objective/train/value_avg": -0.00687408447265625, "objective/train/value_loss": 0.0002478698443155736, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.7451171875, "objective/train/value_reward_corr": 0.7613647753616819, "objective/train/value_std": 0.018218994140625, "objective/train/weight_avg": 1.0002690553665161, "objective/train/weighted_lm_loss": 1.2381939888000488, "objective/train/weights_max": 1.776610016822815, "objective/train/weights_min": 0.37362140417099, "theoretical_loss": 3.4455933786842348, "tokens_seen": 2095579136 }, { "epoch": 0.27, "learning_rate": 0.0003686512075744203, "loss": 0.0689, "theoretical_loss": 3.4455933786842348, "tokens_seen": 2095579136 }, { "epoch": 0.27, "learning_rate": 0.00036861108882291584, "loss": 0.0664, "theoretical_loss": 3.4455757221251595, "tokens_seen": 2095710208 }, { "epoch": 0.27, "learning_rate": 0.00036857097007141136, "loss": 0.068, "theoretical_loss": 3.4455580669795243, "tokens_seen": 2095841280 }, { "epoch": 0.27, "learning_rate": 0.00036853085131990693, "loss": 0.0722, "theoretical_loss": 3.4455404132471275, "tokens_seen": 2095972352 }, { "epoch": 0.27, "learning_rate": 0.0003684907325684025, "loss": 0.0693, "theoretical_loss": 3.4455227609277674, "tokens_seen": 2096103424 }, { "epoch": 0.27, "learning_rate": 0.00036845061381689806, "loss": 0.0683, "theoretical_loss": 3.4455051100212426, "tokens_seen": 2096234496 }, { "epoch": 0.27, "learning_rate": 0.0003684104950653936, "loss": 0.0686, "theoretical_loss": 3.445487460527352, "tokens_seen": 2096365568 }, { "epoch": 0.27, "learning_rate": 0.00036837037631388915, "loss": 0.0686, "theoretical_loss": 3.4454698124458942, "tokens_seen": 2096496640 }, { "epoch": 0.27, "learning_rate": 0.00036833025756238466, "loss": 0.0703, "theoretical_loss": 3.4454521657766675, "tokens_seen": 2096627712 }, { "epoch": 0.27, "learning_rate": 0.0003682901388108802, "loss": 0.0659, "theoretical_loss": 3.445434520519471, "tokens_seen": 2096758784 }, { "epoch": 0.27, "learning_rate": 0.00036825002005937574, "loss": 0.0702, "theoretical_loss": 3.445416876674104, "tokens_seen": 2096889856 }, { "epoch": 0.27, "learning_rate": 0.0003682099013078713, "loss": 0.0689, "theoretical_loss": 3.445399234240363, "tokens_seen": 2097020928 }, { "epoch": 0.27, "learning_rate": 0.0003681697825563668, "loss": 0.0691, "theoretical_loss": 3.44538159321805, "tokens_seen": 2097152000 }, { "epoch": 0.27, "learning_rate": 0.0003681296638048624, "loss": 0.0702, "theoretical_loss": 3.445363953606962, "tokens_seen": 2097283072 }, { "epoch": 0.27, "learning_rate": 0.00036808954505335796, "loss": 0.0666, "theoretical_loss": 3.4453463154068977, "tokens_seen": 2097414144 }, { "epoch": 0.27, "learning_rate": 0.00036804942630185353, "loss": 0.0724, "theoretical_loss": 3.445328678617657, "tokens_seen": 2097545216 }, { "epoch": 0.27, "learning_rate": 0.00036800930755034905, "loss": 0.068, "theoretical_loss": 3.4453110432390384, "tokens_seen": 2097676288 }, { "epoch": 0.27, "learning_rate": 0.0003679691887988446, "loss": 0.0704, "theoretical_loss": 3.445293409270841, "tokens_seen": 2097807360 }, { "epoch": 0.27, "learning_rate": 0.00036792907004734013, "loss": 0.0669, "theoretical_loss": 3.4452757767128643, "tokens_seen": 2097938432 }, { "epoch": 0.27, "learning_rate": 0.00036788895129583564, "loss": 0.0672, "theoretical_loss": 3.4452581455649067, "tokens_seen": 2098069504 }, { "epoch": 0.27, "learning_rate": 0.0003678488325443312, "loss": 0.0685, "theoretical_loss": 3.445240515826768, "tokens_seen": 2098200576 }, { "epoch": 0.27, "learning_rate": 0.0003678087137928268, "loss": 0.0695, "theoretical_loss": 3.4452228874982467, "tokens_seen": 2098331648 }, { "epoch": 0.27, "learning_rate": 0.0003677685950413223, "loss": 0.0691, "theoretical_loss": 3.4452052605791432, "tokens_seen": 2098462720 }, { "epoch": 0.27, "learning_rate": 0.00036772847628981786, "loss": 0.0626, "theoretical_loss": 3.4451876350692556, "tokens_seen": 2098593792 }, { "epoch": 0.27, "learning_rate": 0.00036768835753831343, "loss": 0.0676, "theoretical_loss": 3.4451700109683836, "tokens_seen": 2098724864 }, { "epoch": 0.27, "objective/train/advantage_avg": 0.0007613692432641983, "objective/train/docs_used": 764825, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.478780746459961, "objective/train/original_loss": 1.4787805080413818, "objective/train/theoretical_loss": 3.4451523882763273, "objective/train/tokens_used": 469380576, "objective/train/value_avg": -0.00847625732421875, "objective/train/value_loss": 0.00032453035237267613, "objective/train/value_max": -8.350610733032227e-05, "objective/train/value_min": -0.6376953125, "objective/train/value_reward_corr": 0.7295066285375629, "objective/train/value_std": 0.0185089111328125, "objective/train/weight_avg": 1.000903844833374, "objective/train/weighted_lm_loss": 1.4798256158828735, "objective/train/weights_max": 1.3752059936523438, "objective/train/weights_min": 0.39122337102890015, "theoretical_loss": 3.4451523882763273, "tokens_seen": 2098855936 }, { "epoch": 0.27, "learning_rate": 0.000367648238786809, "loss": 0.0742, "theoretical_loss": 3.4451523882763273, "tokens_seen": 2098855936 }, { "epoch": 0.27, "learning_rate": 0.0003676081200353045, "loss": 0.0675, "theoretical_loss": 3.4451347669928856, "tokens_seen": 2098987008 }, { "epoch": 0.27, "learning_rate": 0.0003675680012838001, "loss": 0.0695, "theoretical_loss": 3.445117147117857, "tokens_seen": 2099118080 }, { "epoch": 0.27, "learning_rate": 0.0003675278825322956, "loss": 0.0671, "theoretical_loss": 3.4450995286510424, "tokens_seen": 2099249152 }, { "epoch": 0.27, "learning_rate": 0.0003674877637807911, "loss": 0.068, "theoretical_loss": 3.445081911592241, "tokens_seen": 2099380224 }, { "epoch": 0.27, "learning_rate": 0.0003674476450292867, "loss": 0.0708, "theoretical_loss": 3.445064295941252, "tokens_seen": 2099511296 }, { "epoch": 0.27, "learning_rate": 0.00036740752627778225, "loss": 0.0655, "theoretical_loss": 3.445046681697875, "tokens_seen": 2099642368 }, { "epoch": 0.27, "learning_rate": 0.00036736740752627776, "loss": 0.0684, "theoretical_loss": 3.4450290688619103, "tokens_seen": 2099773440 }, { "epoch": 0.27, "learning_rate": 0.00036732728877477333, "loss": 0.0669, "theoretical_loss": 3.4450114574331567, "tokens_seen": 2099904512 }, { "epoch": 0.27, "learning_rate": 0.0003672871700232689, "loss": 0.0672, "theoretical_loss": 3.444993847411415, "tokens_seen": 2100035584 }, { "epoch": 0.27, "learning_rate": 0.00036724705127176447, "loss": 0.0671, "theoretical_loss": 3.444976238796484, "tokens_seen": 2100166656 }, { "epoch": 0.27, "learning_rate": 0.00036720693252026, "loss": 0.0688, "theoretical_loss": 3.4449586315881637, "tokens_seen": 2100297728 }, { "epoch": 0.27, "learning_rate": 0.00036716681376875555, "loss": 0.0642, "theoretical_loss": 3.4449410257862545, "tokens_seen": 2100428800 }, { "epoch": 0.27, "learning_rate": 0.00036712669501725106, "loss": 0.0665, "theoretical_loss": 3.4449234213905564, "tokens_seen": 2100559872 }, { "epoch": 0.27, "learning_rate": 0.0003670865762657466, "loss": 0.0686, "theoretical_loss": 3.4449058184008683, "tokens_seen": 2100690944 }, { "epoch": 0.27, "learning_rate": 0.00036704645751424214, "loss": 0.0658, "theoretical_loss": 3.4448882168169908, "tokens_seen": 2100822016 }, { "epoch": 0.27, "learning_rate": 0.0003670063387627377, "loss": 0.0651, "theoretical_loss": 3.4448706166387244, "tokens_seen": 2100953088 }, { "epoch": 0.27, "learning_rate": 0.00036696622001123323, "loss": 0.0633, "theoretical_loss": 3.444853017865869, "tokens_seen": 2101084160 }, { "epoch": 0.27, "learning_rate": 0.0003669261012597288, "loss": 0.0637, "theoretical_loss": 3.4448354204982237, "tokens_seen": 2101215232 }, { "epoch": 0.27, "learning_rate": 0.00036688598250822436, "loss": 0.0679, "theoretical_loss": 3.4448178245355896, "tokens_seen": 2101346304 }, { "epoch": 0.27, "learning_rate": 0.00036684586375671993, "loss": 0.0662, "theoretical_loss": 3.444800229977767, "tokens_seen": 2101477376 }, { "epoch": 0.27, "learning_rate": 0.00036680574500521545, "loss": 0.0676, "theoretical_loss": 3.4447826368245558, "tokens_seen": 2101608448 }, { "epoch": 0.27, "learning_rate": 0.000366765626253711, "loss": 0.0675, "theoretical_loss": 3.444765045075756, "tokens_seen": 2101739520 }, { "epoch": 0.27, "learning_rate": 0.00036672550750220653, "loss": 0.0639, "theoretical_loss": 3.4447474547311683, "tokens_seen": 2101870592 }, { "epoch": 0.27, "learning_rate": 0.00036668538875070204, "loss": 0.0678, "theoretical_loss": 3.444729865790593, "tokens_seen": 2102001664 }, { "epoch": 0.27, "objective/train/advantage_avg": -0.0013871404808014631, "objective/train/docs_used": 765966, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.333207607269287, "objective/train/original_loss": 1.333207607269287, "objective/train/theoretical_loss": 3.4447122782538306, "objective/train/tokens_used": 472657376, "objective/train/value_avg": -0.007678985595703125, "objective/train/value_loss": 0.00022608257131651044, "objective/train/value_max": -4.2319297790527344e-05, "objective/train/value_min": -0.481689453125, "objective/train/value_reward_corr": 0.6696205806776884, "objective/train/value_std": 0.01371002197265625, "objective/train/weight_avg": 0.9987199306488037, "objective/train/weighted_lm_loss": 1.3315794467926025, "objective/train/weights_max": 1.3142428398132324, "objective/train/weights_min": 0.40482643246650696, "theoretical_loss": 3.4447122782538306, "tokens_seen": 2102132736 }, { "epoch": 0.27, "learning_rate": 0.0003666452699991976, "loss": 0.066, "theoretical_loss": 3.4447122782538306, "tokens_seen": 2102132736 }, { "epoch": 0.27, "learning_rate": 0.0003666051512476932, "loss": 0.0703, "theoretical_loss": 3.4446946921206814, "tokens_seen": 2102263808 }, { "epoch": 0.27, "learning_rate": 0.00036656503249618875, "loss": 0.0729, "theoretical_loss": 3.444677107390946, "tokens_seen": 2102394880 }, { "epoch": 0.27, "learning_rate": 0.00036652491374468426, "loss": 0.0683, "theoretical_loss": 3.4446595240644244, "tokens_seen": 2102525952 }, { "epoch": 0.27, "learning_rate": 0.00036648479499317983, "loss": 0.0682, "theoretical_loss": 3.444641942140918, "tokens_seen": 2102657024 }, { "epoch": 0.27, "learning_rate": 0.0003664446762416754, "loss": 0.0687, "theoretical_loss": 3.444624361620227, "tokens_seen": 2102788096 }, { "epoch": 0.27, "learning_rate": 0.0003664045574901709, "loss": 0.071, "theoretical_loss": 3.4446067825021514, "tokens_seen": 2102919168 }, { "epoch": 0.27, "learning_rate": 0.0003663644387386665, "loss": 0.068, "theoretical_loss": 3.444589204786493, "tokens_seen": 2103050240 }, { "epoch": 0.27, "learning_rate": 0.000366324319987162, "loss": 0.0684, "theoretical_loss": 3.444571628473052, "tokens_seen": 2103181312 }, { "epoch": 0.27, "learning_rate": 0.0003662842012356575, "loss": 0.0651, "theoretical_loss": 3.4445540535616295, "tokens_seen": 2103312384 }, { "epoch": 0.27, "learning_rate": 0.0003662440824841531, "loss": 0.0642, "theoretical_loss": 3.4445364800520255, "tokens_seen": 2103443456 }, { "epoch": 0.27, "learning_rate": 0.00036620396373264865, "loss": 0.07, "theoretical_loss": 3.4445189079440417, "tokens_seen": 2103574528 }, { "epoch": 0.28, "learning_rate": 0.0003661638449811442, "loss": 0.068, "theoretical_loss": 3.444501337237478, "tokens_seen": 2103705600 }, { "epoch": 0.28, "learning_rate": 0.00036612372622963973, "loss": 0.0689, "theoretical_loss": 3.4444837679321365, "tokens_seen": 2103836672 }, { "epoch": 0.28, "learning_rate": 0.0003660836074781353, "loss": 0.0664, "theoretical_loss": 3.4444662000278177, "tokens_seen": 2103967744 }, { "epoch": 0.28, "learning_rate": 0.00036604348872663087, "loss": 0.0654, "theoretical_loss": 3.4444486335243223, "tokens_seen": 2104098816 }, { "epoch": 0.28, "learning_rate": 0.0003660033699751264, "loss": 0.0665, "theoretical_loss": 3.4444310684214514, "tokens_seen": 2104229888 }, { "epoch": 0.28, "learning_rate": 0.00036596325122362195, "loss": 0.0667, "theoretical_loss": 3.444413504719007, "tokens_seen": 2104360960 }, { "epoch": 0.28, "learning_rate": 0.00036592313247211746, "loss": 0.0716, "theoretical_loss": 3.444395942416789, "tokens_seen": 2104492032 }, { "epoch": 0.28, "learning_rate": 0.000365883013720613, "loss": 0.0661, "theoretical_loss": 3.444378381514599, "tokens_seen": 2104623104 }, { "epoch": 0.28, "learning_rate": 0.00036584289496910855, "loss": 0.0623, "theoretical_loss": 3.4443608220122384, "tokens_seen": 2104754176 }, { "epoch": 0.28, "learning_rate": 0.0003658027762176041, "loss": 0.0669, "theoretical_loss": 3.4443432639095084, "tokens_seen": 2104885248 }, { "epoch": 0.28, "learning_rate": 0.0003657626574660997, "loss": 0.0656, "theoretical_loss": 3.44432570720621, "tokens_seen": 2105016320 }, { "epoch": 0.28, "learning_rate": 0.0003657225387145952, "loss": 0.0661, "theoretical_loss": 3.4443081519021455, "tokens_seen": 2105147392 }, { "epoch": 0.28, "learning_rate": 0.00036568241996309076, "loss": 0.0659, "theoretical_loss": 3.4442905979971146, "tokens_seen": 2105278464 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0011220506858080626, "objective/train/docs_used": 767157, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3012733459472656, "objective/train/original_loss": 1.3012734651565552, "objective/train/theoretical_loss": 3.4442730454909203, "objective/train/tokens_used": 475934176, "objective/train/value_avg": -0.00641632080078125, "objective/train/value_loss": 0.0001616570370970294, "objective/train/value_max": -7.086992263793945e-05, "objective/train/value_min": -0.2509765625, "objective/train/value_reward_corr": 0.5754473664931911, "objective/train/value_std": 0.010101318359375, "objective/train/weight_avg": 1.0011937618255615, "objective/train/weighted_lm_loss": 1.301928162574768, "objective/train/weights_max": 1.245431900024414, "objective/train/weights_min": 0.36834144592285156, "theoretical_loss": 3.4442730454909203, "tokens_seen": 2105409536 }, { "epoch": 0.28, "learning_rate": 0.00036564230121158633, "loss": 0.0664, "theoretical_loss": 3.4442730454909203, "tokens_seen": 2105409536 }, { "epoch": 0.28, "learning_rate": 0.00036560218246008185, "loss": 0.07, "theoretical_loss": 3.4442554943833628, "tokens_seen": 2105540608 }, { "epoch": 0.28, "learning_rate": 0.0003655620637085774, "loss": 0.0667, "theoretical_loss": 3.444237944674245, "tokens_seen": 2105671680 }, { "epoch": 0.28, "learning_rate": 0.00036552194495707293, "loss": 0.0684, "theoretical_loss": 3.444220396363367, "tokens_seen": 2105802752 }, { "epoch": 0.28, "learning_rate": 0.00036548182620556844, "loss": 0.0712, "theoretical_loss": 3.444202849450531, "tokens_seen": 2105933824 }, { "epoch": 0.28, "learning_rate": 0.000365441707454064, "loss": 0.0693, "theoretical_loss": 3.444185303935539, "tokens_seen": 2106064896 }, { "epoch": 0.28, "learning_rate": 0.0003654015887025596, "loss": 0.0658, "theoretical_loss": 3.4441677598181926, "tokens_seen": 2106195968 }, { "epoch": 0.28, "learning_rate": 0.00036536146995105515, "loss": 0.064, "theoretical_loss": 3.4441502170982927, "tokens_seen": 2106327040 }, { "epoch": 0.28, "learning_rate": 0.00036532135119955066, "loss": 0.0682, "theoretical_loss": 3.4441326757756414, "tokens_seen": 2106458112 }, { "epoch": 0.28, "learning_rate": 0.00036528123244804623, "loss": 0.0634, "theoretical_loss": 3.444115135850041, "tokens_seen": 2106589184 }, { "epoch": 0.28, "learning_rate": 0.0003652411136965418, "loss": 0.0663, "theoretical_loss": 3.444097597321293, "tokens_seen": 2106720256 }, { "epoch": 0.28, "learning_rate": 0.0003652009949450373, "loss": 0.0652, "theoretical_loss": 3.444080060189199, "tokens_seen": 2106851328 }, { "epoch": 0.28, "learning_rate": 0.0003651608761935329, "loss": 0.0693, "theoretical_loss": 3.444062524453561, "tokens_seen": 2106982400 }, { "epoch": 0.28, "learning_rate": 0.0003651207574420284, "loss": 0.0681, "theoretical_loss": 3.444044990114181, "tokens_seen": 2107113472 }, { "epoch": 0.28, "learning_rate": 0.0003650806386905239, "loss": 0.0677, "theoretical_loss": 3.4440274571708613, "tokens_seen": 2107244544 }, { "epoch": 0.28, "learning_rate": 0.0003650405199390195, "loss": 0.0686, "theoretical_loss": 3.4440099256234036, "tokens_seen": 2107375616 }, { "epoch": 0.28, "learning_rate": 0.00036500040118751505, "loss": 0.0679, "theoretical_loss": 3.44399239547161, "tokens_seen": 2107506688 }, { "epoch": 0.28, "learning_rate": 0.0003649602824360106, "loss": 0.0688, "theoretical_loss": 3.4439748667152825, "tokens_seen": 2107637760 }, { "epoch": 0.28, "learning_rate": 0.00036492016368450613, "loss": 0.0667, "theoretical_loss": 3.443957339354223, "tokens_seen": 2107768832 }, { "epoch": 0.28, "learning_rate": 0.0003648800449330017, "loss": 0.0674, "theoretical_loss": 3.443939813388235, "tokens_seen": 2107899904 }, { "epoch": 0.28, "learning_rate": 0.00036483992618149727, "loss": 0.0683, "theoretical_loss": 3.4439222888171193, "tokens_seen": 2108030976 }, { "epoch": 0.28, "learning_rate": 0.0003647998074299928, "loss": 0.0682, "theoretical_loss": 3.4439047656406783, "tokens_seen": 2108162048 }, { "epoch": 0.28, "learning_rate": 0.00036475968867848835, "loss": 0.0699, "theoretical_loss": 3.4438872438587147, "tokens_seen": 2108293120 }, { "epoch": 0.28, "learning_rate": 0.0003647195699269839, "loss": 0.0677, "theoretical_loss": 3.443869723471031, "tokens_seen": 2108424192 }, { "epoch": 0.28, "learning_rate": 0.0003646794511754794, "loss": 0.0664, "theoretical_loss": 3.4438522044774293, "tokens_seen": 2108555264 }, { "epoch": 0.28, "objective/train/advantage_avg": -0.002506568329408765, "objective/train/docs_used": 768317, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2821872234344482, "objective/train/original_loss": 1.2821873426437378, "objective/train/theoretical_loss": 3.443834686877712, "objective/train/tokens_used": 479210976, "objective/train/value_avg": -0.0077972412109375, "objective/train/value_loss": 0.000320006744004786, "objective/train/value_max": -0.000110626220703125, "objective/train/value_min": -0.2734375, "objective/train/value_reward_corr": 0.7234458038210987, "objective/train/value_std": 0.01219940185546875, "objective/train/weight_avg": 0.9976415634155273, "objective/train/weighted_lm_loss": 1.2785577774047852, "objective/train/weights_max": 1.2217286825180054, "objective/train/weights_min": 0.3685055077075958, "theoretical_loss": 3.443834686877712, "tokens_seen": 2108686336 }, { "epoch": 0.28, "learning_rate": 0.00036463933242397495, "loss": 0.0682, "theoretical_loss": 3.443834686877712, "tokens_seen": 2108686336 }, { "epoch": 0.28, "learning_rate": 0.0003645992136724705, "loss": 0.0685, "theoretical_loss": 3.443817170671682, "tokens_seen": 2108817408 }, { "epoch": 0.28, "learning_rate": 0.0003645590949209661, "loss": 0.0669, "theoretical_loss": 3.443799655859141, "tokens_seen": 2108948480 }, { "epoch": 0.28, "learning_rate": 0.0003645189761694616, "loss": 0.0696, "theoretical_loss": 3.4437821424398924, "tokens_seen": 2109079552 }, { "epoch": 0.28, "learning_rate": 0.00036447885741795717, "loss": 0.0712, "theoretical_loss": 3.443764630413738, "tokens_seen": 2109210624 }, { "epoch": 0.28, "learning_rate": 0.00036443873866645273, "loss": 0.0628, "theoretical_loss": 3.4437471197804808, "tokens_seen": 2109341696 }, { "epoch": 0.28, "learning_rate": 0.00036439861991494825, "loss": 0.0698, "theoretical_loss": 3.4437296105399238, "tokens_seen": 2109472768 }, { "epoch": 0.28, "learning_rate": 0.0003643585011634438, "loss": 0.0704, "theoretical_loss": 3.4437121026918693, "tokens_seen": 2109603840 }, { "epoch": 0.28, "learning_rate": 0.0003643183824119394, "loss": 0.0689, "theoretical_loss": 3.4436945962361203, "tokens_seen": 2109734912 }, { "epoch": 0.28, "learning_rate": 0.00036427826366043485, "loss": 0.0656, "theoretical_loss": 3.4436770911724794, "tokens_seen": 2109865984 }, { "epoch": 0.28, "learning_rate": 0.0003642381449089304, "loss": 0.0655, "theoretical_loss": 3.4436595875007487, "tokens_seen": 2109997056 }, { "epoch": 0.28, "learning_rate": 0.000364198026157426, "loss": 0.0652, "theoretical_loss": 3.443642085220733, "tokens_seen": 2110128128 }, { "epoch": 0.28, "learning_rate": 0.00036415790740592155, "loss": 0.0662, "theoretical_loss": 3.443624584332233, "tokens_seen": 2110259200 }, { "epoch": 0.28, "learning_rate": 0.00036411778865441706, "loss": 0.0663, "theoretical_loss": 3.4436070848350533, "tokens_seen": 2110390272 }, { "epoch": 0.28, "learning_rate": 0.00036407766990291263, "loss": 0.0684, "theoretical_loss": 3.4435895867289963, "tokens_seen": 2110521344 }, { "epoch": 0.28, "learning_rate": 0.0003640375511514082, "loss": 0.0648, "theoretical_loss": 3.4435720900138644, "tokens_seen": 2110652416 }, { "epoch": 0.28, "learning_rate": 0.0003639974323999037, "loss": 0.0644, "theoretical_loss": 3.443554594689462, "tokens_seen": 2110783488 }, { "epoch": 0.28, "learning_rate": 0.0003639573136483993, "loss": 0.0673, "theoretical_loss": 3.443537100755591, "tokens_seen": 2110914560 }, { "epoch": 0.28, "learning_rate": 0.00036391719489689485, "loss": 0.0699, "theoretical_loss": 3.443519608212055, "tokens_seen": 2111045632 }, { "epoch": 0.28, "learning_rate": 0.00036387707614539037, "loss": 0.067, "theoretical_loss": 3.4435021170586575, "tokens_seen": 2111176704 }, { "epoch": 0.28, "learning_rate": 0.0003638369573938859, "loss": 0.0659, "theoretical_loss": 3.4434846272952013, "tokens_seen": 2111307776 }, { "epoch": 0.28, "learning_rate": 0.00036379683864238145, "loss": 0.0712, "theoretical_loss": 3.4434671389214904, "tokens_seen": 2111438848 }, { "epoch": 0.28, "learning_rate": 0.000363756719890877, "loss": 0.0684, "theoretical_loss": 3.4434496519373265, "tokens_seen": 2111569920 }, { "epoch": 0.28, "learning_rate": 0.00036371660113937253, "loss": 0.0684, "theoretical_loss": 3.4434321663425145, "tokens_seen": 2111700992 }, { "epoch": 0.28, "learning_rate": 0.0003636764823878681, "loss": 0.0694, "theoretical_loss": 3.443414682136857, "tokens_seen": 2111832064 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0004081126826349646, "objective/train/docs_used": 769436, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3227648735046387, "objective/train/original_loss": 1.3227647542953491, "objective/train/theoretical_loss": 3.443397199320158, "objective/train/tokens_used": 482487776, "objective/train/value_avg": -0.0084991455078125, "objective/train/value_loss": 0.0003818977274931967, "objective/train/value_max": -4.57763671875e-05, "objective/train/value_min": -0.640625, "objective/train/value_reward_corr": 0.6583072007107675, "objective/train/value_std": 0.0169219970703125, "objective/train/weight_avg": 1.0005717277526855, "objective/train/weighted_lm_loss": 1.3219845294952393, "objective/train/weights_max": 1.4685527086257935, "objective/train/weights_min": 0.3682684004306793, "theoretical_loss": 3.443397199320158, "tokens_seen": 2111963136 }, { "epoch": 0.28, "learning_rate": 0.00036363636363636367, "loss": 0.0705, "theoretical_loss": 3.443397199320158, "tokens_seen": 2111963136 }, { "epoch": 0.28, "learning_rate": 0.0003635962448848592, "loss": 0.0615, "theoretical_loss": 3.4433797178922205, "tokens_seen": 2112094208 }, { "epoch": 0.28, "learning_rate": 0.00036355612613335475, "loss": 0.0678, "theoretical_loss": 3.443362237852848, "tokens_seen": 2112225280 }, { "epoch": 0.28, "learning_rate": 0.0003635160073818503, "loss": 0.0673, "theoretical_loss": 3.4433447592018447, "tokens_seen": 2112356352 }, { "epoch": 0.28, "learning_rate": 0.00036347588863034583, "loss": 0.0684, "theoretical_loss": 3.4433272819390135, "tokens_seen": 2112487424 }, { "epoch": 0.28, "learning_rate": 0.00036343576987884135, "loss": 0.0678, "theoretical_loss": 3.443309806064158, "tokens_seen": 2112618496 }, { "epoch": 0.28, "learning_rate": 0.0003633956511273369, "loss": 0.067, "theoretical_loss": 3.4432923315770823, "tokens_seen": 2112749568 }, { "epoch": 0.28, "learning_rate": 0.0003633555323758325, "loss": 0.0675, "theoretical_loss": 3.44327485847759, "tokens_seen": 2112880640 }, { "epoch": 0.28, "learning_rate": 0.000363315413624328, "loss": 0.0651, "theoretical_loss": 3.4432573867654845, "tokens_seen": 2113011712 }, { "epoch": 0.28, "learning_rate": 0.00036327529487282357, "loss": 0.066, "theoretical_loss": 3.44323991644057, "tokens_seen": 2113142784 }, { "epoch": 0.28, "learning_rate": 0.00036323517612131914, "loss": 0.068, "theoretical_loss": 3.44322244750265, "tokens_seen": 2113273856 }, { "epoch": 0.28, "learning_rate": 0.00036319505736981465, "loss": 0.0665, "theoretical_loss": 3.443204979951529, "tokens_seen": 2113404928 }, { "epoch": 0.28, "learning_rate": 0.0003631549386183102, "loss": 0.0671, "theoretical_loss": 3.4431875137870103, "tokens_seen": 2113536000 }, { "epoch": 0.28, "learning_rate": 0.0003631148198668058, "loss": 0.0648, "theoretical_loss": 3.4431700490088977, "tokens_seen": 2113667072 }, { "epoch": 0.28, "learning_rate": 0.0003630747011153013, "loss": 0.0654, "theoretical_loss": 3.4431525856169953, "tokens_seen": 2113798144 }, { "epoch": 0.28, "learning_rate": 0.0003630345823637968, "loss": 0.0661, "theoretical_loss": 3.443135123611108, "tokens_seen": 2113929216 }, { "epoch": 0.28, "learning_rate": 0.0003629944636122924, "loss": 0.0647, "theoretical_loss": 3.443117662991039, "tokens_seen": 2114060288 }, { "epoch": 0.28, "learning_rate": 0.00036295434486078795, "loss": 0.0638, "theoretical_loss": 3.4431002037565923, "tokens_seen": 2114191360 }, { "epoch": 0.28, "learning_rate": 0.00036291422610928347, "loss": 0.0647, "theoretical_loss": 3.4430827459075726, "tokens_seen": 2114322432 }, { "epoch": 0.28, "learning_rate": 0.00036287410735777903, "loss": 0.0682, "theoretical_loss": 3.4430652894437834, "tokens_seen": 2114453504 }, { "epoch": 0.28, "learning_rate": 0.0003628339886062746, "loss": 0.0682, "theoretical_loss": 3.44304783436503, "tokens_seen": 2114584576 }, { "epoch": 0.28, "learning_rate": 0.0003627938698547701, "loss": 0.0616, "theoretical_loss": 3.4430303806711158, "tokens_seen": 2114715648 }, { "epoch": 0.28, "learning_rate": 0.0003627537511032657, "loss": 0.0699, "theoretical_loss": 3.443012928361845, "tokens_seen": 2114846720 }, { "epoch": 0.28, "learning_rate": 0.00036271363235176125, "loss": 0.0635, "theoretical_loss": 3.442995477437023, "tokens_seen": 2114977792 }, { "epoch": 0.28, "learning_rate": 0.00036267351360025677, "loss": 0.0655, "theoretical_loss": 3.442978027896453, "tokens_seen": 2115108864 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0007956673507578671, "objective/train/docs_used": 770743, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4216159582138062, "objective/train/original_loss": 1.4216159582138062, "objective/train/theoretical_loss": 3.44296057973994, "objective/train/tokens_used": 485764576, "objective/train/value_avg": -0.007350921630859375, "objective/train/value_loss": 0.0001341701135970652, "objective/train/value_max": -5.346536636352539e-05, "objective/train/value_min": -0.220703125, "objective/train/value_reward_corr": 0.6723495429638754, "objective/train/value_std": 0.0128173828125, "objective/train/weight_avg": 1.0008609294891357, "objective/train/weighted_lm_loss": 1.4222744703292847, "objective/train/weights_max": 1.192177176475525, "objective/train/weights_min": 0.6093227863311768, "theoretical_loss": 3.44296057973994, "tokens_seen": 2115239936 }, { "epoch": 0.28, "learning_rate": 0.0003626333948487523, "loss": 0.0703, "theoretical_loss": 3.44296057973994, "tokens_seen": 2115239936 }, { "epoch": 0.28, "learning_rate": 0.00036259327609724785, "loss": 0.0678, "theoretical_loss": 3.442943132967288, "tokens_seen": 2115371008 }, { "epoch": 0.28, "learning_rate": 0.0003625531573457434, "loss": 0.0686, "theoretical_loss": 3.442925687578302, "tokens_seen": 2115502080 }, { "epoch": 0.28, "learning_rate": 0.00036251303859423893, "loss": 0.0671, "theoretical_loss": 3.442908243572787, "tokens_seen": 2115633152 }, { "epoch": 0.28, "learning_rate": 0.0003624729198427345, "loss": 0.0676, "theoretical_loss": 3.442890800950546, "tokens_seen": 2115764224 }, { "epoch": 0.28, "learning_rate": 0.00036243280109123007, "loss": 0.0665, "theoretical_loss": 3.4428733597113856, "tokens_seen": 2115895296 }, { "epoch": 0.28, "learning_rate": 0.0003623926823397256, "loss": 0.0679, "theoretical_loss": 3.442855919855109, "tokens_seen": 2116026368 }, { "epoch": 0.28, "learning_rate": 0.00036235256358822115, "loss": 0.0649, "theoretical_loss": 3.4428384813815214, "tokens_seen": 2116157440 }, { "epoch": 0.28, "learning_rate": 0.0003623124448367167, "loss": 0.0661, "theoretical_loss": 3.4428210442904277, "tokens_seen": 2116288512 }, { "epoch": 0.28, "learning_rate": 0.00036227232608521223, "loss": 0.0659, "theoretical_loss": 3.4428036085816327, "tokens_seen": 2116419584 }, { "epoch": 0.28, "learning_rate": 0.00036223220733370775, "loss": 0.0653, "theoretical_loss": 3.4427861742549406, "tokens_seen": 2116550656 }, { "epoch": 0.28, "learning_rate": 0.0003621920885822033, "loss": 0.0682, "theoretical_loss": 3.442768741310157, "tokens_seen": 2116681728 }, { "epoch": 0.28, "learning_rate": 0.0003621519698306989, "loss": 0.0698, "theoretical_loss": 3.442751309747086, "tokens_seen": 2116812800 }, { "epoch": 0.28, "learning_rate": 0.0003621118510791944, "loss": 0.0721, "theoretical_loss": 3.442733879565534, "tokens_seen": 2116943872 }, { "epoch": 0.28, "learning_rate": 0.00036207173232768997, "loss": 0.0655, "theoretical_loss": 3.442716450765304, "tokens_seen": 2117074944 }, { "epoch": 0.28, "learning_rate": 0.00036203161357618554, "loss": 0.0694, "theoretical_loss": 3.442699023346203, "tokens_seen": 2117206016 }, { "epoch": 0.28, "learning_rate": 0.00036199149482468105, "loss": 0.0663, "theoretical_loss": 3.4426815973080345, "tokens_seen": 2117337088 }, { "epoch": 0.28, "learning_rate": 0.0003619513760731766, "loss": 0.071, "theoretical_loss": 3.4426641726506046, "tokens_seen": 2117468160 }, { "epoch": 0.28, "learning_rate": 0.0003619112573216722, "loss": 0.069, "theoretical_loss": 3.4426467493737176, "tokens_seen": 2117599232 }, { "epoch": 0.28, "learning_rate": 0.0003618711385701677, "loss": 0.0656, "theoretical_loss": 3.4426293274771798, "tokens_seen": 2117730304 }, { "epoch": 0.28, "learning_rate": 0.0003618310198186632, "loss": 0.0622, "theoretical_loss": 3.4426119069607948, "tokens_seen": 2117861376 }, { "epoch": 0.28, "learning_rate": 0.0003617909010671588, "loss": 0.0662, "theoretical_loss": 3.44259448782437, "tokens_seen": 2117992448 }, { "epoch": 0.28, "learning_rate": 0.00036175078231565435, "loss": 0.0738, "theoretical_loss": 3.4425770700677085, "tokens_seen": 2118123520 }, { "epoch": 0.28, "learning_rate": 0.00036171066356414987, "loss": 0.0709, "theoretical_loss": 3.4425596536906173, "tokens_seen": 2118254592 }, { "epoch": 0.28, "learning_rate": 0.00036167054481264544, "loss": 0.0628, "theoretical_loss": 3.4425422386929005, "tokens_seen": 2118385664 }, { "epoch": 0.28, "objective/train/advantage_avg": 0.0011272344272583723, "objective/train/docs_used": 771858, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3770737648010254, "objective/train/original_loss": 1.3770737648010254, "objective/train/theoretical_loss": 3.442524825074365, "objective/train/tokens_used": 489041376, "objective/train/value_avg": -0.005321502685546875, "objective/train/value_loss": 0.00014065524737816304, "objective/train/value_max": -0.0001080632209777832, "objective/train/value_min": -0.56005859375, "objective/train/value_reward_corr": 0.5762415152392202, "objective/train/value_std": 0.00846099853515625, "objective/train/weight_avg": 1.0011897087097168, "objective/train/weighted_lm_loss": 1.3787342309951782, "objective/train/weights_max": 1.2604204416275024, "objective/train/weights_min": 0.36918336153030396, "theoretical_loss": 3.442524825074365, "tokens_seen": 2118516736 }, { "epoch": 0.28, "learning_rate": 0.000361630426061141, "loss": 0.0666, "theoretical_loss": 3.442524825074365, "tokens_seen": 2118516736 }, { "epoch": 0.28, "learning_rate": 0.00036159030730963657, "loss": 0.0666, "theoretical_loss": 3.4425074128348148, "tokens_seen": 2118647808 }, { "epoch": 0.28, "learning_rate": 0.0003615501885581321, "loss": 0.0679, "theoretical_loss": 3.442490001974056, "tokens_seen": 2118778880 }, { "epoch": 0.28, "learning_rate": 0.00036151006980662765, "loss": 0.0648, "theoretical_loss": 3.442472592491894, "tokens_seen": 2118909952 }, { "epoch": 0.28, "learning_rate": 0.00036146995105512317, "loss": 0.0664, "theoretical_loss": 3.442455184388135, "tokens_seen": 2119041024 }, { "epoch": 0.28, "learning_rate": 0.0003614298323036187, "loss": 0.0647, "theoretical_loss": 3.442437777662584, "tokens_seen": 2119172096 }, { "epoch": 0.28, "learning_rate": 0.00036138971355211425, "loss": 0.0701, "theoretical_loss": 3.4424203723150466, "tokens_seen": 2119303168 }, { "epoch": 0.28, "learning_rate": 0.0003613495948006098, "loss": 0.0698, "theoretical_loss": 3.4424029683453288, "tokens_seen": 2119434240 }, { "epoch": 0.28, "learning_rate": 0.00036130947604910533, "loss": 0.0714, "theoretical_loss": 3.442385565753236, "tokens_seen": 2119565312 }, { "epoch": 0.28, "learning_rate": 0.0003612693572976009, "loss": 0.0702, "theoretical_loss": 3.442368164538575, "tokens_seen": 2119696384 }, { "epoch": 0.28, "learning_rate": 0.00036122923854609647, "loss": 0.0663, "theoretical_loss": 3.4423507647011498, "tokens_seen": 2119827456 }, { "epoch": 0.28, "learning_rate": 0.00036118911979459204, "loss": 0.0658, "theoretical_loss": 3.4423333662407676, "tokens_seen": 2119958528 }, { "epoch": 0.28, "learning_rate": 0.00036114900104308755, "loss": 0.0699, "theoretical_loss": 3.442315969157234, "tokens_seen": 2120089600 }, { "epoch": 0.29, "learning_rate": 0.0003611088822915831, "loss": 0.0688, "theoretical_loss": 3.442298573450355, "tokens_seen": 2120220672 }, { "epoch": 0.29, "learning_rate": 0.00036106876354007864, "loss": 0.0691, "theoretical_loss": 3.4422811791199357, "tokens_seen": 2120351744 }, { "epoch": 0.29, "learning_rate": 0.00036102864478857415, "loss": 0.0666, "theoretical_loss": 3.4422637861657837, "tokens_seen": 2120482816 }, { "epoch": 0.29, "learning_rate": 0.0003609885260370697, "loss": 0.0722, "theoretical_loss": 3.4422463945877038, "tokens_seen": 2120613888 }, { "epoch": 0.29, "learning_rate": 0.0003609484072855653, "loss": 0.0694, "theoretical_loss": 3.442229004385502, "tokens_seen": 2120744960 }, { "epoch": 0.29, "learning_rate": 0.0003609082885340608, "loss": 0.0667, "theoretical_loss": 3.4422116155589855, "tokens_seen": 2120876032 }, { "epoch": 0.29, "learning_rate": 0.00036086816978255637, "loss": 0.0633, "theoretical_loss": 3.44219422810796, "tokens_seen": 2121007104 }, { "epoch": 0.29, "learning_rate": 0.00036082805103105194, "loss": 0.0628, "theoretical_loss": 3.442176842032231, "tokens_seen": 2121138176 }, { "epoch": 0.29, "learning_rate": 0.0003607879322795475, "loss": 0.0631, "theoretical_loss": 3.4421594573316057, "tokens_seen": 2121269248 }, { "epoch": 0.29, "learning_rate": 0.000360747813528043, "loss": 0.0707, "theoretical_loss": 3.44214207400589, "tokens_seen": 2121400320 }, { "epoch": 0.29, "learning_rate": 0.0003607076947765386, "loss": 0.0723, "theoretical_loss": 3.44212469205489, "tokens_seen": 2121531392 }, { "epoch": 0.29, "learning_rate": 0.0003606675760250341, "loss": 0.0659, "theoretical_loss": 3.4421073114784124, "tokens_seen": 2121662464 }, { "epoch": 0.29, "objective/train/advantage_avg": -0.0008220600429922342, "objective/train/docs_used": 773131, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3637526035308838, "objective/train/original_loss": 1.3637526035308838, "objective/train/theoretical_loss": 3.4420899322762635, "objective/train/tokens_used": 492318176, "objective/train/value_avg": -0.00379180908203125, "objective/train/value_loss": 0.00016613685875199735, "objective/train/value_max": -6.973743438720703e-05, "objective/train/value_min": -0.4853515625, "objective/train/value_reward_corr": 0.7881403872439248, "objective/train/value_std": 0.0123443603515625, "objective/train/weight_avg": 0.9992547631263733, "objective/train/weighted_lm_loss": 1.362669587135315, "objective/train/weights_max": 1.1529616117477417, "objective/train/weights_min": 0.42700815200805664, "theoretical_loss": 3.4420899322762635, "tokens_seen": 2121793536 }, { "epoch": 0.29, "learning_rate": 0.0003606274572735296, "loss": 0.068, "theoretical_loss": 3.4420899322762635, "tokens_seen": 2121793536 }, { "epoch": 0.29, "learning_rate": 0.0003605873385220252, "loss": 0.0693, "theoretical_loss": 3.4420725544482496, "tokens_seen": 2121924608 }, { "epoch": 0.29, "learning_rate": 0.00036054721977052075, "loss": 0.065, "theoretical_loss": 3.4420551779941775, "tokens_seen": 2122055680 }, { "epoch": 0.29, "learning_rate": 0.00036050710101901627, "loss": 0.0687, "theoretical_loss": 3.4420378029138536, "tokens_seen": 2122186752 }, { "epoch": 0.29, "learning_rate": 0.00036046698226751184, "loss": 0.0696, "theoretical_loss": 3.4420204292070844, "tokens_seen": 2122317824 }, { "epoch": 0.29, "learning_rate": 0.0003604268635160074, "loss": 0.0661, "theoretical_loss": 3.442003056873676, "tokens_seen": 2122448896 }, { "epoch": 0.29, "learning_rate": 0.000360386744764503, "loss": 0.0724, "theoretical_loss": 3.4419856859134357, "tokens_seen": 2122579968 }, { "epoch": 0.29, "learning_rate": 0.0003603466260129985, "loss": 0.0657, "theoretical_loss": 3.44196831632617, "tokens_seen": 2122711040 }, { "epoch": 0.29, "learning_rate": 0.00036030650726149406, "loss": 0.0716, "theoretical_loss": 3.4419509481116854, "tokens_seen": 2122842112 }, { "epoch": 0.29, "learning_rate": 0.00036026638850998957, "loss": 0.0655, "theoretical_loss": 3.4419335812697893, "tokens_seen": 2122973184 }, { "epoch": 0.29, "learning_rate": 0.0003602262697584851, "loss": 0.0677, "theoretical_loss": 3.441916215800288, "tokens_seen": 2123104256 }, { "epoch": 0.29, "learning_rate": 0.00036018615100698065, "loss": 0.0709, "theoretical_loss": 3.4418988517029883, "tokens_seen": 2123235328 }, { "epoch": 0.29, "learning_rate": 0.0003601460322554762, "loss": 0.062, "theoretical_loss": 3.441881488977697, "tokens_seen": 2123366400 }, { "epoch": 0.29, "learning_rate": 0.00036010591350397174, "loss": 0.0669, "theoretical_loss": 3.4418641276242212, "tokens_seen": 2123497472 }, { "epoch": 0.29, "learning_rate": 0.0003600657947524673, "loss": 0.0693, "theoretical_loss": 3.441846767642368, "tokens_seen": 2123628544 }, { "epoch": 0.29, "learning_rate": 0.00036002567600096287, "loss": 0.0675, "theoretical_loss": 3.4418294090319437, "tokens_seen": 2123759616 }, { "epoch": 0.29, "learning_rate": 0.00035998555724945844, "loss": 0.0698, "theoretical_loss": 3.4418120517927564, "tokens_seen": 2123890688 }, { "epoch": 0.29, "learning_rate": 0.00035994543849795395, "loss": 0.0693, "theoretical_loss": 3.4417946959246124, "tokens_seen": 2124021760 }, { "epoch": 0.29, "learning_rate": 0.0003599053197464495, "loss": 0.0692, "theoretical_loss": 3.441777341427319, "tokens_seen": 2124152832 }, { "epoch": 0.29, "learning_rate": 0.00035986520099494504, "loss": 0.0679, "theoretical_loss": 3.4417599883006833, "tokens_seen": 2124283904 }, { "epoch": 0.29, "learning_rate": 0.00035982508224344055, "loss": 0.0695, "theoretical_loss": 3.4417426365445127, "tokens_seen": 2124414976 }, { "epoch": 0.29, "learning_rate": 0.0003597849634919361, "loss": 0.069, "theoretical_loss": 3.441725286158614, "tokens_seen": 2124546048 }, { "epoch": 0.29, "learning_rate": 0.0003597448447404317, "loss": 0.0681, "theoretical_loss": 3.4417079371427945, "tokens_seen": 2124677120 }, { "epoch": 0.29, "learning_rate": 0.0003597047259889272, "loss": 0.0676, "theoretical_loss": 3.4416905894968624, "tokens_seen": 2124808192 }, { "epoch": 0.29, "learning_rate": 0.00035966460723742277, "loss": 0.065, "theoretical_loss": 3.441673243220624, "tokens_seen": 2124939264 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.000777370878495276, "objective/train/docs_used": 774338, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3705503940582275, "objective/train/original_loss": 1.3705503940582275, "objective/train/theoretical_loss": 3.4416558983138863, "objective/train/tokens_used": 495594976, "objective/train/value_avg": -0.00748443603515625, "objective/train/value_loss": 0.0001435621816199273, "objective/train/value_max": -0.00012826919555664062, "objective/train/value_min": -0.2998046875, "objective/train/value_reward_corr": 0.6908329163242205, "objective/train/value_std": 0.01174163818359375, "objective/train/weight_avg": 1.0008445978164673, "objective/train/weighted_lm_loss": 1.3716644048690796, "objective/train/weights_max": 1.157467007637024, "objective/train/weights_min": 0.37153780460357666, "theoretical_loss": 3.4416558983138863, "tokens_seen": 2125070336 }, { "epoch": 0.29, "learning_rate": 0.00035962448848591834, "loss": 0.071, "theoretical_loss": 3.4416558983138863, "tokens_seen": 2125070336 }, { "epoch": 0.29, "learning_rate": 0.0003595843697344139, "loss": 0.0646, "theoretical_loss": 3.4416385547764583, "tokens_seen": 2125201408 }, { "epoch": 0.29, "learning_rate": 0.0003595442509829094, "loss": 0.0665, "theoretical_loss": 3.4416212126081467, "tokens_seen": 2125332480 }, { "epoch": 0.29, "learning_rate": 0.000359504132231405, "loss": 0.0695, "theoretical_loss": 3.4416038718087583, "tokens_seen": 2125463552 }, { "epoch": 0.29, "learning_rate": 0.0003594640134799005, "loss": 0.0677, "theoretical_loss": 3.4415865323781016, "tokens_seen": 2125594624 }, { "epoch": 0.29, "learning_rate": 0.000359423894728396, "loss": 0.0672, "theoretical_loss": 3.4415691943159836, "tokens_seen": 2125725696 }, { "epoch": 0.29, "learning_rate": 0.0003593837759768916, "loss": 0.0688, "theoretical_loss": 3.4415518576222124, "tokens_seen": 2125856768 }, { "epoch": 0.29, "learning_rate": 0.00035934365722538716, "loss": 0.0685, "theoretical_loss": 3.441534522296595, "tokens_seen": 2125987840 }, { "epoch": 0.29, "learning_rate": 0.00035930353847388267, "loss": 0.0673, "theoretical_loss": 3.4415171883389397, "tokens_seen": 2126118912 }, { "epoch": 0.29, "learning_rate": 0.00035926341972237824, "loss": 0.0712, "theoretical_loss": 3.441499855749054, "tokens_seen": 2126249984 }, { "epoch": 0.29, "learning_rate": 0.0003592233009708738, "loss": 0.064, "theoretical_loss": 3.4414825245267453, "tokens_seen": 2126381056 }, { "epoch": 0.29, "learning_rate": 0.0003591831822193694, "loss": 0.066, "theoretical_loss": 3.441465194671822, "tokens_seen": 2126512128 }, { "epoch": 0.29, "learning_rate": 0.0003591430634678649, "loss": 0.0696, "theoretical_loss": 3.441447866184092, "tokens_seen": 2126643200 }, { "epoch": 0.29, "learning_rate": 0.00035910294471636046, "loss": 0.0697, "theoretical_loss": 3.4414305390633624, "tokens_seen": 2126774272 }, { "epoch": 0.29, "learning_rate": 0.00035906282596485597, "loss": 0.0703, "theoretical_loss": 3.441413213309442, "tokens_seen": 2126905344 }, { "epoch": 0.29, "learning_rate": 0.0003590227072133515, "loss": 0.0691, "theoretical_loss": 3.4413958889221385, "tokens_seen": 2127036416 }, { "epoch": 0.29, "learning_rate": 0.00035898258846184705, "loss": 0.0685, "theoretical_loss": 3.4413785659012595, "tokens_seen": 2127167488 }, { "epoch": 0.29, "learning_rate": 0.0003589424697103426, "loss": 0.0669, "theoretical_loss": 3.4413612442466133, "tokens_seen": 2127298560 }, { "epoch": 0.29, "learning_rate": 0.0003589023509588382, "loss": 0.0645, "theoretical_loss": 3.4413439239580077, "tokens_seen": 2127429632 }, { "epoch": 0.29, "learning_rate": 0.0003588622322073337, "loss": 0.0674, "theoretical_loss": 3.441326605035252, "tokens_seen": 2127560704 }, { "epoch": 0.29, "learning_rate": 0.0003588221134558293, "loss": 0.0677, "theoretical_loss": 3.441309287478153, "tokens_seen": 2127691776 }, { "epoch": 0.29, "learning_rate": 0.00035878199470432484, "loss": 0.0644, "theoretical_loss": 3.441291971286519, "tokens_seen": 2127822848 }, { "epoch": 0.29, "learning_rate": 0.00035874187595282036, "loss": 0.0696, "theoretical_loss": 3.4412746564601586, "tokens_seen": 2127953920 }, { "epoch": 0.29, "learning_rate": 0.0003587017572013159, "loss": 0.0679, "theoretical_loss": 3.44125734299888, "tokens_seen": 2128084992 }, { "epoch": 0.29, "learning_rate": 0.00035866163844981144, "loss": 0.0645, "theoretical_loss": 3.441240030902492, "tokens_seen": 2128216064 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0007559107034467161, "objective/train/docs_used": 775219, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2165194749832153, "objective/train/original_loss": 1.2165195941925049, "objective/train/theoretical_loss": 3.4412227201708028, "objective/train/tokens_used": 498871776, "objective/train/value_avg": -0.00507354736328125, "objective/train/value_loss": 5.614657493424602e-05, "objective/train/value_max": -7.486343383789062e-05, "objective/train/value_min": -0.1885986328125, "objective/train/value_reward_corr": 0.7025374899422115, "objective/train/value_std": 0.00710296630859375, "objective/train/weight_avg": 1.0007838010787964, "objective/train/weighted_lm_loss": 1.2182343006134033, "objective/train/weights_max": 1.1087247133255005, "objective/train/weights_min": 0.8186957836151123, "theoretical_loss": 3.4412227201708028, "tokens_seen": 2128347136 }, { "epoch": 0.29, "learning_rate": 0.00035862151969830695, "loss": 0.0652, "theoretical_loss": 3.4412227201708028, "tokens_seen": 2128347136 }, { "epoch": 0.29, "learning_rate": 0.0003585814009468025, "loss": 0.0686, "theoretical_loss": 3.44120541080362, "tokens_seen": 2128478208 }, { "epoch": 0.29, "learning_rate": 0.0003585412821952981, "loss": 0.0683, "theoretical_loss": 3.4411881028007527, "tokens_seen": 2128609280 }, { "epoch": 0.29, "learning_rate": 0.00035850116344379366, "loss": 0.0689, "theoretical_loss": 3.4411707961620093, "tokens_seen": 2128740352 }, { "epoch": 0.29, "learning_rate": 0.00035846104469228917, "loss": 0.0703, "theoretical_loss": 3.4411534908871984, "tokens_seen": 2128871424 }, { "epoch": 0.29, "learning_rate": 0.00035842092594078474, "loss": 0.0678, "theoretical_loss": 3.4411361869761277, "tokens_seen": 2129002496 }, { "epoch": 0.29, "learning_rate": 0.0003583808071892803, "loss": 0.0694, "theoretical_loss": 3.4411188844286067, "tokens_seen": 2129133568 }, { "epoch": 0.29, "learning_rate": 0.0003583406884377758, "loss": 0.0693, "theoretical_loss": 3.4411015832444436, "tokens_seen": 2129264640 }, { "epoch": 0.29, "learning_rate": 0.0003583005696862714, "loss": 0.0683, "theoretical_loss": 3.441084283423448, "tokens_seen": 2129395712 }, { "epoch": 0.29, "learning_rate": 0.0003582604509347669, "loss": 0.0722, "theoretical_loss": 3.441066984965427, "tokens_seen": 2129526784 }, { "epoch": 0.29, "learning_rate": 0.0003582203321832624, "loss": 0.0658, "theoretical_loss": 3.4410496878701906, "tokens_seen": 2129657856 }, { "epoch": 0.29, "learning_rate": 0.000358180213431758, "loss": 0.064, "theoretical_loss": 3.4410323921375467, "tokens_seen": 2129788928 }, { "epoch": 0.29, "learning_rate": 0.00035814009468025356, "loss": 0.0638, "theoretical_loss": 3.441015097767305, "tokens_seen": 2129920000 }, { "epoch": 0.29, "learning_rate": 0.0003580999759287491, "loss": 0.0659, "theoretical_loss": 3.4409978047592737, "tokens_seen": 2130051072 }, { "epoch": 0.29, "learning_rate": 0.00035805985717724464, "loss": 0.067, "theoretical_loss": 3.440980513113262, "tokens_seen": 2130182144 }, { "epoch": 0.29, "learning_rate": 0.0003580197384257402, "loss": 0.0674, "theoretical_loss": 3.4409632228290783, "tokens_seen": 2130313216 }, { "epoch": 0.29, "learning_rate": 0.0003579796196742358, "loss": 0.0668, "theoretical_loss": 3.4409459339065327, "tokens_seen": 2130444288 }, { "epoch": 0.29, "learning_rate": 0.0003579395009227313, "loss": 0.0665, "theoretical_loss": 3.4409286463454327, "tokens_seen": 2130575360 }, { "epoch": 0.29, "learning_rate": 0.00035789938217122686, "loss": 0.0649, "theoretical_loss": 3.4409113601455887, "tokens_seen": 2130706432 }, { "epoch": 0.29, "learning_rate": 0.00035785926341972237, "loss": 0.0694, "theoretical_loss": 3.440894075306809, "tokens_seen": 2130837504 }, { "epoch": 0.29, "learning_rate": 0.0003578191446682179, "loss": 0.0686, "theoretical_loss": 3.4408767918289027, "tokens_seen": 2130968576 }, { "epoch": 0.29, "learning_rate": 0.00035777902591671345, "loss": 0.0698, "theoretical_loss": 3.440859509711679, "tokens_seen": 2131099648 }, { "epoch": 0.29, "learning_rate": 0.000357738907165209, "loss": 0.0684, "theoretical_loss": 3.4408422289549483, "tokens_seen": 2131230720 }, { "epoch": 0.29, "learning_rate": 0.0003576987884137046, "loss": 0.0706, "theoretical_loss": 3.440824949558518, "tokens_seen": 2131361792 }, { "epoch": 0.29, "learning_rate": 0.0003576586696622001, "loss": 0.067, "theoretical_loss": 3.4408076715221982, "tokens_seen": 2131492864 }, { "epoch": 0.29, "objective/train/advantage_avg": 0.0001498600613558665, "objective/train/docs_used": 776312, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1855442523956299, "objective/train/original_loss": 1.1855442523956299, "objective/train/theoretical_loss": 3.4407903948457985, "objective/train/tokens_used": 502148576, "objective/train/value_avg": -0.006328582763671875, "objective/train/value_loss": 0.00015378475654870272, "objective/train/value_max": -6.556510925292969e-05, "objective/train/value_min": -0.3681640625, "objective/train/value_reward_corr": 0.6957556715258453, "objective/train/value_std": 0.01100921630859375, "objective/train/weight_avg": 1.000218391418457, "objective/train/weighted_lm_loss": 1.1855714321136475, "objective/train/weights_max": 1.1016281843185425, "objective/train/weights_min": 0.36854350566864014, "theoretical_loss": 3.4407903948457985, "tokens_seen": 2131623936 }, { "epoch": 0.29, "learning_rate": 0.0003576185509106957, "loss": 0.0664, "theoretical_loss": 3.4407903948457985, "tokens_seen": 2131623936 }, { "epoch": 0.29, "learning_rate": 0.00035757843215919124, "loss": 0.0702, "theoretical_loss": 3.440773119529128, "tokens_seen": 2131755008 }, { "epoch": 0.29, "learning_rate": 0.00035753831340768676, "loss": 0.0657, "theoretical_loss": 3.440755845571996, "tokens_seen": 2131886080 }, { "epoch": 0.29, "learning_rate": 0.0003574981946561823, "loss": 0.0663, "theoretical_loss": 3.440738572974212, "tokens_seen": 2132017152 }, { "epoch": 0.29, "learning_rate": 0.00035745807590467784, "loss": 0.0703, "theoretical_loss": 3.4407213017355858, "tokens_seen": 2132148224 }, { "epoch": 0.29, "learning_rate": 0.00035741795715317335, "loss": 0.0708, "theoretical_loss": 3.440704031855926, "tokens_seen": 2132279296 }, { "epoch": 0.29, "learning_rate": 0.0003573778384016689, "loss": 0.066, "theoretical_loss": 3.440686763335043, "tokens_seen": 2132410368 }, { "epoch": 0.29, "learning_rate": 0.0003573377196501645, "loss": 0.062, "theoretical_loss": 3.440669496172746, "tokens_seen": 2132541440 }, { "epoch": 0.29, "learning_rate": 0.00035729760089866006, "loss": 0.0695, "theoretical_loss": 3.440652230368845, "tokens_seen": 2132672512 }, { "epoch": 0.29, "learning_rate": 0.00035725748214715557, "loss": 0.0678, "theoretical_loss": 3.4406349659231488, "tokens_seen": 2132803584 }, { "epoch": 0.29, "learning_rate": 0.00035721736339565114, "loss": 0.0693, "theoretical_loss": 3.4406177028354685, "tokens_seen": 2132934656 }, { "epoch": 0.29, "learning_rate": 0.0003571772446441467, "loss": 0.067, "theoretical_loss": 3.4406004411056124, "tokens_seen": 2133065728 }, { "epoch": 0.29, "learning_rate": 0.0003571371258926422, "loss": 0.0663, "theoretical_loss": 3.440583180733391, "tokens_seen": 2133196800 }, { "epoch": 0.29, "learning_rate": 0.0003570970071411378, "loss": 0.0636, "theoretical_loss": 3.4405659217186138, "tokens_seen": 2133327872 }, { "epoch": 0.29, "learning_rate": 0.00035705688838963336, "loss": 0.0671, "theoretical_loss": 3.440548664061091, "tokens_seen": 2133458944 }, { "epoch": 0.29, "learning_rate": 0.0003570167696381288, "loss": 0.0663, "theoretical_loss": 3.440531407760633, "tokens_seen": 2133590016 }, { "epoch": 0.29, "learning_rate": 0.0003569766508866244, "loss": 0.0707, "theoretical_loss": 3.440514152817048, "tokens_seen": 2133721088 }, { "epoch": 0.29, "learning_rate": 0.00035693653213511996, "loss": 0.0711, "theoretical_loss": 3.4404968992301477, "tokens_seen": 2133852160 }, { "epoch": 0.29, "learning_rate": 0.0003568964133836155, "loss": 0.0682, "theoretical_loss": 3.4404796469997416, "tokens_seen": 2133983232 }, { "epoch": 0.29, "learning_rate": 0.00035685629463211104, "loss": 0.0664, "theoretical_loss": 3.440462396125639, "tokens_seen": 2134114304 }, { "epoch": 0.29, "learning_rate": 0.0003568161758806066, "loss": 0.0672, "theoretical_loss": 3.440445146607651, "tokens_seen": 2134245376 }, { "epoch": 0.29, "learning_rate": 0.0003567760571291022, "loss": 0.0703, "theoretical_loss": 3.4404278984455874, "tokens_seen": 2134376448 }, { "epoch": 0.29, "learning_rate": 0.0003567359383775977, "loss": 0.0691, "theoretical_loss": 3.440410651639258, "tokens_seen": 2134507520 }, { "epoch": 0.29, "learning_rate": 0.00035669581962609326, "loss": 0.0694, "theoretical_loss": 3.4403934061884733, "tokens_seen": 2134638592 }, { "epoch": 0.29, "learning_rate": 0.00035665570087458883, "loss": 0.0663, "theoretical_loss": 3.4403761620930435, "tokens_seen": 2134769664 }, { "epoch": 0.29, "objective/train/advantage_avg": -0.0004773768305312842, "objective/train/docs_used": 777595, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1504534482955933, "objective/train/original_loss": 1.1504533290863037, "objective/train/theoretical_loss": 3.4403589193527786, "objective/train/tokens_used": 505425376, "objective/train/value_avg": -0.012115478515625, "objective/train/value_loss": 0.0002985993633046746, "objective/train/value_max": -8.547306060791016e-05, "objective/train/value_min": -0.68359375, "objective/train/value_reward_corr": 0.7639669997461078, "objective/train/value_std": 0.020294189453125, "objective/train/weight_avg": 0.9996575713157654, "objective/train/weighted_lm_loss": 1.1500275135040283, "objective/train/weights_max": 1.4734736680984497, "objective/train/weights_min": 0.36835795640945435, "theoretical_loss": 3.4403589193527786, "tokens_seen": 2134900736 }, { "epoch": 0.29, "learning_rate": 0.0003566155821230843, "loss": 0.0641, "theoretical_loss": 3.4403589193527786, "tokens_seen": 2134900736 }, { "epoch": 0.29, "learning_rate": 0.00035657546337157986, "loss": 0.07, "theoretical_loss": 3.4403416779674894, "tokens_seen": 2135031808 }, { "epoch": 0.29, "learning_rate": 0.0003565353446200754, "loss": 0.0647, "theoretical_loss": 3.440324437936986, "tokens_seen": 2135162880 }, { "epoch": 0.29, "learning_rate": 0.000356495225868571, "loss": 0.0703, "theoretical_loss": 3.4403071992610785, "tokens_seen": 2135293952 }, { "epoch": 0.29, "learning_rate": 0.0003564551071170665, "loss": 0.0659, "theoretical_loss": 3.4402899619395786, "tokens_seen": 2135425024 }, { "epoch": 0.29, "learning_rate": 0.0003564149883655621, "loss": 0.0688, "theoretical_loss": 3.440272725972295, "tokens_seen": 2135556096 }, { "epoch": 0.29, "learning_rate": 0.00035637486961405764, "loss": 0.0674, "theoretical_loss": 3.440255491359039, "tokens_seen": 2135687168 }, { "epoch": 0.29, "learning_rate": 0.00035633475086255316, "loss": 0.0656, "theoretical_loss": 3.440238258099621, "tokens_seen": 2135818240 }, { "epoch": 0.29, "learning_rate": 0.0003562946321110487, "loss": 0.0672, "theoretical_loss": 3.440221026193852, "tokens_seen": 2135949312 }, { "epoch": 0.29, "learning_rate": 0.0003562545133595443, "loss": 0.0684, "theoretical_loss": 3.4402037956415423, "tokens_seen": 2136080384 }, { "epoch": 0.29, "learning_rate": 0.0003562143946080398, "loss": 0.063, "theoretical_loss": 3.4401865664425024, "tokens_seen": 2136211456 }, { "epoch": 0.29, "learning_rate": 0.0003561742758565353, "loss": 0.065, "theoretical_loss": 3.4401693385965437, "tokens_seen": 2136342528 }, { "epoch": 0.29, "learning_rate": 0.0003561341571050309, "loss": 0.0691, "theoretical_loss": 3.440152112103476, "tokens_seen": 2136473600 }, { "epoch": 0.29, "learning_rate": 0.00035609403835352646, "loss": 0.0706, "theoretical_loss": 3.4401348869631105, "tokens_seen": 2136604672 }, { "epoch": 0.3, "learning_rate": 0.000356053919602022, "loss": 0.0692, "theoretical_loss": 3.4401176631752577, "tokens_seen": 2136735744 }, { "epoch": 0.3, "learning_rate": 0.00035601380085051754, "loss": 0.0675, "theoretical_loss": 3.4401004407397293, "tokens_seen": 2136866816 }, { "epoch": 0.3, "learning_rate": 0.0003559736820990131, "loss": 0.0679, "theoretical_loss": 3.4400832196563353, "tokens_seen": 2136997888 }, { "epoch": 0.3, "learning_rate": 0.0003559335633475086, "loss": 0.0679, "theoretical_loss": 3.4400659999248866, "tokens_seen": 2137128960 }, { "epoch": 0.3, "learning_rate": 0.0003558934445960042, "loss": 0.0662, "theoretical_loss": 3.440048781545195, "tokens_seen": 2137260032 }, { "epoch": 0.3, "learning_rate": 0.00035585332584449976, "loss": 0.0667, "theoretical_loss": 3.4400315645170707, "tokens_seen": 2137391104 }, { "epoch": 0.3, "learning_rate": 0.0003558132070929953, "loss": 0.0645, "theoretical_loss": 3.440014348840325, "tokens_seen": 2137522176 }, { "epoch": 0.3, "learning_rate": 0.0003557730883414908, "loss": 0.0658, "theoretical_loss": 3.439997134514769, "tokens_seen": 2137653248 }, { "epoch": 0.3, "learning_rate": 0.00035573296958998636, "loss": 0.0682, "theoretical_loss": 3.439979921540214, "tokens_seen": 2137784320 }, { "epoch": 0.3, "learning_rate": 0.0003556928508384819, "loss": 0.0641, "theoretical_loss": 3.439962709916471, "tokens_seen": 2137915392 }, { "epoch": 0.3, "learning_rate": 0.00035565273208697744, "loss": 0.0669, "theoretical_loss": 3.4399454996433514, "tokens_seen": 2138046464 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.00041260907892137766, "objective/train/docs_used": 778668, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.262136459350586, "objective/train/original_loss": 1.2621363401412964, "objective/train/theoretical_loss": 3.4399282907206654, "objective/train/tokens_used": 508702176, "objective/train/value_avg": -0.006771087646484375, "objective/train/value_loss": 0.00017483615374658257, "objective/train/value_max": -4.1961669921875e-05, "objective/train/value_min": -0.65576171875, "objective/train/value_reward_corr": 0.6787613437022155, "objective/train/value_std": 0.012237548828125, "objective/train/weight_avg": 1.000492811203003, "objective/train/weighted_lm_loss": 1.2629919052124023, "objective/train/weights_max": 1.2233127355575562, "objective/train/weights_min": 0.4321470856666565, "theoretical_loss": 3.4399282907206654, "tokens_seen": 2138177536 }, { "epoch": 0.3, "learning_rate": 0.000355612613335473, "loss": 0.0655, "theoretical_loss": 3.4399282907206654, "tokens_seen": 2138177536 }, { "epoch": 0.3, "learning_rate": 0.0003555724945839686, "loss": 0.067, "theoretical_loss": 3.4399110831482256, "tokens_seen": 2138308608 }, { "epoch": 0.3, "learning_rate": 0.0003555323758324641, "loss": 0.0691, "theoretical_loss": 3.4398938769258427, "tokens_seen": 2138439680 }, { "epoch": 0.3, "learning_rate": 0.00035549225708095966, "loss": 0.0623, "theoretical_loss": 3.4398766720533276, "tokens_seen": 2138570752 }, { "epoch": 0.3, "learning_rate": 0.00035545213832945523, "loss": 0.0653, "theoretical_loss": 3.4398594685304924, "tokens_seen": 2138701824 }, { "epoch": 0.3, "learning_rate": 0.00035541201957795074, "loss": 0.0684, "theoretical_loss": 3.4398422663571484, "tokens_seen": 2138832896 }, { "epoch": 0.3, "learning_rate": 0.00035537190082644626, "loss": 0.0708, "theoretical_loss": 3.439825065533107, "tokens_seen": 2138963968 }, { "epoch": 0.3, "learning_rate": 0.0003553317820749418, "loss": 0.0698, "theoretical_loss": 3.439807866058179, "tokens_seen": 2139095040 }, { "epoch": 0.3, "learning_rate": 0.0003552916633234374, "loss": 0.0663, "theoretical_loss": 3.439790667932177, "tokens_seen": 2139226112 }, { "epoch": 0.3, "learning_rate": 0.0003552515445719329, "loss": 0.0657, "theoretical_loss": 3.439773471154912, "tokens_seen": 2139357184 }, { "epoch": 0.3, "learning_rate": 0.0003552114258204285, "loss": 0.0611, "theoretical_loss": 3.4397562757261957, "tokens_seen": 2139488256 }, { "epoch": 0.3, "learning_rate": 0.00035517130706892404, "loss": 0.0702, "theoretical_loss": 3.4397390816458397, "tokens_seen": 2139619328 }, { "epoch": 0.3, "learning_rate": 0.00035513118831741956, "loss": 0.0659, "theoretical_loss": 3.439721888913656, "tokens_seen": 2139750400 }, { "epoch": 0.3, "learning_rate": 0.00035509106956591513, "loss": 0.0651, "theoretical_loss": 3.4397046975294554, "tokens_seen": 2139881472 }, { "epoch": 0.3, "learning_rate": 0.0003550509508144107, "loss": 0.0706, "theoretical_loss": 3.439687507493051, "tokens_seen": 2140012544 }, { "epoch": 0.3, "learning_rate": 0.0003550108320629062, "loss": 0.0632, "theoretical_loss": 3.4396703188042537, "tokens_seen": 2140143616 }, { "epoch": 0.3, "learning_rate": 0.0003549707133114017, "loss": 0.0685, "theoretical_loss": 3.439653131462875, "tokens_seen": 2140274688 }, { "epoch": 0.3, "learning_rate": 0.0003549305945598973, "loss": 0.0659, "theoretical_loss": 3.4396359454687278, "tokens_seen": 2140405760 }, { "epoch": 0.3, "learning_rate": 0.00035489047580839286, "loss": 0.0655, "theoretical_loss": 3.4396187608216233, "tokens_seen": 2140536832 }, { "epoch": 0.3, "learning_rate": 0.0003548503570568884, "loss": 0.0666, "theoretical_loss": 3.4396015775213735, "tokens_seen": 2140667904 }, { "epoch": 0.3, "learning_rate": 0.00035481023830538394, "loss": 0.0649, "theoretical_loss": 3.439584395567791, "tokens_seen": 2140798976 }, { "epoch": 0.3, "learning_rate": 0.0003547701195538795, "loss": 0.0659, "theoretical_loss": 3.439567214960687, "tokens_seen": 2140930048 }, { "epoch": 0.3, "learning_rate": 0.000354730000802375, "loss": 0.0684, "theoretical_loss": 3.439550035699874, "tokens_seen": 2141061120 }, { "epoch": 0.3, "learning_rate": 0.0003546898820508706, "loss": 0.0697, "theoretical_loss": 3.439532857785164, "tokens_seen": 2141192192 }, { "epoch": 0.3, "learning_rate": 0.00035464976329936616, "loss": 0.0663, "theoretical_loss": 3.4395156812163687, "tokens_seen": 2141323264 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0003267962019890547, "objective/train/docs_used": 779818, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1624425649642944, "objective/train/original_loss": 1.1624424457550049, "objective/train/theoretical_loss": 3.4394985059933014, "objective/train/tokens_used": 511978976, "objective/train/value_avg": -0.007137298583984375, "objective/train/value_loss": 0.0001580212701810524, "objective/train/value_max": -5.608797073364258e-05, "objective/train/value_min": -0.2626953125, "objective/train/value_reward_corr": 0.7138155780995258, "objective/train/value_std": 0.012847900390625, "objective/train/weight_avg": 1.0004007816314697, "objective/train/weighted_lm_loss": 1.163700819015503, "objective/train/weights_max": 1.1224071979522705, "objective/train/weights_min": 0.37475189566612244, "theoretical_loss": 3.4394985059933014, "tokens_seen": 2141454336 }, { "epoch": 0.3, "learning_rate": 0.0003546096445478617, "loss": 0.0648, "theoretical_loss": 3.4394985059933014, "tokens_seen": 2141454336 }, { "epoch": 0.3, "learning_rate": 0.0003545695257963572, "loss": 0.0626, "theoretical_loss": 3.4394813321157733, "tokens_seen": 2141585408 }, { "epoch": 0.3, "learning_rate": 0.00035452940704485276, "loss": 0.0662, "theoretical_loss": 3.439464159583597, "tokens_seen": 2141716480 }, { "epoch": 0.3, "learning_rate": 0.00035448928829334833, "loss": 0.0681, "theoretical_loss": 3.4394469883965844, "tokens_seen": 2141847552 }, { "epoch": 0.3, "learning_rate": 0.00035444916954184384, "loss": 0.065, "theoretical_loss": 3.4394298185545487, "tokens_seen": 2141978624 }, { "epoch": 0.3, "learning_rate": 0.0003544090507903394, "loss": 0.0689, "theoretical_loss": 3.4394126500573012, "tokens_seen": 2142109696 }, { "epoch": 0.3, "learning_rate": 0.000354368932038835, "loss": 0.0652, "theoretical_loss": 3.439395482904655, "tokens_seen": 2142240768 }, { "epoch": 0.3, "learning_rate": 0.00035432881328733055, "loss": 0.0673, "theoretical_loss": 3.4393783170964225, "tokens_seen": 2142371840 }, { "epoch": 0.3, "learning_rate": 0.00035428869453582606, "loss": 0.0662, "theoretical_loss": 3.439361152632416, "tokens_seen": 2142502912 }, { "epoch": 0.3, "learning_rate": 0.00035424857578432163, "loss": 0.0689, "theoretical_loss": 3.439343989512448, "tokens_seen": 2142633984 }, { "epoch": 0.3, "learning_rate": 0.00035420845703281714, "loss": 0.0647, "theoretical_loss": 3.4393268277363305, "tokens_seen": 2142765056 }, { "epoch": 0.3, "learning_rate": 0.00035416833828131266, "loss": 0.0669, "theoretical_loss": 3.4393096673038777, "tokens_seen": 2142896128 }, { "epoch": 0.3, "learning_rate": 0.0003541282195298082, "loss": 0.0683, "theoretical_loss": 3.4392925082149004, "tokens_seen": 2143027200 }, { "epoch": 0.3, "learning_rate": 0.0003540881007783038, "loss": 0.0625, "theoretical_loss": 3.439275350469212, "tokens_seen": 2143158272 }, { "epoch": 0.3, "learning_rate": 0.0003540479820267993, "loss": 0.0677, "theoretical_loss": 3.4392581940666256, "tokens_seen": 2143289344 }, { "epoch": 0.3, "learning_rate": 0.0003540078632752949, "loss": 0.0676, "theoretical_loss": 3.4392410390069537, "tokens_seen": 2143420416 }, { "epoch": 0.3, "learning_rate": 0.00035396774452379045, "loss": 0.069, "theoretical_loss": 3.4392238852900086, "tokens_seen": 2143551488 }, { "epoch": 0.3, "learning_rate": 0.000353927625772286, "loss": 0.0643, "theoretical_loss": 3.4392067329156033, "tokens_seen": 2143682560 }, { "epoch": 0.3, "learning_rate": 0.00035388750702078153, "loss": 0.0675, "theoretical_loss": 3.4391895818835514, "tokens_seen": 2143813632 }, { "epoch": 0.3, "learning_rate": 0.0003538473882692771, "loss": 0.0675, "theoretical_loss": 3.439172432193665, "tokens_seen": 2143944704 }, { "epoch": 0.3, "learning_rate": 0.0003538072695177726, "loss": 0.0675, "theoretical_loss": 3.439155283845757, "tokens_seen": 2144075776 }, { "epoch": 0.3, "learning_rate": 0.0003537671507662681, "loss": 0.0675, "theoretical_loss": 3.4391381368396408, "tokens_seen": 2144206848 }, { "epoch": 0.3, "learning_rate": 0.0003537270320147637, "loss": 0.0685, "theoretical_loss": 3.4391209911751286, "tokens_seen": 2144337920 }, { "epoch": 0.3, "learning_rate": 0.00035368691326325926, "loss": 0.0656, "theoretical_loss": 3.439103846852035, "tokens_seen": 2144468992 }, { "epoch": 0.3, "learning_rate": 0.0003536467945117548, "loss": 0.067, "theoretical_loss": 3.4390867038701716, "tokens_seen": 2144600064 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.0011193029349669814, "objective/train/docs_used": 780961, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1437392234802246, "objective/train/original_loss": 1.1437389850616455, "objective/train/theoretical_loss": 3.439069562229352, "objective/train/tokens_used": 515255776, "objective/train/value_avg": -0.0058746337890625, "objective/train/value_loss": 6.647661939496174e-05, "objective/train/value_max": -7.545948028564453e-05, "objective/train/value_min": -0.33203125, "objective/train/value_reward_corr": 0.7808518363277379, "objective/train/value_std": 0.01025390625, "objective/train/weight_avg": 1.0011521577835083, "objective/train/weighted_lm_loss": 1.145405888557434, "objective/train/weights_max": 1.1798231601715088, "objective/train/weights_min": 0.8373921513557434, "theoretical_loss": 3.439069562229352, "tokens_seen": 2144731136 }, { "epoch": 0.3, "learning_rate": 0.00035360667576025034, "loss": 0.0647, "theoretical_loss": 3.439069562229352, "tokens_seen": 2144731136 }, { "epoch": 0.3, "learning_rate": 0.0003535665570087459, "loss": 0.0672, "theoretical_loss": 3.439052421929389, "tokens_seen": 2144862208 }, { "epoch": 0.3, "learning_rate": 0.0003535264382572415, "loss": 0.0688, "theoretical_loss": 3.4390352829700963, "tokens_seen": 2144993280 }, { "epoch": 0.3, "learning_rate": 0.000353486319505737, "loss": 0.0681, "theoretical_loss": 3.439018145351287, "tokens_seen": 2145124352 }, { "epoch": 0.3, "learning_rate": 0.00035344620075423256, "loss": 0.0694, "theoretical_loss": 3.4390010090727743, "tokens_seen": 2145255424 }, { "epoch": 0.3, "learning_rate": 0.0003534060820027281, "loss": 0.063, "theoretical_loss": 3.4389838741343715, "tokens_seen": 2145386496 }, { "epoch": 0.3, "learning_rate": 0.0003533659632512236, "loss": 0.0671, "theoretical_loss": 3.438966740535892, "tokens_seen": 2145517568 }, { "epoch": 0.3, "learning_rate": 0.00035332584449971916, "loss": 0.0648, "theoretical_loss": 3.438949608277149, "tokens_seen": 2145648640 }, { "epoch": 0.3, "learning_rate": 0.00035328572574821473, "loss": 0.0644, "theoretical_loss": 3.438932477357956, "tokens_seen": 2145779712 }, { "epoch": 0.3, "learning_rate": 0.00035324560699671024, "loss": 0.0663, "theoretical_loss": 3.438915347778127, "tokens_seen": 2145910784 }, { "epoch": 0.3, "learning_rate": 0.0003532054882452058, "loss": 0.0652, "theoretical_loss": 3.4388982195374744, "tokens_seen": 2146041856 }, { "epoch": 0.3, "learning_rate": 0.0003531653694937014, "loss": 0.0635, "theoretical_loss": 3.4388810926358127, "tokens_seen": 2146172928 }, { "epoch": 0.3, "learning_rate": 0.00035312525074219695, "loss": 0.0711, "theoretical_loss": 3.4388639670729546, "tokens_seen": 2146304000 }, { "epoch": 0.3, "learning_rate": 0.00035308513199069246, "loss": 0.0652, "theoretical_loss": 3.4388468428487142, "tokens_seen": 2146435072 }, { "epoch": 0.3, "learning_rate": 0.00035304501323918803, "loss": 0.0629, "theoretical_loss": 3.4388297199629054, "tokens_seen": 2146566144 }, { "epoch": 0.3, "learning_rate": 0.00035300489448768355, "loss": 0.0671, "theoretical_loss": 3.4388125984153413, "tokens_seen": 2146697216 }, { "epoch": 0.3, "learning_rate": 0.00035296477573617906, "loss": 0.0684, "theoretical_loss": 3.4387954782058356, "tokens_seen": 2146828288 }, { "epoch": 0.3, "learning_rate": 0.00035292465698467463, "loss": 0.0686, "theoretical_loss": 3.4387783593342025, "tokens_seen": 2146959360 }, { "epoch": 0.3, "learning_rate": 0.0003528845382331702, "loss": 0.0658, "theoretical_loss": 3.4387612418002558, "tokens_seen": 2147090432 }, { "epoch": 0.3, "learning_rate": 0.0003528444194816657, "loss": 0.0669, "theoretical_loss": 3.438744125603809, "tokens_seen": 2147221504 }, { "epoch": 0.3, "learning_rate": 0.0003528043007301613, "loss": 0.072, "theoretical_loss": 3.4387270107446763, "tokens_seen": 2147352576 }, { "epoch": 0.3, "learning_rate": 0.00035276418197865685, "loss": 0.0684, "theoretical_loss": 3.4387098972226706, "tokens_seen": 2147483648 }, { "epoch": 0.3, "learning_rate": 0.0003527240632271524, "loss": 0.0696, "theoretical_loss": 3.4386927850376074, "tokens_seen": 2147614720 }, { "epoch": 0.3, "learning_rate": 0.00035268394447564793, "loss": 0.0687, "theoretical_loss": 3.4386756741892994, "tokens_seen": 2147745792 }, { "epoch": 0.3, "learning_rate": 0.0003526438257241435, "loss": 0.0672, "theoretical_loss": 3.438658564677561, "tokens_seen": 2147876864 }, { "epoch": 0.3, "objective/train/advantage_avg": -0.001209654496051371, "objective/train/docs_used": 782164, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2226568460464478, "objective/train/original_loss": 1.2226567268371582, "objective/train/theoretical_loss": 3.4386414565022063, "objective/train/tokens_used": 518532576, "objective/train/value_avg": -0.009002685546875, "objective/train/value_loss": 0.00036885784356854856, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.350341796875, "objective/train/value_reward_corr": 0.7072263967585593, "objective/train/value_std": 0.0179901123046875, "objective/train/weight_avg": 0.998958170413971, "objective/train/weighted_lm_loss": 1.2218875885009766, "objective/train/weights_max": 1.241788625717163, "objective/train/weights_min": 0.389484167098999, "theoretical_loss": 3.4386414565022063, "tokens_seen": 2148007936 }, { "epoch": 0.3, "learning_rate": 0.000352603706972639, "loss": 0.0681, "theoretical_loss": 3.4386414565022063, "tokens_seen": 2148007936 }, { "epoch": 0.3, "learning_rate": 0.0003525635882211345, "loss": 0.0639, "theoretical_loss": 3.4386243496630495, "tokens_seen": 2148139008 }, { "epoch": 0.3, "learning_rate": 0.0003525234694696301, "loss": 0.0716, "theoretical_loss": 3.438607244159905, "tokens_seen": 2148270080 }, { "epoch": 0.3, "learning_rate": 0.00035248335071812566, "loss": 0.0682, "theoretical_loss": 3.438590139992586, "tokens_seen": 2148401152 }, { "epoch": 0.3, "learning_rate": 0.0003524432319666212, "loss": 0.0662, "theoretical_loss": 3.4385730371609076, "tokens_seen": 2148532224 }, { "epoch": 0.3, "learning_rate": 0.00035240311321511675, "loss": 0.0671, "theoretical_loss": 3.4385559356646835, "tokens_seen": 2148663296 }, { "epoch": 0.3, "learning_rate": 0.0003523629944636123, "loss": 0.0666, "theoretical_loss": 3.438538835503728, "tokens_seen": 2148794368 }, { "epoch": 0.3, "learning_rate": 0.0003523228757121079, "loss": 0.0685, "theoretical_loss": 3.4385217366778558, "tokens_seen": 2148925440 }, { "epoch": 0.3, "learning_rate": 0.0003522827569606034, "loss": 0.0669, "theoretical_loss": 3.438504639186881, "tokens_seen": 2149056512 }, { "epoch": 0.3, "learning_rate": 0.00035224263820909896, "loss": 0.0702, "theoretical_loss": 3.438487543030618, "tokens_seen": 2149187584 }, { "epoch": 0.3, "learning_rate": 0.0003522025194575945, "loss": 0.0624, "theoretical_loss": 3.4384704482088813, "tokens_seen": 2149318656 }, { "epoch": 0.3, "learning_rate": 0.00035216240070609, "loss": 0.0706, "theoretical_loss": 3.4384533547214846, "tokens_seen": 2149449728 }, { "epoch": 0.3, "learning_rate": 0.00035212228195458556, "loss": 0.0661, "theoretical_loss": 3.4384362625682434, "tokens_seen": 2149580800 }, { "epoch": 0.3, "learning_rate": 0.00035208216320308113, "loss": 0.0679, "theoretical_loss": 3.438419171748972, "tokens_seen": 2149711872 }, { "epoch": 0.3, "learning_rate": 0.00035204204445157664, "loss": 0.0694, "theoretical_loss": 3.438402082263485, "tokens_seen": 2149842944 }, { "epoch": 0.3, "learning_rate": 0.0003520019257000722, "loss": 0.0681, "theoretical_loss": 3.4383849941115967, "tokens_seen": 2149974016 }, { "epoch": 0.3, "learning_rate": 0.0003519618069485678, "loss": 0.0653, "theoretical_loss": 3.4383679072931215, "tokens_seen": 2150105088 }, { "epoch": 0.3, "learning_rate": 0.00035192168819706335, "loss": 0.0705, "theoretical_loss": 3.438350821807875, "tokens_seen": 2150236160 }, { "epoch": 0.3, "learning_rate": 0.00035188156944555886, "loss": 0.0683, "theoretical_loss": 3.438333737655671, "tokens_seen": 2150367232 }, { "epoch": 0.3, "learning_rate": 0.00035184145069405443, "loss": 0.0702, "theoretical_loss": 3.438316654836324, "tokens_seen": 2150498304 }, { "epoch": 0.3, "learning_rate": 0.00035180133194254995, "loss": 0.0699, "theoretical_loss": 3.43829957334965, "tokens_seen": 2150629376 }, { "epoch": 0.3, "learning_rate": 0.00035176121319104546, "loss": 0.0674, "theoretical_loss": 3.438282493195463, "tokens_seen": 2150760448 }, { "epoch": 0.3, "learning_rate": 0.00035172109443954103, "loss": 0.0639, "theoretical_loss": 3.4382654143735785, "tokens_seen": 2150891520 }, { "epoch": 0.3, "learning_rate": 0.0003516809756880366, "loss": 0.0693, "theoretical_loss": 3.4382483368838113, "tokens_seen": 2151022592 }, { "epoch": 0.3, "learning_rate": 0.00035164085693653217, "loss": 0.0705, "theoretical_loss": 3.438231260725975, "tokens_seen": 2151153664 }, { "epoch": 0.3, "objective/train/advantage_avg": 0.001492616138420999, "objective/train/docs_used": 783310, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4524009227752686, "objective/train/original_loss": 1.452401041984558, "objective/train/theoretical_loss": 3.438214185899886, "objective/train/tokens_used": 521809376, "objective/train/value_avg": -0.01131439208984375, "objective/train/value_loss": 0.0004004965885542333, "objective/train/value_max": -4.649162292480469e-05, "objective/train/value_min": -0.82080078125, "objective/train/value_reward_corr": 0.6865380048474146, "objective/train/value_std": 0.0195159912109375, "objective/train/weight_avg": 1.0016696453094482, "objective/train/weighted_lm_loss": 1.4541211128234863, "objective/train/weights_max": 1.429987907409668, "objective/train/weights_min": 0.3730887472629547, "theoretical_loss": 3.438214185899886, "tokens_seen": 2151284736 }, { "epoch": 0.3, "learning_rate": 0.0003516007381850277, "loss": 0.0713, "theoretical_loss": 3.438214185899886, "tokens_seen": 2151284736 }, { "epoch": 0.3, "learning_rate": 0.00035156061943352325, "loss": 0.0709, "theoretical_loss": 3.4381971124053594, "tokens_seen": 2151415808 }, { "epoch": 0.3, "learning_rate": 0.0003515205006820188, "loss": 0.0656, "theoretical_loss": 3.4381800402422087, "tokens_seen": 2151546880 }, { "epoch": 0.3, "learning_rate": 0.00035148038193051433, "loss": 0.0701, "theoretical_loss": 3.438162969410251, "tokens_seen": 2151677952 }, { "epoch": 0.3, "learning_rate": 0.0003514402631790099, "loss": 0.0699, "theoretical_loss": 3.4381458999093, "tokens_seen": 2151809024 }, { "epoch": 0.3, "learning_rate": 0.0003514001444275054, "loss": 0.0653, "theoretical_loss": 3.438128831739171, "tokens_seen": 2151940096 }, { "epoch": 0.3, "learning_rate": 0.00035136002567600093, "loss": 0.0686, "theoretical_loss": 3.4381117648996797, "tokens_seen": 2152071168 }, { "epoch": 0.3, "learning_rate": 0.0003513199069244965, "loss": 0.0655, "theoretical_loss": 3.4380946993906414, "tokens_seen": 2152202240 }, { "epoch": 0.3, "learning_rate": 0.00035127978817299206, "loss": 0.0658, "theoretical_loss": 3.438077635211871, "tokens_seen": 2152333312 }, { "epoch": 0.3, "learning_rate": 0.00035123966942148763, "loss": 0.0672, "theoretical_loss": 3.4380605723631836, "tokens_seen": 2152464384 }, { "epoch": 0.3, "learning_rate": 0.00035119955066998315, "loss": 0.066, "theoretical_loss": 3.4380435108443947, "tokens_seen": 2152595456 }, { "epoch": 0.3, "learning_rate": 0.0003511594319184787, "loss": 0.0682, "theoretical_loss": 3.4380264506553204, "tokens_seen": 2152726528 }, { "epoch": 0.3, "learning_rate": 0.0003511193131669743, "loss": 0.0683, "theoretical_loss": 3.4380093917957755, "tokens_seen": 2152857600 }, { "epoch": 0.3, "learning_rate": 0.0003510791944154698, "loss": 0.0696, "theoretical_loss": 3.4379923342655747, "tokens_seen": 2152988672 }, { "epoch": 0.3, "learning_rate": 0.00035103907566396537, "loss": 0.0668, "theoretical_loss": 3.4379752780645347, "tokens_seen": 2153119744 }, { "epoch": 0.31, "learning_rate": 0.0003509989569124609, "loss": 0.0676, "theoretical_loss": 3.437958223192471, "tokens_seen": 2153250816 }, { "epoch": 0.31, "learning_rate": 0.0003509588381609564, "loss": 0.0651, "theoretical_loss": 3.437941169649198, "tokens_seen": 2153381888 }, { "epoch": 0.31, "learning_rate": 0.00035091871940945196, "loss": 0.0688, "theoretical_loss": 3.4379241174345325, "tokens_seen": 2153512960 }, { "epoch": 0.31, "learning_rate": 0.00035087860065794753, "loss": 0.0684, "theoretical_loss": 3.4379070665482896, "tokens_seen": 2153644032 }, { "epoch": 0.31, "learning_rate": 0.0003508384819064431, "loss": 0.0705, "theoretical_loss": 3.437890016990285, "tokens_seen": 2153775104 }, { "epoch": 0.31, "learning_rate": 0.0003507983631549386, "loss": 0.0675, "theoretical_loss": 3.4378729687603347, "tokens_seen": 2153906176 }, { "epoch": 0.31, "learning_rate": 0.0003507582444034342, "loss": 0.0625, "theoretical_loss": 3.4378559218582536, "tokens_seen": 2154037248 }, { "epoch": 0.31, "learning_rate": 0.00035071812565192975, "loss": 0.0698, "theoretical_loss": 3.4378388762838585, "tokens_seen": 2154168320 }, { "epoch": 0.31, "learning_rate": 0.00035067800690042526, "loss": 0.0659, "theoretical_loss": 3.4378218320369647, "tokens_seen": 2154299392 }, { "epoch": 0.31, "learning_rate": 0.00035063788814892083, "loss": 0.0684, "theoretical_loss": 3.437804789117388, "tokens_seen": 2154430464 }, { "epoch": 0.31, "objective/train/advantage_avg": -0.00012075314589310437, "objective/train/docs_used": 784476, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2525418996810913, "objective/train/original_loss": 1.2525418996810913, "objective/train/theoretical_loss": 3.437787747524945, "objective/train/tokens_used": 525086176, "objective/train/value_avg": -0.007205963134765625, "objective/train/value_loss": 0.0001527634885860607, "objective/train/value_max": -6.711483001708984e-05, "objective/train/value_min": -0.5751953125, "objective/train/value_reward_corr": 0.7457801653177916, "objective/train/value_std": 0.01293182373046875, "objective/train/weight_avg": 0.999952495098114, "objective/train/weighted_lm_loss": 1.2511646747589111, "objective/train/weights_max": 1.1854642629623413, "objective/train/weights_min": 0.5366953611373901, "theoretical_loss": 3.437787747524945, "tokens_seen": 2154561536 }, { "epoch": 0.31, "learning_rate": 0.00035059776939741635, "loss": 0.0677, "theoretical_loss": 3.437787747524945, "tokens_seen": 2154561536 }, { "epoch": 0.31, "learning_rate": 0.00035055765064591186, "loss": 0.0637, "theoretical_loss": 3.4377707072594506, "tokens_seen": 2154692608 }, { "epoch": 0.31, "learning_rate": 0.00035051753189440743, "loss": 0.0645, "theoretical_loss": 3.4377536683207217, "tokens_seen": 2154823680 }, { "epoch": 0.31, "learning_rate": 0.000350477413142903, "loss": 0.0626, "theoretical_loss": 3.4377366307085735, "tokens_seen": 2154954752 }, { "epoch": 0.31, "learning_rate": 0.00035043729439139857, "loss": 0.0685, "theoretical_loss": 3.4377195944228225, "tokens_seen": 2155085824 }, { "epoch": 0.31, "learning_rate": 0.0003503971756398941, "loss": 0.0687, "theoretical_loss": 3.4377025594632844, "tokens_seen": 2155216896 }, { "epoch": 0.31, "learning_rate": 0.00035035705688838965, "loss": 0.0672, "theoretical_loss": 3.4376855258297763, "tokens_seen": 2155347968 }, { "epoch": 0.31, "learning_rate": 0.0003503169381368852, "loss": 0.0688, "theoretical_loss": 3.437668493522113, "tokens_seen": 2155479040 }, { "epoch": 0.31, "learning_rate": 0.00035027681938538073, "loss": 0.0661, "theoretical_loss": 3.4376514625401113, "tokens_seen": 2155610112 }, { "epoch": 0.31, "learning_rate": 0.0003502367006338763, "loss": 0.0644, "theoretical_loss": 3.4376344328835877, "tokens_seen": 2155741184 }, { "epoch": 0.31, "learning_rate": 0.0003501965818823718, "loss": 0.0659, "theoretical_loss": 3.437617404552358, "tokens_seen": 2155872256 }, { "epoch": 0.31, "learning_rate": 0.00035015646313086733, "loss": 0.0659, "theoretical_loss": 3.4376003775462394, "tokens_seen": 2156003328 }, { "epoch": 0.31, "learning_rate": 0.0003501163443793629, "loss": 0.062, "theoretical_loss": 3.4375833518650465, "tokens_seen": 2156134400 }, { "epoch": 0.31, "learning_rate": 0.00035007622562785847, "loss": 0.0669, "theoretical_loss": 3.4375663275085975, "tokens_seen": 2156265472 }, { "epoch": 0.31, "learning_rate": 0.00035003610687635403, "loss": 0.0647, "theoretical_loss": 3.4375493044767076, "tokens_seen": 2156396544 }, { "epoch": 0.31, "learning_rate": 0.00034999598812484955, "loss": 0.0663, "theoretical_loss": 3.437532282769194, "tokens_seen": 2156527616 }, { "epoch": 0.31, "learning_rate": 0.0003499558693733451, "loss": 0.0661, "theoretical_loss": 3.4375152623858725, "tokens_seen": 2156658688 }, { "epoch": 0.31, "learning_rate": 0.0003499157506218407, "loss": 0.0697, "theoretical_loss": 3.43749824332656, "tokens_seen": 2156789760 }, { "epoch": 0.31, "learning_rate": 0.0003498756318703362, "loss": 0.0665, "theoretical_loss": 3.437481225591073, "tokens_seen": 2156920832 }, { "epoch": 0.31, "learning_rate": 0.00034983551311883177, "loss": 0.0644, "theoretical_loss": 3.437464209179228, "tokens_seen": 2157051904 }, { "epoch": 0.31, "learning_rate": 0.0003497953943673273, "loss": 0.066, "theoretical_loss": 3.4374471940908418, "tokens_seen": 2157182976 }, { "epoch": 0.31, "learning_rate": 0.0003497552756158228, "loss": 0.0703, "theoretical_loss": 3.437430180325731, "tokens_seen": 2157314048 }, { "epoch": 0.31, "learning_rate": 0.00034971515686431836, "loss": 0.0665, "theoretical_loss": 3.4374131678837125, "tokens_seen": 2157445120 }, { "epoch": 0.31, "learning_rate": 0.00034967503811281393, "loss": 0.0637, "theoretical_loss": 3.4373961567646028, "tokens_seen": 2157576192 }, { "epoch": 0.31, "learning_rate": 0.0003496349193613095, "loss": 0.0667, "theoretical_loss": 3.4373791469682184, "tokens_seen": 2157707264 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.0009003663435578346, "objective/train/docs_used": 785665, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.335235357284546, "objective/train/original_loss": 1.335235357284546, "objective/train/theoretical_loss": 3.4373621384943767, "objective/train/tokens_used": 528362976, "objective/train/value_avg": -0.008453369140625, "objective/train/value_loss": 0.00010689553892007098, "objective/train/value_max": -2.586841583251953e-05, "objective/train/value_min": -0.282470703125, "objective/train/value_reward_corr": 0.7552708120086591, "objective/train/value_std": 0.0130615234375, "objective/train/weight_avg": 1.000953197479248, "objective/train/weighted_lm_loss": 1.3362016677856445, "objective/train/weights_max": 1.0982791185379028, "objective/train/weights_min": 0.723071813583374, "theoretical_loss": 3.4373621384943767, "tokens_seen": 2157838336 }, { "epoch": 0.31, "learning_rate": 0.000349594800609805, "loss": 0.0688, "theoretical_loss": 3.4373621384943767, "tokens_seen": 2157838336 }, { "epoch": 0.31, "learning_rate": 0.0003495546818583006, "loss": 0.0645, "theoretical_loss": 3.437345131342894, "tokens_seen": 2157969408 }, { "epoch": 0.31, "learning_rate": 0.00034951456310679615, "loss": 0.0684, "theoretical_loss": 3.4373281255135875, "tokens_seen": 2158100480 }, { "epoch": 0.31, "learning_rate": 0.00034947444435529167, "loss": 0.0683, "theoretical_loss": 3.437311121006274, "tokens_seen": 2158231552 }, { "epoch": 0.31, "learning_rate": 0.00034943432560378723, "loss": 0.0674, "theoretical_loss": 3.4372941178207705, "tokens_seen": 2158362624 }, { "epoch": 0.31, "learning_rate": 0.00034939420685228275, "loss": 0.068, "theoretical_loss": 3.4372771159568942, "tokens_seen": 2158493696 }, { "epoch": 0.31, "learning_rate": 0.00034935408810077826, "loss": 0.0673, "theoretical_loss": 3.4372601154144617, "tokens_seen": 2158624768 }, { "epoch": 0.31, "learning_rate": 0.00034931396934927383, "loss": 0.0679, "theoretical_loss": 3.43724311619329, "tokens_seen": 2158755840 }, { "epoch": 0.31, "learning_rate": 0.0003492738505977694, "loss": 0.064, "theoretical_loss": 3.4372261182931974, "tokens_seen": 2158886912 }, { "epoch": 0.31, "learning_rate": 0.00034923373184626497, "loss": 0.0664, "theoretical_loss": 3.437209121713999, "tokens_seen": 2159017984 }, { "epoch": 0.31, "learning_rate": 0.0003491936130947605, "loss": 0.0673, "theoretical_loss": 3.437192126455514, "tokens_seen": 2159149056 }, { "epoch": 0.31, "learning_rate": 0.00034915349434325605, "loss": 0.0687, "theoretical_loss": 3.4371751325175586, "tokens_seen": 2159280128 }, { "epoch": 0.31, "learning_rate": 0.0003491133755917516, "loss": 0.0688, "theoretical_loss": 3.4371581398999496, "tokens_seen": 2159411200 }, { "epoch": 0.31, "learning_rate": 0.00034907325684024713, "loss": 0.0679, "theoretical_loss": 3.437141148602505, "tokens_seen": 2159542272 }, { "epoch": 0.31, "learning_rate": 0.0003490331380887427, "loss": 0.0703, "theoretical_loss": 3.437124158625042, "tokens_seen": 2159673344 }, { "epoch": 0.31, "learning_rate": 0.00034899301933723827, "loss": 0.0686, "theoretical_loss": 3.437107169967378, "tokens_seen": 2159804416 }, { "epoch": 0.31, "learning_rate": 0.0003489529005857338, "loss": 0.0662, "theoretical_loss": 3.4370901826293303, "tokens_seen": 2159935488 }, { "epoch": 0.31, "learning_rate": 0.0003489127818342293, "loss": 0.0673, "theoretical_loss": 3.437073196610716, "tokens_seen": 2160066560 }, { "epoch": 0.31, "learning_rate": 0.00034887266308272487, "loss": 0.0681, "theoretical_loss": 3.4370562119113535, "tokens_seen": 2160197632 }, { "epoch": 0.31, "learning_rate": 0.00034883254433122043, "loss": 0.0736, "theoretical_loss": 3.4370392285310594, "tokens_seen": 2160328704 }, { "epoch": 0.31, "learning_rate": 0.00034879242557971595, "loss": 0.0656, "theoretical_loss": 3.437022246469651, "tokens_seen": 2160459776 }, { "epoch": 0.31, "learning_rate": 0.0003487523068282115, "loss": 0.0694, "theoretical_loss": 3.437005265726947, "tokens_seen": 2160590848 }, { "epoch": 0.31, "learning_rate": 0.0003487121880767071, "loss": 0.0658, "theoretical_loss": 3.436988286302764, "tokens_seen": 2160721920 }, { "epoch": 0.31, "learning_rate": 0.0003486720693252026, "loss": 0.0678, "theoretical_loss": 3.4369713081969206, "tokens_seen": 2160852992 }, { "epoch": 0.31, "learning_rate": 0.00034863195057369817, "loss": 0.065, "theoretical_loss": 3.4369543314092335, "tokens_seen": 2160984064 }, { "epoch": 0.31, "objective/train/advantage_avg": -0.001137944869697094, "objective/train/docs_used": 786923, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3575602769851685, "objective/train/original_loss": 1.357560157775879, "objective/train/theoretical_loss": 3.436937355939521, "objective/train/tokens_used": 531639776, "objective/train/value_avg": -0.00933837890625, "objective/train/value_loss": 0.000332461204379797, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.481201171875, "objective/train/value_reward_corr": 0.6877560438293095, "objective/train/value_std": 0.0165863037109375, "objective/train/weight_avg": 0.9990164041519165, "objective/train/weighted_lm_loss": 1.3555333614349365, "objective/train/weights_max": 1.5724419355392456, "objective/train/weights_min": 0.37430042028427124, "theoretical_loss": 3.436937355939521, "tokens_seen": 2161115136 }, { "epoch": 0.31, "learning_rate": 0.00034859183182219374, "loss": 0.0661, "theoretical_loss": 3.436937355939521, "tokens_seen": 2161115136 }, { "epoch": 0.31, "learning_rate": 0.00034855171307068925, "loss": 0.0659, "theoretical_loss": 3.436920381787601, "tokens_seen": 2161246208 }, { "epoch": 0.31, "learning_rate": 0.00034851159431918477, "loss": 0.0688, "theoretical_loss": 3.4369034089532904, "tokens_seen": 2161377280 }, { "epoch": 0.31, "learning_rate": 0.00034847147556768033, "loss": 0.0655, "theoretical_loss": 3.436886437436408, "tokens_seen": 2161508352 }, { "epoch": 0.31, "learning_rate": 0.0003484313568161759, "loss": 0.0717, "theoretical_loss": 3.436869467236771, "tokens_seen": 2161639424 }, { "epoch": 0.31, "learning_rate": 0.0003483912380646714, "loss": 0.066, "theoretical_loss": 3.436852498354198, "tokens_seen": 2161770496 }, { "epoch": 0.31, "learning_rate": 0.000348351119313167, "loss": 0.0655, "theoretical_loss": 3.4368355307885063, "tokens_seen": 2161901568 }, { "epoch": 0.31, "learning_rate": 0.00034831100056166255, "loss": 0.0697, "theoretical_loss": 3.436818564539514, "tokens_seen": 2162032640 }, { "epoch": 0.31, "learning_rate": 0.00034827088181015807, "loss": 0.0646, "theoretical_loss": 3.436801599607039, "tokens_seen": 2162163712 }, { "epoch": 0.31, "learning_rate": 0.00034823076305865364, "loss": 0.0679, "theoretical_loss": 3.4367846359909002, "tokens_seen": 2162294784 }, { "epoch": 0.31, "learning_rate": 0.0003481906443071492, "loss": 0.0654, "theoretical_loss": 3.4367676736909143, "tokens_seen": 2162425856 }, { "epoch": 0.31, "learning_rate": 0.0003481505255556447, "loss": 0.0682, "theoretical_loss": 3.4367507127069006, "tokens_seen": 2162556928 }, { "epoch": 0.31, "learning_rate": 0.00034811040680414023, "loss": 0.0714, "theoretical_loss": 3.4367337530386766, "tokens_seen": 2162688000 }, { "epoch": 0.31, "learning_rate": 0.0003480702880526358, "loss": 0.0667, "theoretical_loss": 3.4367167946860606, "tokens_seen": 2162819072 }, { "epoch": 0.31, "learning_rate": 0.00034803016930113137, "loss": 0.0664, "theoretical_loss": 3.436699837648871, "tokens_seen": 2162950144 }, { "epoch": 0.31, "learning_rate": 0.0003479900505496269, "loss": 0.0662, "theoretical_loss": 3.4366828819269264, "tokens_seen": 2163081216 }, { "epoch": 0.31, "learning_rate": 0.00034794993179812245, "loss": 0.0685, "theoretical_loss": 3.4366659275200444, "tokens_seen": 2163212288 }, { "epoch": 0.31, "learning_rate": 0.000347909813046618, "loss": 0.0716, "theoretical_loss": 3.436648974428043, "tokens_seen": 2163343360 }, { "epoch": 0.31, "learning_rate": 0.00034786969429511353, "loss": 0.0619, "theoretical_loss": 3.436632022650742, "tokens_seen": 2163474432 }, { "epoch": 0.31, "learning_rate": 0.0003478295755436091, "loss": 0.0687, "theoretical_loss": 3.4366150721879585, "tokens_seen": 2163605504 }, { "epoch": 0.31, "learning_rate": 0.00034778945679210467, "loss": 0.0676, "theoretical_loss": 3.4365981230395115, "tokens_seen": 2163736576 }, { "epoch": 0.31, "learning_rate": 0.0003477493380406002, "loss": 0.0674, "theoretical_loss": 3.4365811752052196, "tokens_seen": 2163867648 }, { "epoch": 0.31, "learning_rate": 0.0003477092192890957, "loss": 0.0675, "theoretical_loss": 3.436564228684901, "tokens_seen": 2163998720 }, { "epoch": 0.31, "learning_rate": 0.00034766910053759127, "loss": 0.069, "theoretical_loss": 3.436547283478374, "tokens_seen": 2164129792 }, { "epoch": 0.31, "learning_rate": 0.00034762898178608684, "loss": 0.0691, "theoretical_loss": 3.436530339585458, "tokens_seen": 2164260864 }, { "epoch": 0.31, "objective/train/advantage_avg": 0.000743912416510284, "objective/train/docs_used": 788153, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3687949180603027, "objective/train/original_loss": 1.3687947988510132, "objective/train/theoretical_loss": 3.436513397005971, "objective/train/tokens_used": 534916576, "objective/train/value_avg": -0.006320953369140625, "objective/train/value_loss": 0.00011728073877748102, "objective/train/value_max": -2.3186206817626953e-05, "objective/train/value_min": -0.212890625, "objective/train/value_reward_corr": 0.6775233318491605, "objective/train/value_std": 0.01026153564453125, "objective/train/weight_avg": 1.000798225402832, "objective/train/weighted_lm_loss": 1.369715690612793, "objective/train/weights_max": 1.1140283346176147, "objective/train/weights_min": 0.37257105112075806, "theoretical_loss": 3.436513397005971, "tokens_seen": 2164391936 }, { "epoch": 0.31, "learning_rate": 0.00034758886303458235, "loss": 0.0699, "theoretical_loss": 3.436513397005971, "tokens_seen": 2164391936 }, { "epoch": 0.31, "learning_rate": 0.0003475487442830779, "loss": 0.0717, "theoretical_loss": 3.4364964557397317, "tokens_seen": 2164523008 }, { "epoch": 0.31, "learning_rate": 0.0003475086255315735, "loss": 0.0672, "theoretical_loss": 3.436479515786559, "tokens_seen": 2164654080 }, { "epoch": 0.31, "learning_rate": 0.000347468506780069, "loss": 0.0676, "theoretical_loss": 3.436462577146272, "tokens_seen": 2164785152 }, { "epoch": 0.31, "learning_rate": 0.00034742838802856457, "loss": 0.0651, "theoretical_loss": 3.4364456398186882, "tokens_seen": 2164916224 }, { "epoch": 0.31, "learning_rate": 0.00034738826927706014, "loss": 0.0676, "theoretical_loss": 3.4364287038036276, "tokens_seen": 2165047296 }, { "epoch": 0.31, "learning_rate": 0.00034734815052555565, "loss": 0.0693, "theoretical_loss": 3.436411769100909, "tokens_seen": 2165178368 }, { "epoch": 0.31, "learning_rate": 0.00034730803177405117, "loss": 0.0751, "theoretical_loss": 3.4363948357103506, "tokens_seen": 2165309440 }, { "epoch": 0.31, "learning_rate": 0.00034726791302254673, "loss": 0.0642, "theoretical_loss": 3.4363779036317714, "tokens_seen": 2165440512 }, { "epoch": 0.31, "learning_rate": 0.0003472277942710423, "loss": 0.0661, "theoretical_loss": 3.436360972864991, "tokens_seen": 2165571584 }, { "epoch": 0.31, "learning_rate": 0.0003471876755195378, "loss": 0.0671, "theoretical_loss": 3.436344043409828, "tokens_seen": 2165702656 }, { "epoch": 0.31, "learning_rate": 0.0003471475567680334, "loss": 0.0653, "theoretical_loss": 3.4363271152661006, "tokens_seen": 2165833728 }, { "epoch": 0.31, "learning_rate": 0.00034710743801652895, "loss": 0.0674, "theoretical_loss": 3.4363101884336293, "tokens_seen": 2165964800 }, { "epoch": 0.31, "learning_rate": 0.00034706731926502447, "loss": 0.0671, "theoretical_loss": 3.4362932629122325, "tokens_seen": 2166095872 }, { "epoch": 0.31, "learning_rate": 0.00034702720051352004, "loss": 0.0678, "theoretical_loss": 3.436276338701729, "tokens_seen": 2166226944 }, { "epoch": 0.31, "learning_rate": 0.0003469870817620156, "loss": 0.0639, "theoretical_loss": 3.436259415801939, "tokens_seen": 2166358016 }, { "epoch": 0.31, "learning_rate": 0.0003469469630105111, "loss": 0.0665, "theoretical_loss": 3.43624249421268, "tokens_seen": 2166489088 }, { "epoch": 0.31, "learning_rate": 0.00034690684425900663, "loss": 0.0696, "theoretical_loss": 3.436225573933773, "tokens_seen": 2166620160 }, { "epoch": 0.31, "learning_rate": 0.0003468667255075022, "loss": 0.0666, "theoretical_loss": 3.4362086549650366, "tokens_seen": 2166751232 }, { "epoch": 0.31, "learning_rate": 0.00034682660675599777, "loss": 0.067, "theoretical_loss": 3.43619173730629, "tokens_seen": 2166882304 }, { "epoch": 0.31, "learning_rate": 0.0003467864880044933, "loss": 0.07, "theoretical_loss": 3.4361748209573517, "tokens_seen": 2167013376 }, { "epoch": 0.31, "learning_rate": 0.00034674636925298885, "loss": 0.0685, "theoretical_loss": 3.4361579059180425, "tokens_seen": 2167144448 }, { "epoch": 0.31, "learning_rate": 0.0003467062505014844, "loss": 0.0656, "theoretical_loss": 3.4361409921881814, "tokens_seen": 2167275520 }, { "epoch": 0.31, "learning_rate": 0.00034666613174998, "loss": 0.0681, "theoretical_loss": 3.4361240797675876, "tokens_seen": 2167406592 }, { "epoch": 0.31, "learning_rate": 0.0003466260129984755, "loss": 0.0666, "theoretical_loss": 3.4361071686560805, "tokens_seen": 2167537664 }, { "epoch": 0.31, "objective/train/advantage_avg": -0.0004292539961170405, "objective/train/docs_used": 789244, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3922314643859863, "objective/train/original_loss": 1.3922314643859863, "objective/train/theoretical_loss": 3.43609025885348, "objective/train/tokens_used": 538193376, "objective/train/value_avg": -0.006778717041015625, "objective/train/value_loss": 0.00028023633058182895, "objective/train/value_max": -3.916025161743164e-05, "objective/train/value_min": -0.587890625, "objective/train/value_reward_corr": 0.6057025640779168, "objective/train/value_std": 0.01245880126953125, "objective/train/weight_avg": 0.999692976474762, "objective/train/weighted_lm_loss": 1.3917301893234253, "objective/train/weights_max": 1.1953831911087036, "objective/train/weights_min": 0.3683098554611206, "theoretical_loss": 3.43609025885348, "tokens_seen": 2167668736 }, { "epoch": 0.31, "learning_rate": 0.00034658589424697107, "loss": 0.0686, "theoretical_loss": 3.43609025885348, "tokens_seen": 2167668736 }, { "epoch": 0.31, "learning_rate": 0.0003465457754954666, "loss": 0.0702, "theoretical_loss": 3.4360733503596053, "tokens_seen": 2167799808 }, { "epoch": 0.31, "learning_rate": 0.0003465056567439621, "loss": 0.0675, "theoretical_loss": 3.436056443174276, "tokens_seen": 2167930880 }, { "epoch": 0.31, "learning_rate": 0.00034646553799245767, "loss": 0.0645, "theoretical_loss": 3.4360395372973125, "tokens_seen": 2168061952 }, { "epoch": 0.31, "learning_rate": 0.00034642541924095324, "loss": 0.0705, "theoretical_loss": 3.436022632728533, "tokens_seen": 2168193024 }, { "epoch": 0.31, "learning_rate": 0.00034638530048944875, "loss": 0.0667, "theoretical_loss": 3.4360057294677584, "tokens_seen": 2168324096 }, { "epoch": 0.31, "learning_rate": 0.0003463451817379443, "loss": 0.0653, "theoretical_loss": 3.4359888275148083, "tokens_seen": 2168455168 }, { "epoch": 0.31, "learning_rate": 0.0003463050629864399, "loss": 0.0721, "theoretical_loss": 3.435971926869502, "tokens_seen": 2168586240 }, { "epoch": 0.31, "learning_rate": 0.00034626494423493546, "loss": 0.0676, "theoretical_loss": 3.4359550275316595, "tokens_seen": 2168717312 }, { "epoch": 0.31, "learning_rate": 0.00034622482548343097, "loss": 0.0695, "theoretical_loss": 3.4359381295011007, "tokens_seen": 2168848384 }, { "epoch": 0.31, "learning_rate": 0.00034618470673192654, "loss": 0.0718, "theoretical_loss": 3.4359212327776456, "tokens_seen": 2168979456 }, { "epoch": 0.31, "learning_rate": 0.00034614458798042205, "loss": 0.0668, "theoretical_loss": 3.435904337361114, "tokens_seen": 2169110528 }, { "epoch": 0.31, "learning_rate": 0.00034610446922891757, "loss": 0.0668, "theoretical_loss": 3.4358874432513256, "tokens_seen": 2169241600 }, { "epoch": 0.31, "learning_rate": 0.00034606435047741314, "loss": 0.0663, "theoretical_loss": 3.435870550448101, "tokens_seen": 2169372672 }, { "epoch": 0.31, "learning_rate": 0.0003460242317259087, "loss": 0.0689, "theoretical_loss": 3.435853658951259, "tokens_seen": 2169503744 }, { "epoch": 0.31, "learning_rate": 0.0003459841129744042, "loss": 0.0688, "theoretical_loss": 3.435836768760621, "tokens_seen": 2169634816 }, { "epoch": 0.32, "learning_rate": 0.0003459439942228998, "loss": 0.0706, "theoretical_loss": 3.4358198798760067, "tokens_seen": 2169765888 }, { "epoch": 0.32, "learning_rate": 0.00034590387547139536, "loss": 0.0666, "theoretical_loss": 3.435802992297236, "tokens_seen": 2169896960 }, { "epoch": 0.32, "learning_rate": 0.0003458637567198909, "loss": 0.066, "theoretical_loss": 3.4357861060241293, "tokens_seen": 2170028032 }, { "epoch": 0.32, "learning_rate": 0.00034582363796838644, "loss": 0.0736, "theoretical_loss": 3.435769221056506, "tokens_seen": 2170159104 }, { "epoch": 0.32, "learning_rate": 0.000345783519216882, "loss": 0.0688, "theoretical_loss": 3.4357523373941876, "tokens_seen": 2170290176 }, { "epoch": 0.32, "learning_rate": 0.0003457434004653775, "loss": 0.0671, "theoretical_loss": 3.435735455036994, "tokens_seen": 2170421248 }, { "epoch": 0.32, "learning_rate": 0.00034570328171387303, "loss": 0.0698, "theoretical_loss": 3.4357185739847447, "tokens_seen": 2170552320 }, { "epoch": 0.32, "learning_rate": 0.0003456631629623686, "loss": 0.063, "theoretical_loss": 3.435701694237261, "tokens_seen": 2170683392 }, { "epoch": 0.32, "learning_rate": 0.00034562304421086417, "loss": 0.0678, "theoretical_loss": 3.4356848157943625, "tokens_seen": 2170814464 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.0020299660973250866, "objective/train/docs_used": 790517, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4230010509490967, "objective/train/original_loss": 1.4230010509490967, "objective/train/theoretical_loss": 3.4356679386558704, "objective/train/tokens_used": 541470176, "objective/train/value_avg": -0.01250457763671875, "objective/train/value_loss": 0.0004699077398981899, "objective/train/value_max": -1.621246337890625e-05, "objective/train/value_min": -0.498046875, "objective/train/value_reward_corr": 0.8818507949467509, "objective/train/value_std": 0.0443115234375, "objective/train/weight_avg": 1.0022624731063843, "objective/train/weighted_lm_loss": 1.4236948490142822, "objective/train/weights_max": 1.5358459949493408, "objective/train/weights_min": 0.3682536482810974, "theoretical_loss": 3.4356679386558704, "tokens_seen": 2170945536 }, { "epoch": 0.32, "learning_rate": 0.0003455829254593597, "loss": 0.0698, "theoretical_loss": 3.4356679386558704, "tokens_seen": 2170945536 }, { "epoch": 0.32, "learning_rate": 0.00034554280670785525, "loss": 0.0679, "theoretical_loss": 3.4356510628216044, "tokens_seen": 2171076608 }, { "epoch": 0.32, "learning_rate": 0.0003455026879563508, "loss": 0.0672, "theoretical_loss": 3.4356341882913854, "tokens_seen": 2171207680 }, { "epoch": 0.32, "learning_rate": 0.0003454625692048464, "loss": 0.0669, "theoretical_loss": 3.435617315065034, "tokens_seen": 2171338752 }, { "epoch": 0.32, "learning_rate": 0.0003454224504533419, "loss": 0.0705, "theoretical_loss": 3.435600443142371, "tokens_seen": 2171469824 }, { "epoch": 0.32, "learning_rate": 0.0003453823317018375, "loss": 0.0697, "theoretical_loss": 3.435583572523216, "tokens_seen": 2171600896 }, { "epoch": 0.32, "learning_rate": 0.000345342212950333, "loss": 0.0691, "theoretical_loss": 3.4355667032073898, "tokens_seen": 2171731968 }, { "epoch": 0.32, "learning_rate": 0.0003453020941988285, "loss": 0.066, "theoretical_loss": 3.4355498351947142, "tokens_seen": 2171863040 }, { "epoch": 0.32, "learning_rate": 0.00034526197544732407, "loss": 0.0697, "theoretical_loss": 3.435532968485009, "tokens_seen": 2171994112 }, { "epoch": 0.32, "learning_rate": 0.00034522185669581964, "loss": 0.0681, "theoretical_loss": 3.435516103078095, "tokens_seen": 2172125184 }, { "epoch": 0.32, "learning_rate": 0.00034518173794431515, "loss": 0.0648, "theoretical_loss": 3.435499238973793, "tokens_seen": 2172256256 }, { "epoch": 0.32, "learning_rate": 0.0003451416191928107, "loss": 0.0671, "theoretical_loss": 3.435482376171924, "tokens_seen": 2172387328 }, { "epoch": 0.32, "learning_rate": 0.0003451015004413063, "loss": 0.0674, "theoretical_loss": 3.4354655146723085, "tokens_seen": 2172518400 }, { "epoch": 0.32, "learning_rate": 0.00034506138168980186, "loss": 0.0697, "theoretical_loss": 3.435448654474768, "tokens_seen": 2172649472 }, { "epoch": 0.32, "learning_rate": 0.00034502126293829737, "loss": 0.068, "theoretical_loss": 3.435431795579123, "tokens_seen": 2172780544 }, { "epoch": 0.32, "learning_rate": 0.00034498114418679294, "loss": 0.0682, "theoretical_loss": 3.435414937985194, "tokens_seen": 2172911616 }, { "epoch": 0.32, "learning_rate": 0.00034494102543528845, "loss": 0.0662, "theoretical_loss": 3.4353980816928025, "tokens_seen": 2173042688 }, { "epoch": 0.32, "learning_rate": 0.00034490090668378397, "loss": 0.0683, "theoretical_loss": 3.43538122670177, "tokens_seen": 2173173760 }, { "epoch": 0.32, "learning_rate": 0.00034486078793227954, "loss": 0.067, "theoretical_loss": 3.4353643730119163, "tokens_seen": 2173304832 }, { "epoch": 0.32, "learning_rate": 0.0003448206691807751, "loss": 0.068, "theoretical_loss": 3.435347520623063, "tokens_seen": 2173435904 }, { "epoch": 0.32, "learning_rate": 0.0003447805504292706, "loss": 0.0707, "theoretical_loss": 3.4353306695350323, "tokens_seen": 2173566976 }, { "epoch": 0.32, "learning_rate": 0.0003447404316777662, "loss": 0.0699, "theoretical_loss": 3.435313819747644, "tokens_seen": 2173698048 }, { "epoch": 0.32, "learning_rate": 0.00034470031292626176, "loss": 0.0676, "theoretical_loss": 3.435296971260719, "tokens_seen": 2173829120 }, { "epoch": 0.32, "learning_rate": 0.0003446601941747573, "loss": 0.0703, "theoretical_loss": 3.4352801240740805, "tokens_seen": 2173960192 }, { "epoch": 0.32, "learning_rate": 0.00034462007542325284, "loss": 0.0662, "theoretical_loss": 3.4352632781875476, "tokens_seen": 2174091264 }, { "epoch": 0.32, "objective/train/advantage_avg": -0.0008815110777504742, "objective/train/docs_used": 791682, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3767930269241333, "objective/train/original_loss": 1.3767926692962646, "objective/train/theoretical_loss": 3.435246433600943, "objective/train/tokens_used": 544746976, "objective/train/value_avg": -0.00818634033203125, "objective/train/value_loss": 0.00027465668972581625, "objective/train/value_max": -6.604194641113281e-05, "objective/train/value_min": -0.470458984375, "objective/train/value_reward_corr": 0.6317106652483184, "objective/train/value_std": 0.01215362548828125, "objective/train/weight_avg": 0.9992392659187317, "objective/train/weighted_lm_loss": 1.376387596130371, "objective/train/weights_max": 1.354987621307373, "objective/train/weights_min": 0.36823222041130066, "theoretical_loss": 3.435246433600943, "tokens_seen": 2174222336 }, { "epoch": 0.32, "learning_rate": 0.0003445799566717484, "loss": 0.067, "theoretical_loss": 3.435246433600943, "tokens_seen": 2174222336 }, { "epoch": 0.32, "learning_rate": 0.0003445398379202439, "loss": 0.0653, "theoretical_loss": 3.4352295903140875, "tokens_seen": 2174353408 }, { "epoch": 0.32, "learning_rate": 0.00034449971916873944, "loss": 0.0728, "theoretical_loss": 3.4352127483268022, "tokens_seen": 2174484480 }, { "epoch": 0.32, "learning_rate": 0.000344459600417235, "loss": 0.0689, "theoretical_loss": 3.435195907638909, "tokens_seen": 2174615552 }, { "epoch": 0.32, "learning_rate": 0.00034441948166573057, "loss": 0.0673, "theoretical_loss": 3.4351790682502297, "tokens_seen": 2174746624 }, { "epoch": 0.32, "learning_rate": 0.0003443793629142261, "loss": 0.0666, "theoretical_loss": 3.435162230160585, "tokens_seen": 2174877696 }, { "epoch": 0.32, "learning_rate": 0.00034433924416272165, "loss": 0.0671, "theoretical_loss": 3.435145393369796, "tokens_seen": 2175008768 }, { "epoch": 0.32, "learning_rate": 0.0003442991254112172, "loss": 0.0675, "theoretical_loss": 3.4351285578776856, "tokens_seen": 2175139840 }, { "epoch": 0.32, "learning_rate": 0.0003442590066597128, "loss": 0.0704, "theoretical_loss": 3.4351117236840745, "tokens_seen": 2175270912 }, { "epoch": 0.32, "learning_rate": 0.0003442188879082083, "loss": 0.0639, "theoretical_loss": 3.4350948907887844, "tokens_seen": 2175401984 }, { "epoch": 0.32, "learning_rate": 0.0003441787691567039, "loss": 0.0657, "theoretical_loss": 3.4350780591916372, "tokens_seen": 2175533056 }, { "epoch": 0.32, "learning_rate": 0.0003441386504051994, "loss": 0.0659, "theoretical_loss": 3.435061228892454, "tokens_seen": 2175664128 }, { "epoch": 0.32, "learning_rate": 0.0003440985316536949, "loss": 0.0665, "theoretical_loss": 3.4350443998910576, "tokens_seen": 2175795200 }, { "epoch": 0.32, "learning_rate": 0.00034405841290219047, "loss": 0.0625, "theoretical_loss": 3.4350275721872685, "tokens_seen": 2175926272 }, { "epoch": 0.32, "learning_rate": 0.00034401829415068604, "loss": 0.0686, "theoretical_loss": 3.435010745780909, "tokens_seen": 2176057344 }, { "epoch": 0.32, "learning_rate": 0.0003439781753991816, "loss": 0.0661, "theoretical_loss": 3.434993920671802, "tokens_seen": 2176188416 }, { "epoch": 0.32, "learning_rate": 0.0003439380566476771, "loss": 0.0662, "theoretical_loss": 3.4349770968597677, "tokens_seen": 2176319488 }, { "epoch": 0.32, "learning_rate": 0.0003438979378961727, "loss": 0.0713, "theoretical_loss": 3.434960274344629, "tokens_seen": 2176450560 }, { "epoch": 0.32, "learning_rate": 0.00034385781914466826, "loss": 0.0667, "theoretical_loss": 3.4349434531262073, "tokens_seen": 2176581632 }, { "epoch": 0.32, "learning_rate": 0.00034381770039316377, "loss": 0.068, "theoretical_loss": 3.4349266332043245, "tokens_seen": 2176712704 }, { "epoch": 0.32, "learning_rate": 0.00034377758164165934, "loss": 0.0683, "theoretical_loss": 3.4349098145788033, "tokens_seen": 2176843776 }, { "epoch": 0.32, "learning_rate": 0.00034373746289015486, "loss": 0.0657, "theoretical_loss": 3.4348929972494653, "tokens_seen": 2176974848 }, { "epoch": 0.32, "learning_rate": 0.00034369734413865037, "loss": 0.0711, "theoretical_loss": 3.434876181216133, "tokens_seen": 2177105920 }, { "epoch": 0.32, "learning_rate": 0.00034365722538714594, "loss": 0.069, "theoretical_loss": 3.4348593664786273, "tokens_seen": 2177236992 }, { "epoch": 0.32, "learning_rate": 0.0003436171066356415, "loss": 0.0684, "theoretical_loss": 3.4348425530367717, "tokens_seen": 2177368064 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.00032470570295117795, "objective/train/docs_used": 792987, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3615885972976685, "objective/train/original_loss": 1.3615885972976685, "objective/train/theoretical_loss": 3.4348257408903873, "objective/train/tokens_used": 548023776, "objective/train/value_avg": -0.007762908935546875, "objective/train/value_loss": 0.0002019459498114884, "objective/train/value_max": -3.24249267578125e-05, "objective/train/value_min": -0.330810546875, "objective/train/value_reward_corr": 0.7091150004898605, "objective/train/value_std": 0.01506805419921875, "objective/train/weight_avg": 1.000413417816162, "objective/train/weighted_lm_loss": 1.3620177507400513, "objective/train/weights_max": 1.2442163228988647, "objective/train/weights_min": 0.3694644272327423, "theoretical_loss": 3.4348257408903873, "tokens_seen": 2177499136 }, { "epoch": 0.32, "learning_rate": 0.0003435769878841371, "loss": 0.0677, "theoretical_loss": 3.4348257408903873, "tokens_seen": 2177499136 }, { "epoch": 0.32, "learning_rate": 0.0003435368691326326, "loss": 0.0671, "theoretical_loss": 3.4348089300392974, "tokens_seen": 2177630208 }, { "epoch": 0.32, "learning_rate": 0.00034349675038112816, "loss": 0.0665, "theoretical_loss": 3.4347921204833236, "tokens_seen": 2177761280 }, { "epoch": 0.32, "learning_rate": 0.0003434566316296237, "loss": 0.0658, "theoretical_loss": 3.434775312222288, "tokens_seen": 2177892352 }, { "epoch": 0.32, "learning_rate": 0.00034341651287811924, "loss": 0.0675, "theoretical_loss": 3.4347585052560134, "tokens_seen": 2178023424 }, { "epoch": 0.32, "learning_rate": 0.0003433763941266148, "loss": 0.0702, "theoretical_loss": 3.434741699584322, "tokens_seen": 2178154496 }, { "epoch": 0.32, "learning_rate": 0.0003433362753751103, "loss": 0.0627, "theoretical_loss": 3.434724895207036, "tokens_seen": 2178285568 }, { "epoch": 0.32, "learning_rate": 0.00034329615662360584, "loss": 0.0642, "theoretical_loss": 3.434708092123978, "tokens_seen": 2178416640 }, { "epoch": 0.32, "learning_rate": 0.0003432560378721014, "loss": 0.0654, "theoretical_loss": 3.4346912903349702, "tokens_seen": 2178547712 }, { "epoch": 0.32, "learning_rate": 0.000343215919120597, "loss": 0.0639, "theoretical_loss": 3.434674489839836, "tokens_seen": 2178678784 }, { "epoch": 0.32, "learning_rate": 0.00034317580036909254, "loss": 0.068, "theoretical_loss": 3.4346576906383968, "tokens_seen": 2178809856 }, { "epoch": 0.32, "learning_rate": 0.00034313568161758806, "loss": 0.0671, "theoretical_loss": 3.4346408927304757, "tokens_seen": 2178940928 }, { "epoch": 0.32, "learning_rate": 0.0003430955628660836, "loss": 0.0657, "theoretical_loss": 3.434624096115895, "tokens_seen": 2179072000 }, { "epoch": 0.32, "learning_rate": 0.0003430554441145792, "loss": 0.0635, "theoretical_loss": 3.434607300794478, "tokens_seen": 2179203072 }, { "epoch": 0.32, "learning_rate": 0.0003430153253630747, "loss": 0.0694, "theoretical_loss": 3.4345905067660465, "tokens_seen": 2179334144 }, { "epoch": 0.32, "learning_rate": 0.0003429752066115703, "loss": 0.0685, "theoretical_loss": 3.434573714030424, "tokens_seen": 2179465216 }, { "epoch": 0.32, "learning_rate": 0.0003429350878600658, "loss": 0.0676, "theoretical_loss": 3.434556922587433, "tokens_seen": 2179596288 }, { "epoch": 0.32, "learning_rate": 0.0003428949691085613, "loss": 0.0672, "theoretical_loss": 3.4345401324368954, "tokens_seen": 2179727360 }, { "epoch": 0.32, "learning_rate": 0.00034285485035705687, "loss": 0.0687, "theoretical_loss": 3.4345233435786353, "tokens_seen": 2179858432 }, { "epoch": 0.32, "learning_rate": 0.00034281473160555244, "loss": 0.0695, "theoretical_loss": 3.4345065560124746, "tokens_seen": 2179989504 }, { "epoch": 0.32, "learning_rate": 0.000342774612854048, "loss": 0.0652, "theoretical_loss": 3.434489769738237, "tokens_seen": 2180120576 }, { "epoch": 0.32, "learning_rate": 0.0003427344941025435, "loss": 0.0661, "theoretical_loss": 3.434472984755745, "tokens_seen": 2180251648 }, { "epoch": 0.32, "learning_rate": 0.0003426943753510391, "loss": 0.0664, "theoretical_loss": 3.4344562010648207, "tokens_seen": 2180382720 }, { "epoch": 0.32, "learning_rate": 0.00034265425659953466, "loss": 0.0704, "theoretical_loss": 3.434439418665289, "tokens_seen": 2180513792 }, { "epoch": 0.32, "learning_rate": 0.0003426141378480302, "loss": 0.065, "theoretical_loss": 3.4344226375569713, "tokens_seen": 2180644864 }, { "epoch": 0.32, "objective/train/advantage_avg": 0.0005774701712653041, "objective/train/docs_used": 794081, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3003865480422974, "objective/train/original_loss": 1.3003864288330078, "objective/train/theoretical_loss": 3.434405857739691, "objective/train/tokens_used": 551300576, "objective/train/value_avg": -0.00445556640625, "objective/train/value_loss": 0.00013969487918075174, "objective/train/value_max": -3.170967102050781e-05, "objective/train/value_min": -0.2208251953125, "objective/train/value_reward_corr": 0.5209432723834129, "objective/train/value_std": 0.0078887939453125, "objective/train/weight_avg": 1.0006356239318848, "objective/train/weighted_lm_loss": 1.3011525869369507, "objective/train/weights_max": 1.084959864616394, "objective/train/weights_min": 0.3703887462615967, "theoretical_loss": 3.434405857739691, "tokens_seen": 2180775936 }, { "epoch": 0.32, "learning_rate": 0.00034257401909652574, "loss": 0.0651, "theoretical_loss": 3.434405857739691, "tokens_seen": 2180775936 }, { "epoch": 0.32, "learning_rate": 0.00034253390034502126, "loss": 0.0662, "theoretical_loss": 3.434389079213272, "tokens_seen": 2180907008 }, { "epoch": 0.32, "learning_rate": 0.00034249378159351677, "loss": 0.0661, "theoretical_loss": 3.434372301977536, "tokens_seen": 2181038080 }, { "epoch": 0.32, "learning_rate": 0.00034245366284201234, "loss": 0.0664, "theoretical_loss": 3.4343555260323075, "tokens_seen": 2181169152 }, { "epoch": 0.32, "learning_rate": 0.0003424135440905079, "loss": 0.0657, "theoretical_loss": 3.4343387513774095, "tokens_seen": 2181300224 }, { "epoch": 0.32, "learning_rate": 0.0003423734253390035, "loss": 0.0641, "theoretical_loss": 3.4343219780126644, "tokens_seen": 2181431296 }, { "epoch": 0.32, "learning_rate": 0.000342333306587499, "loss": 0.0661, "theoretical_loss": 3.434305205937896, "tokens_seen": 2181562368 }, { "epoch": 0.32, "learning_rate": 0.00034229318783599456, "loss": 0.0707, "theoretical_loss": 3.4342884351529275, "tokens_seen": 2181693440 }, { "epoch": 0.32, "learning_rate": 0.0003422530690844901, "loss": 0.0695, "theoretical_loss": 3.4342716656575822, "tokens_seen": 2181824512 }, { "epoch": 0.32, "learning_rate": 0.00034221295033298564, "loss": 0.0677, "theoretical_loss": 3.4342548974516838, "tokens_seen": 2181955584 }, { "epoch": 0.32, "learning_rate": 0.0003421728315814812, "loss": 0.0683, "theoretical_loss": 3.4342381305350553, "tokens_seen": 2182086656 }, { "epoch": 0.32, "learning_rate": 0.0003421327128299767, "loss": 0.067, "theoretical_loss": 3.43422136490752, "tokens_seen": 2182217728 }, { "epoch": 0.32, "learning_rate": 0.00034209259407847224, "loss": 0.067, "theoretical_loss": 3.434204600568902, "tokens_seen": 2182348800 }, { "epoch": 0.32, "learning_rate": 0.0003420524753269678, "loss": 0.067, "theoretical_loss": 3.4341878375190245, "tokens_seen": 2182479872 }, { "epoch": 0.32, "learning_rate": 0.0003420123565754634, "loss": 0.0726, "theoretical_loss": 3.434171075757711, "tokens_seen": 2182610944 }, { "epoch": 0.32, "learning_rate": 0.00034197223782395894, "loss": 0.067, "theoretical_loss": 3.434154315284785, "tokens_seen": 2182742016 }, { "epoch": 0.32, "learning_rate": 0.00034193211907245446, "loss": 0.0637, "theoretical_loss": 3.4341375561000698, "tokens_seen": 2182873088 }, { "epoch": 0.32, "learning_rate": 0.00034189200032095, "loss": 0.0674, "theoretical_loss": 3.43412079820339, "tokens_seen": 2183004160 }, { "epoch": 0.32, "learning_rate": 0.0003418518815694456, "loss": 0.0666, "theoretical_loss": 3.4341040415945683, "tokens_seen": 2183135232 }, { "epoch": 0.32, "learning_rate": 0.0003418117628179411, "loss": 0.0669, "theoretical_loss": 3.4340872862734293, "tokens_seen": 2183266304 }, { "epoch": 0.32, "learning_rate": 0.0003417716440664367, "loss": 0.0661, "theoretical_loss": 3.434070532239796, "tokens_seen": 2183397376 }, { "epoch": 0.32, "learning_rate": 0.0003417315253149322, "loss": 0.0663, "theoretical_loss": 3.4340537794934924, "tokens_seen": 2183528448 }, { "epoch": 0.32, "learning_rate": 0.0003416914065634277, "loss": 0.0685, "theoretical_loss": 3.4340370280343424, "tokens_seen": 2183659520 }, { "epoch": 0.32, "learning_rate": 0.0003416512878119233, "loss": 0.0702, "theoretical_loss": 3.43402027786217, "tokens_seen": 2183790592 }, { "epoch": 0.32, "learning_rate": 0.00034161116906041884, "loss": 0.067, "theoretical_loss": 3.434003528976799, "tokens_seen": 2183921664 }, { "epoch": 0.32, "objective/train/advantage_avg": -0.00014973235374782234, "objective/train/docs_used": 795264, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.49132239818573, "objective/train/original_loss": 1.4913221597671509, "objective/train/theoretical_loss": 3.433986781378053, "objective/train/tokens_used": 554577376, "objective/train/value_avg": -0.00632476806640625, "objective/train/value_loss": 0.00027962011517956853, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.264404296875, "objective/train/value_reward_corr": 0.6272315256075904, "objective/train/value_std": 0.01256561279296875, "objective/train/weight_avg": 0.9999637007713318, "objective/train/weighted_lm_loss": 1.4903829097747803, "objective/train/weights_max": 1.1656666994094849, "objective/train/weights_min": 0.2344958484172821, "theoretical_loss": 3.433986781378053, "tokens_seen": 2184052736 }, { "epoch": 0.32, "learning_rate": 0.0003415710503089144, "loss": 0.0691, "theoretical_loss": 3.433986781378053, "tokens_seen": 2184052736 }, { "epoch": 0.32, "learning_rate": 0.0003415309315574099, "loss": 0.0631, "theoretical_loss": 3.433970035065756, "tokens_seen": 2184183808 }, { "epoch": 0.32, "learning_rate": 0.0003414908128059055, "loss": 0.0685, "theoretical_loss": 3.4339532900397325, "tokens_seen": 2184314880 }, { "epoch": 0.32, "learning_rate": 0.00034145069405440106, "loss": 0.0688, "theoretical_loss": 3.4339365462998064, "tokens_seen": 2184445952 }, { "epoch": 0.32, "learning_rate": 0.0003414105753028966, "loss": 0.0671, "theoretical_loss": 3.433919803845801, "tokens_seen": 2184577024 }, { "epoch": 0.32, "learning_rate": 0.00034137045655139214, "loss": 0.0663, "theoretical_loss": 3.4339030626775413, "tokens_seen": 2184708096 }, { "epoch": 0.32, "learning_rate": 0.00034133033779988766, "loss": 0.0668, "theoretical_loss": 3.4338863227948515, "tokens_seen": 2184839168 }, { "epoch": 0.32, "learning_rate": 0.0003412902190483832, "loss": 0.0722, "theoretical_loss": 3.433869584197555, "tokens_seen": 2184970240 }, { "epoch": 0.32, "learning_rate": 0.00034125010029687874, "loss": 0.07, "theoretical_loss": 3.4338528468854763, "tokens_seen": 2185101312 }, { "epoch": 0.32, "learning_rate": 0.0003412099815453743, "loss": 0.0687, "theoretical_loss": 3.43383611085844, "tokens_seen": 2185232384 }, { "epoch": 0.32, "learning_rate": 0.0003411698627938699, "loss": 0.066, "theoretical_loss": 3.43381937611627, "tokens_seen": 2185363456 }, { "epoch": 0.32, "learning_rate": 0.0003411297440423654, "loss": 0.0702, "theoretical_loss": 3.43380264265879, "tokens_seen": 2185494528 }, { "epoch": 0.32, "learning_rate": 0.00034108962529086096, "loss": 0.0698, "theoretical_loss": 3.4337859104858257, "tokens_seen": 2185625600 }, { "epoch": 0.32, "learning_rate": 0.00034104950653935653, "loss": 0.0703, "theoretical_loss": 3.433769179597201, "tokens_seen": 2185756672 }, { "epoch": 0.32, "learning_rate": 0.00034100938778785204, "loss": 0.0709, "theoretical_loss": 3.4337524499927397, "tokens_seen": 2185887744 }, { "epoch": 0.32, "learning_rate": 0.0003409692690363476, "loss": 0.0691, "theoretical_loss": 3.433735721672267, "tokens_seen": 2186018816 }, { "epoch": 0.32, "learning_rate": 0.0003409291502848432, "loss": 0.0658, "theoretical_loss": 3.4337189946356066, "tokens_seen": 2186149888 }, { "epoch": 0.33, "learning_rate": 0.0003408890315333387, "loss": 0.0693, "theoretical_loss": 3.4337022688825836, "tokens_seen": 2186280960 }, { "epoch": 0.33, "learning_rate": 0.0003408489127818342, "loss": 0.0679, "theoretical_loss": 3.4336855444130223, "tokens_seen": 2186412032 }, { "epoch": 0.33, "learning_rate": 0.0003408087940303298, "loss": 0.0704, "theoretical_loss": 3.4336688212267474, "tokens_seen": 2186543104 }, { "epoch": 0.33, "learning_rate": 0.00034076867527882534, "loss": 0.0697, "theoretical_loss": 3.4336520993235835, "tokens_seen": 2186674176 }, { "epoch": 0.33, "learning_rate": 0.00034072855652732086, "loss": 0.0659, "theoretical_loss": 3.4336353787033556, "tokens_seen": 2186805248 }, { "epoch": 0.33, "learning_rate": 0.0003406884377758164, "loss": 0.069, "theoretical_loss": 3.4336186593658873, "tokens_seen": 2186936320 }, { "epoch": 0.33, "learning_rate": 0.000340648319024312, "loss": 0.0662, "theoretical_loss": 3.4336019413110046, "tokens_seen": 2187067392 }, { "epoch": 0.33, "learning_rate": 0.0003406082002728075, "loss": 0.0672, "theoretical_loss": 3.433585224538531, "tokens_seen": 2187198464 }, { "epoch": 0.33, "objective/train/advantage_avg": -2.046007284661755e-05, "objective/train/docs_used": 796380, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2042121887207031, "objective/train/original_loss": 1.2042121887207031, "objective/train/theoretical_loss": 3.4335685090482926, "objective/train/tokens_used": 557854176, "objective/train/value_avg": -0.004138946533203125, "objective/train/value_loss": 0.00013944355305284262, "objective/train/value_max": -1.9252300262451172e-05, "objective/train/value_min": -0.204345703125, "objective/train/value_reward_corr": 0.5490653473156977, "objective/train/value_std": 0.006809234619140625, "objective/train/weight_avg": 1.0000391006469727, "objective/train/weighted_lm_loss": 1.2043805122375488, "objective/train/weights_max": 1.1198102235794067, "objective/train/weights_min": 0.3761727213859558, "theoretical_loss": 3.4335685090482926, "tokens_seen": 2187329536 }, { "epoch": 0.33, "learning_rate": 0.0003405680815213031, "loss": 0.0656, "theoretical_loss": 3.4335685090482926, "tokens_seen": 2187329536 }, { "epoch": 0.33, "learning_rate": 0.00034052796276979865, "loss": 0.0679, "theoretical_loss": 3.4335517948401133, "tokens_seen": 2187460608 }, { "epoch": 0.33, "learning_rate": 0.00034048784401829416, "loss": 0.0699, "theoretical_loss": 3.4335350819138184, "tokens_seen": 2187591680 }, { "epoch": 0.33, "learning_rate": 0.0003404477252667897, "loss": 0.0656, "theoretical_loss": 3.4335183702692325, "tokens_seen": 2187722752 }, { "epoch": 0.33, "learning_rate": 0.00034040760651528524, "loss": 0.0694, "theoretical_loss": 3.433501659906181, "tokens_seen": 2187853824 }, { "epoch": 0.33, "learning_rate": 0.0003403674877637808, "loss": 0.0633, "theoretical_loss": 3.433484950824488, "tokens_seen": 2187984896 }, { "epoch": 0.33, "learning_rate": 0.0003403273690122763, "loss": 0.0679, "theoretical_loss": 3.433468243023979, "tokens_seen": 2188115968 }, { "epoch": 0.33, "learning_rate": 0.0003402872502607719, "loss": 0.0665, "theoretical_loss": 3.4334515365044798, "tokens_seen": 2188247040 }, { "epoch": 0.33, "learning_rate": 0.00034024713150926746, "loss": 0.0669, "theoretical_loss": 3.433434831265814, "tokens_seen": 2188378112 }, { "epoch": 0.33, "learning_rate": 0.000340207012757763, "loss": 0.063, "theoretical_loss": 3.4334181273078075, "tokens_seen": 2188509184 }, { "epoch": 0.33, "learning_rate": 0.00034016689400625854, "loss": 0.0685, "theoretical_loss": 3.4334014246302855, "tokens_seen": 2188640256 }, { "epoch": 0.33, "learning_rate": 0.0003401267752547541, "loss": 0.0678, "theoretical_loss": 3.4333847232330728, "tokens_seen": 2188771328 }, { "epoch": 0.33, "learning_rate": 0.0003400866565032496, "loss": 0.0665, "theoretical_loss": 3.4333680231159946, "tokens_seen": 2188902400 }, { "epoch": 0.33, "learning_rate": 0.00034004653775174514, "loss": 0.069, "theoretical_loss": 3.4333513242788767, "tokens_seen": 2189033472 }, { "epoch": 0.33, "learning_rate": 0.0003400064190002407, "loss": 0.0695, "theoretical_loss": 3.433334626721544, "tokens_seen": 2189164544 }, { "epoch": 0.33, "learning_rate": 0.0003399663002487363, "loss": 0.0691, "theoretical_loss": 3.4333179304438217, "tokens_seen": 2189295616 }, { "epoch": 0.33, "learning_rate": 0.0003399261814972318, "loss": 0.0703, "theoretical_loss": 3.4333012354455352, "tokens_seen": 2189426688 }, { "epoch": 0.33, "learning_rate": 0.00033988606274572736, "loss": 0.0656, "theoretical_loss": 3.43328454172651, "tokens_seen": 2189557760 }, { "epoch": 0.33, "learning_rate": 0.00033984594399422293, "loss": 0.0712, "theoretical_loss": 3.433267849286571, "tokens_seen": 2189688832 }, { "epoch": 0.33, "learning_rate": 0.00033980582524271844, "loss": 0.0643, "theoretical_loss": 3.4332511581255445, "tokens_seen": 2189819904 }, { "epoch": 0.33, "learning_rate": 0.000339765706491214, "loss": 0.07, "theoretical_loss": 3.4332344682432554, "tokens_seen": 2189950976 }, { "epoch": 0.33, "learning_rate": 0.0003397255877397096, "loss": 0.066, "theoretical_loss": 3.433217779639529, "tokens_seen": 2190082048 }, { "epoch": 0.33, "learning_rate": 0.0003396854689882051, "loss": 0.069, "theoretical_loss": 3.4332010923141913, "tokens_seen": 2190213120 }, { "epoch": 0.33, "learning_rate": 0.0003396453502367006, "loss": 0.067, "theoretical_loss": 3.4331844062670682, "tokens_seen": 2190344192 }, { "epoch": 0.33, "learning_rate": 0.0003396052314851962, "loss": 0.0664, "theoretical_loss": 3.4331677214979845, "tokens_seen": 2190475264 }, { "epoch": 0.33, "objective/train/advantage_avg": 3.8146703445818275e-05, "objective/train/docs_used": 797544, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.400113582611084, "objective/train/original_loss": 1.400113582611084, "objective/train/theoretical_loss": 3.4331510380067662, "objective/train/tokens_used": 561130976, "objective/train/value_avg": -0.004299163818359375, "objective/train/value_loss": 0.00010871435370063409, "objective/train/value_max": -2.0444393157958984e-05, "objective/train/value_min": -0.220458984375, "objective/train/value_reward_corr": 0.6035125012566668, "objective/train/value_std": 0.0074920654296875, "objective/train/weight_avg": 1.000090479850769, "objective/train/weighted_lm_loss": 1.4005143642425537, "objective/train/weights_max": 1.1425936222076416, "objective/train/weights_min": 0.6068740487098694, "theoretical_loss": 3.4331510380067662, "tokens_seen": 2190606336 }, { "epoch": 0.33, "learning_rate": 0.00033956511273369175, "loss": 0.0674, "theoretical_loss": 3.4331510380067662, "tokens_seen": 2190606336 }, { "epoch": 0.33, "learning_rate": 0.00033952499398218726, "loss": 0.0683, "theoretical_loss": 3.4331343557932392, "tokens_seen": 2190737408 }, { "epoch": 0.33, "learning_rate": 0.00033948487523068283, "loss": 0.0628, "theoretical_loss": 3.433117674857229, "tokens_seen": 2190868480 }, { "epoch": 0.33, "learning_rate": 0.0003394447564791784, "loss": 0.0678, "theoretical_loss": 3.433100995198561, "tokens_seen": 2190999552 }, { "epoch": 0.33, "learning_rate": 0.0003394046377276739, "loss": 0.0703, "theoretical_loss": 3.4330843168170615, "tokens_seen": 2191130624 }, { "epoch": 0.33, "learning_rate": 0.0003393645189761695, "loss": 0.0665, "theoretical_loss": 3.433067639712556, "tokens_seen": 2191261696 }, { "epoch": 0.33, "learning_rate": 0.00033932440022466505, "loss": 0.0691, "theoretical_loss": 3.433050963884871, "tokens_seen": 2191392768 }, { "epoch": 0.33, "learning_rate": 0.00033928428147316056, "loss": 0.0692, "theoretical_loss": 3.4330342893338313, "tokens_seen": 2191523840 }, { "epoch": 0.33, "learning_rate": 0.0003392441627216561, "loss": 0.065, "theoretical_loss": 3.4330176160592636, "tokens_seen": 2191654912 }, { "epoch": 0.33, "learning_rate": 0.00033920404397015164, "loss": 0.0665, "theoretical_loss": 3.4330009440609937, "tokens_seen": 2191785984 }, { "epoch": 0.33, "learning_rate": 0.0003391639252186472, "loss": 0.0664, "theoretical_loss": 3.432984273338848, "tokens_seen": 2191917056 }, { "epoch": 0.33, "learning_rate": 0.0003391238064671427, "loss": 0.0667, "theoretical_loss": 3.432967603892651, "tokens_seen": 2192048128 }, { "epoch": 0.33, "learning_rate": 0.0003390836877156383, "loss": 0.0669, "theoretical_loss": 3.4329509357222303, "tokens_seen": 2192179200 }, { "epoch": 0.33, "learning_rate": 0.00033904356896413386, "loss": 0.0672, "theoretical_loss": 3.432934268827412, "tokens_seen": 2192310272 }, { "epoch": 0.33, "learning_rate": 0.00033900345021262943, "loss": 0.0665, "theoretical_loss": 3.432917603208021, "tokens_seen": 2192441344 }, { "epoch": 0.33, "learning_rate": 0.00033896333146112495, "loss": 0.0679, "theoretical_loss": 3.4329009388638845, "tokens_seen": 2192572416 }, { "epoch": 0.33, "learning_rate": 0.0003389232127096205, "loss": 0.0654, "theoretical_loss": 3.4328842757948284, "tokens_seen": 2192703488 }, { "epoch": 0.33, "learning_rate": 0.00033888309395811603, "loss": 0.0678, "theoretical_loss": 3.4328676140006786, "tokens_seen": 2192834560 }, { "epoch": 0.33, "learning_rate": 0.00033884297520661154, "loss": 0.0701, "theoretical_loss": 3.432850953481262, "tokens_seen": 2192965632 }, { "epoch": 0.33, "learning_rate": 0.0003388028564551071, "loss": 0.0669, "theoretical_loss": 3.4328342942364043, "tokens_seen": 2193096704 }, { "epoch": 0.33, "learning_rate": 0.0003387627377036027, "loss": 0.0704, "theoretical_loss": 3.4328176362659324, "tokens_seen": 2193227776 }, { "epoch": 0.33, "learning_rate": 0.0003387226189520982, "loss": 0.0698, "theoretical_loss": 3.432800979569672, "tokens_seen": 2193358848 }, { "epoch": 0.33, "learning_rate": 0.00033868250020059376, "loss": 0.0684, "theoretical_loss": 3.43278432414745, "tokens_seen": 2193489920 }, { "epoch": 0.33, "learning_rate": 0.00033864238144908933, "loss": 0.0676, "theoretical_loss": 3.4327676699990928, "tokens_seen": 2193620992 }, { "epoch": 0.33, "learning_rate": 0.0003386022626975849, "loss": 0.0672, "theoretical_loss": 3.4327510171244264, "tokens_seen": 2193752064 }, { "epoch": 0.33, "objective/train/advantage_avg": 1.115020495490171e-05, "objective/train/docs_used": 798764, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4097254276275635, "objective/train/original_loss": 1.4097254276275635, "objective/train/theoretical_loss": 3.432734365523278, "objective/train/tokens_used": 564407776, "objective/train/value_avg": -0.00864410400390625, "objective/train/value_loss": 0.0004849732213187963, "objective/train/value_max": -4.7206878662109375e-05, "objective/train/value_min": -0.64990234375, "objective/train/value_reward_corr": 0.6312352902531914, "objective/train/value_std": 0.01776123046875, "objective/train/weight_avg": 1.0002102851867676, "objective/train/weighted_lm_loss": 1.4093152284622192, "objective/train/weights_max": 1.3695472478866577, "objective/train/weights_min": 0.22523219883441925, "theoretical_loss": 3.432734365523278, "tokens_seen": 2193883136 }, { "epoch": 0.33, "learning_rate": 0.0003385621439460804, "loss": 0.0688, "theoretical_loss": 3.432734365523278, "tokens_seen": 2193883136 }, { "epoch": 0.33, "learning_rate": 0.000338522025194576, "loss": 0.0683, "theoretical_loss": 3.4327177151954738, "tokens_seen": 2194014208 }, { "epoch": 0.33, "learning_rate": 0.0003384819064430715, "loss": 0.0705, "theoretical_loss": 3.4327010661408397, "tokens_seen": 2194145280 }, { "epoch": 0.33, "learning_rate": 0.000338441787691567, "loss": 0.067, "theoretical_loss": 3.4326844183592033, "tokens_seen": 2194276352 }, { "epoch": 0.33, "learning_rate": 0.0003384016689400626, "loss": 0.0652, "theoretical_loss": 3.432667771850391, "tokens_seen": 2194407424 }, { "epoch": 0.33, "learning_rate": 0.00033836155018855815, "loss": 0.0635, "theoretical_loss": 3.432651126614229, "tokens_seen": 2194538496 }, { "epoch": 0.33, "learning_rate": 0.00033832143143705366, "loss": 0.0726, "theoretical_loss": 3.4326344826505446, "tokens_seen": 2194669568 }, { "epoch": 0.33, "learning_rate": 0.00033828131268554923, "loss": 0.0724, "theoretical_loss": 3.4326178399591645, "tokens_seen": 2194800640 }, { "epoch": 0.33, "learning_rate": 0.0003382411939340448, "loss": 0.0671, "theoretical_loss": 3.4326011985399147, "tokens_seen": 2194931712 }, { "epoch": 0.33, "learning_rate": 0.00033820107518254037, "loss": 0.069, "theoretical_loss": 3.4325845583926227, "tokens_seen": 2195062784 }, { "epoch": 0.33, "learning_rate": 0.0003381609564310359, "loss": 0.0671, "theoretical_loss": 3.4325679195171155, "tokens_seen": 2195193856 }, { "epoch": 0.33, "learning_rate": 0.00033812083767953145, "loss": 0.0681, "theoretical_loss": 3.432551281913219, "tokens_seen": 2195324928 }, { "epoch": 0.33, "learning_rate": 0.00033808071892802696, "loss": 0.0692, "theoretical_loss": 3.4325346455807613, "tokens_seen": 2195456000 }, { "epoch": 0.33, "learning_rate": 0.0003380406001765225, "loss": 0.0665, "theoretical_loss": 3.4325180105195687, "tokens_seen": 2195587072 }, { "epoch": 0.33, "learning_rate": 0.00033800048142501804, "loss": 0.0661, "theoretical_loss": 3.4325013767294683, "tokens_seen": 2195718144 }, { "epoch": 0.33, "learning_rate": 0.0003379603626735136, "loss": 0.0654, "theoretical_loss": 3.432484744210287, "tokens_seen": 2195849216 }, { "epoch": 0.33, "learning_rate": 0.00033792024392200913, "loss": 0.0676, "theoretical_loss": 3.4324681129618515, "tokens_seen": 2195980288 }, { "epoch": 0.33, "learning_rate": 0.0003378801251705047, "loss": 0.0705, "theoretical_loss": 3.43245148298399, "tokens_seen": 2196111360 }, { "epoch": 0.33, "learning_rate": 0.00033784000641900026, "loss": 0.0664, "theoretical_loss": 3.4324348542765284, "tokens_seen": 2196242432 }, { "epoch": 0.33, "learning_rate": 0.00033779988766749583, "loss": 0.0699, "theoretical_loss": 3.4324182268392947, "tokens_seen": 2196373504 }, { "epoch": 0.33, "learning_rate": 0.00033775976891599135, "loss": 0.0693, "theoretical_loss": 3.432401600672115, "tokens_seen": 2196504576 }, { "epoch": 0.33, "learning_rate": 0.0003377196501644869, "loss": 0.0686, "theoretical_loss": 3.432384975774818, "tokens_seen": 2196635648 }, { "epoch": 0.33, "learning_rate": 0.00033767953141298243, "loss": 0.0682, "theoretical_loss": 3.43236835214723, "tokens_seen": 2196766720 }, { "epoch": 0.33, "learning_rate": 0.00033763941266147794, "loss": 0.0672, "theoretical_loss": 3.4323517297891786, "tokens_seen": 2196897792 }, { "epoch": 0.33, "learning_rate": 0.0003375992939099735, "loss": 0.067, "theoretical_loss": 3.4323351087004905, "tokens_seen": 2197028864 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.0017974297516047955, "objective/train/docs_used": 799965, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3409456014633179, "objective/train/original_loss": 1.3409457206726074, "objective/train/theoretical_loss": 3.432318488880994, "objective/train/tokens_used": 567684576, "objective/train/value_avg": -0.00872039794921875, "objective/train/value_loss": 0.00022452347911894321, "objective/train/value_max": -6.866455078125e-05, "objective/train/value_min": -0.490966796875, "objective/train/value_reward_corr": 0.661596567819829, "objective/train/value_std": 0.0160369873046875, "objective/train/weight_avg": 1.001906156539917, "objective/train/weighted_lm_loss": 1.3425172567367554, "objective/train/weights_max": 1.4717830419540405, "objective/train/weights_min": 0.38960304856300354, "theoretical_loss": 3.432318488880994, "tokens_seen": 2197159936 }, { "epoch": 0.33, "learning_rate": 0.0003375591751584691, "loss": 0.0664, "theoretical_loss": 3.432318488880994, "tokens_seen": 2197159936 }, { "epoch": 0.33, "learning_rate": 0.0003375190564069646, "loss": 0.0693, "theoretical_loss": 3.4323018703305155, "tokens_seen": 2197291008 }, { "epoch": 0.33, "learning_rate": 0.00033747893765546016, "loss": 0.0682, "theoretical_loss": 3.432285253048883, "tokens_seen": 2197422080 }, { "epoch": 0.33, "learning_rate": 0.00033743881890395573, "loss": 0.0644, "theoretical_loss": 3.432268637035924, "tokens_seen": 2197553152 }, { "epoch": 0.33, "learning_rate": 0.0003373987001524513, "loss": 0.0683, "theoretical_loss": 3.4322520222914656, "tokens_seen": 2197684224 }, { "epoch": 0.33, "learning_rate": 0.0003373585814009468, "loss": 0.0675, "theoretical_loss": 3.4322354088153357, "tokens_seen": 2197815296 }, { "epoch": 0.33, "learning_rate": 0.0003373184626494424, "loss": 0.067, "theoretical_loss": 3.4322187966073616, "tokens_seen": 2197946368 }, { "epoch": 0.33, "learning_rate": 0.0003372783438979379, "loss": 0.0655, "theoretical_loss": 3.432202185667371, "tokens_seen": 2198077440 }, { "epoch": 0.33, "learning_rate": 0.0003372382251464334, "loss": 0.069, "theoretical_loss": 3.4321855759951916, "tokens_seen": 2198208512 }, { "epoch": 0.33, "learning_rate": 0.000337198106394929, "loss": 0.0655, "theoretical_loss": 3.432168967590651, "tokens_seen": 2198339584 }, { "epoch": 0.33, "learning_rate": 0.00033715798764342455, "loss": 0.062, "theoretical_loss": 3.4321523604535766, "tokens_seen": 2198470656 }, { "epoch": 0.33, "learning_rate": 0.00033711786889192006, "loss": 0.0671, "theoretical_loss": 3.432135754583796, "tokens_seen": 2198601728 }, { "epoch": 0.33, "learning_rate": 0.00033707775014041563, "loss": 0.0674, "theoretical_loss": 3.432119149981138, "tokens_seen": 2198732800 }, { "epoch": 0.33, "learning_rate": 0.0003370376313889112, "loss": 0.0657, "theoretical_loss": 3.432102546645429, "tokens_seen": 2198863872 }, { "epoch": 0.33, "learning_rate": 0.00033699751263740677, "loss": 0.0667, "theoretical_loss": 3.4320859445764977, "tokens_seen": 2198994944 }, { "epoch": 0.33, "learning_rate": 0.0003369573938859023, "loss": 0.0696, "theoretical_loss": 3.432069343774172, "tokens_seen": 2199126016 }, { "epoch": 0.33, "learning_rate": 0.00033691727513439785, "loss": 0.0662, "theoretical_loss": 3.4320527442382796, "tokens_seen": 2199257088 }, { "epoch": 0.33, "learning_rate": 0.00033687715638289336, "loss": 0.0676, "theoretical_loss": 3.432036145968648, "tokens_seen": 2199388160 }, { "epoch": 0.33, "learning_rate": 0.0003368370376313889, "loss": 0.0649, "theoretical_loss": 3.4320195489651053, "tokens_seen": 2199519232 }, { "epoch": 0.33, "learning_rate": 0.00033679691887988445, "loss": 0.0664, "theoretical_loss": 3.43200295322748, "tokens_seen": 2199650304 }, { "epoch": 0.33, "learning_rate": 0.00033675680012838, "loss": 0.0632, "theoretical_loss": 3.4319863587555997, "tokens_seen": 2199781376 }, { "epoch": 0.33, "learning_rate": 0.00033671668137687553, "loss": 0.0686, "theoretical_loss": 3.431969765549293, "tokens_seen": 2199912448 }, { "epoch": 0.33, "learning_rate": 0.0003366765626253711, "loss": 0.0666, "theoretical_loss": 3.431953173608387, "tokens_seen": 2200043520 }, { "epoch": 0.33, "learning_rate": 0.00033663644387386667, "loss": 0.0675, "theoretical_loss": 3.43193658293271, "tokens_seen": 2200174592 }, { "epoch": 0.33, "learning_rate": 0.00033659632512236223, "loss": 0.0644, "theoretical_loss": 3.431919993522091, "tokens_seen": 2200305664 }, { "epoch": 0.33, "objective/train/advantage_avg": 0.0008899401291273534, "objective/train/docs_used": 801168, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3788948059082031, "objective/train/original_loss": 1.3788948059082031, "objective/train/theoretical_loss": 3.4319034053763575, "objective/train/tokens_used": 570961376, "objective/train/value_avg": -0.00714111328125, "objective/train/value_loss": 0.0001221873826580122, "objective/train/value_max": -6.455183029174805e-05, "objective/train/value_min": -0.359619140625, "objective/train/value_reward_corr": 0.7273420937733488, "objective/train/value_std": 0.0116729736328125, "objective/train/weight_avg": 1.0009499788284302, "objective/train/weighted_lm_loss": 1.3798168897628784, "objective/train/weights_max": 1.217027187347412, "objective/train/weights_min": 0.7176953554153442, "theoretical_loss": 3.4319034053763575, "tokens_seen": 2200436736 }, { "epoch": 0.33, "learning_rate": 0.00033655620637085775, "loss": 0.0702, "theoretical_loss": 3.4319034053763575, "tokens_seen": 2200436736 }, { "epoch": 0.33, "learning_rate": 0.0003365160876193533, "loss": 0.0671, "theoretical_loss": 3.431886818495338, "tokens_seen": 2200567808 }, { "epoch": 0.33, "learning_rate": 0.00033647596886784883, "loss": 0.071, "theoretical_loss": 3.431870232878861, "tokens_seen": 2200698880 }, { "epoch": 0.33, "learning_rate": 0.00033643585011634434, "loss": 0.0675, "theoretical_loss": 3.4318536485267543, "tokens_seen": 2200829952 }, { "epoch": 0.33, "learning_rate": 0.0003363957313648399, "loss": 0.0668, "theoretical_loss": 3.431837065438846, "tokens_seen": 2200961024 }, { "epoch": 0.33, "learning_rate": 0.0003363556126133355, "loss": 0.0631, "theoretical_loss": 3.431820483614965, "tokens_seen": 2201092096 }, { "epoch": 0.33, "learning_rate": 0.00033631549386183105, "loss": 0.0671, "theoretical_loss": 3.4318039030549397, "tokens_seen": 2201223168 }, { "epoch": 0.33, "learning_rate": 0.00033627537511032656, "loss": 0.0682, "theoretical_loss": 3.431787323758598, "tokens_seen": 2201354240 }, { "epoch": 0.33, "learning_rate": 0.00033623525635882213, "loss": 0.0682, "theoretical_loss": 3.431770745725769, "tokens_seen": 2201485312 }, { "epoch": 0.33, "learning_rate": 0.0003361951376073177, "loss": 0.0672, "theoretical_loss": 3.4317541689562807, "tokens_seen": 2201616384 }, { "epoch": 0.33, "learning_rate": 0.0003361550188558132, "loss": 0.0667, "theoretical_loss": 3.431737593449962, "tokens_seen": 2201747456 }, { "epoch": 0.33, "learning_rate": 0.0003361149001043088, "loss": 0.0689, "theoretical_loss": 3.431721019206641, "tokens_seen": 2201878528 }, { "epoch": 0.33, "learning_rate": 0.0003360747813528043, "loss": 0.0711, "theoretical_loss": 3.4317044462261466, "tokens_seen": 2202009600 }, { "epoch": 0.33, "learning_rate": 0.0003360346626012998, "loss": 0.0661, "theoretical_loss": 3.4316878745083077, "tokens_seen": 2202140672 }, { "epoch": 0.33, "learning_rate": 0.0003359945438497954, "loss": 0.0681, "theoretical_loss": 3.4316713040529523, "tokens_seen": 2202271744 }, { "epoch": 0.33, "learning_rate": 0.00033595442509829095, "loss": 0.0671, "theoretical_loss": 3.43165473485991, "tokens_seen": 2202402816 }, { "epoch": 0.33, "learning_rate": 0.0003359143063467865, "loss": 0.0665, "theoretical_loss": 3.4316381669290084, "tokens_seen": 2202533888 }, { "epoch": 0.33, "learning_rate": 0.00033587418759528203, "loss": 0.0673, "theoretical_loss": 3.4316216002600766, "tokens_seen": 2202664960 }, { "epoch": 0.34, "learning_rate": 0.0003358340688437776, "loss": 0.0682, "theoretical_loss": 3.431605034852944, "tokens_seen": 2202796032 }, { "epoch": 0.34, "learning_rate": 0.00033579395009227317, "loss": 0.0696, "theoretical_loss": 3.431588470707439, "tokens_seen": 2202927104 }, { "epoch": 0.34, "learning_rate": 0.0003357538313407687, "loss": 0.0736, "theoretical_loss": 3.43157190782339, "tokens_seen": 2203058176 }, { "epoch": 0.34, "learning_rate": 0.00033571371258926425, "loss": 0.069, "theoretical_loss": 3.4315553462006267, "tokens_seen": 2203189248 }, { "epoch": 0.34, "learning_rate": 0.00033567359383775976, "loss": 0.0696, "theoretical_loss": 3.431538785838978, "tokens_seen": 2203320320 }, { "epoch": 0.34, "learning_rate": 0.0003356334750862553, "loss": 0.0676, "theoretical_loss": 3.4315222267382723, "tokens_seen": 2203451392 }, { "epoch": 0.34, "learning_rate": 0.00033559335633475085, "loss": 0.0676, "theoretical_loss": 3.4315056688983385, "tokens_seen": 2203582464 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.001016387715935707, "objective/train/docs_used": 802328, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4350961446762085, "objective/train/original_loss": 1.4350961446762085, "objective/train/theoretical_loss": 3.4314891123190066, "objective/train/tokens_used": 574238176, "objective/train/value_avg": -0.00827789306640625, "objective/train/value_loss": 0.00015710720617789775, "objective/train/value_max": -5.650520324707031e-05, "objective/train/value_min": -0.283935546875, "objective/train/value_reward_corr": 0.7774812712200353, "objective/train/value_std": 0.0160980224609375, "objective/train/weight_avg": 1.0010932683944702, "objective/train/weighted_lm_loss": 1.4377284049987793, "objective/train/weights_max": 1.201808214187622, "objective/train/weights_min": 0.6912817358970642, "theoretical_loss": 3.4314891123190066, "tokens_seen": 2203713536 }, { "epoch": 0.34, "learning_rate": 0.0003355532375832464, "loss": 0.069, "theoretical_loss": 3.4314891123190066, "tokens_seen": 2203713536 }, { "epoch": 0.34, "learning_rate": 0.000335513118831742, "loss": 0.068, "theoretical_loss": 3.4314725570001046, "tokens_seen": 2203844608 }, { "epoch": 0.34, "learning_rate": 0.0003354730000802375, "loss": 0.0676, "theoretical_loss": 3.431456002941462, "tokens_seen": 2203975680 }, { "epoch": 0.34, "learning_rate": 0.00033543288132873307, "loss": 0.0674, "theoretical_loss": 3.431439450142908, "tokens_seen": 2204106752 }, { "epoch": 0.34, "learning_rate": 0.00033539276257722863, "loss": 0.0656, "theoretical_loss": 3.4314228986042714, "tokens_seen": 2204237824 }, { "epoch": 0.34, "learning_rate": 0.00033535264382572415, "loss": 0.0694, "theoretical_loss": 3.4314063483253823, "tokens_seen": 2204368896 }, { "epoch": 0.34, "learning_rate": 0.0003353125250742197, "loss": 0.0633, "theoretical_loss": 3.4313897993060687, "tokens_seen": 2204499968 }, { "epoch": 0.34, "learning_rate": 0.00033527240632271523, "loss": 0.0687, "theoretical_loss": 3.431373251546161, "tokens_seen": 2204631040 }, { "epoch": 0.34, "learning_rate": 0.00033523228757121075, "loss": 0.0691, "theoretical_loss": 3.4313567050454874, "tokens_seen": 2204762112 }, { "epoch": 0.34, "learning_rate": 0.0003351921688197063, "loss": 0.0701, "theoretical_loss": 3.431340159803878, "tokens_seen": 2204893184 }, { "epoch": 0.34, "learning_rate": 0.0003351520500682019, "loss": 0.0725, "theoretical_loss": 3.431323615821162, "tokens_seen": 2205024256 }, { "epoch": 0.34, "learning_rate": 0.00033511193131669745, "loss": 0.0726, "theoretical_loss": 3.4313070730971686, "tokens_seen": 2205155328 }, { "epoch": 0.34, "learning_rate": 0.00033507181256519297, "loss": 0.0685, "theoretical_loss": 3.4312905316317277, "tokens_seen": 2205286400 }, { "epoch": 0.34, "learning_rate": 0.00033503169381368853, "loss": 0.0691, "theoretical_loss": 3.431273991424668, "tokens_seen": 2205417472 }, { "epoch": 0.34, "learning_rate": 0.0003349915750621841, "loss": 0.0675, "theoretical_loss": 3.4312574524758195, "tokens_seen": 2205548544 }, { "epoch": 0.34, "learning_rate": 0.0003349514563106796, "loss": 0.0694, "theoretical_loss": 3.431240914785012, "tokens_seen": 2205679616 }, { "epoch": 0.34, "learning_rate": 0.0003349113375591752, "loss": 0.0698, "theoretical_loss": 3.431224378352074, "tokens_seen": 2205810688 }, { "epoch": 0.34, "learning_rate": 0.0003348712188076707, "loss": 0.0707, "theoretical_loss": 3.431207843176836, "tokens_seen": 2205941760 }, { "epoch": 0.34, "learning_rate": 0.0003348311000561662, "loss": 0.0696, "theoretical_loss": 3.4311913092591277, "tokens_seen": 2206072832 }, { "epoch": 0.34, "learning_rate": 0.0003347909813046618, "loss": 0.0707, "theoretical_loss": 3.431174776598778, "tokens_seen": 2206203904 }, { "epoch": 0.34, "learning_rate": 0.00033475086255315735, "loss": 0.0695, "theoretical_loss": 3.431158245195617, "tokens_seen": 2206334976 }, { "epoch": 0.34, "learning_rate": 0.0003347107438016529, "loss": 0.068, "theoretical_loss": 3.431141715049475, "tokens_seen": 2206466048 }, { "epoch": 0.34, "learning_rate": 0.00033467062505014843, "loss": 0.0707, "theoretical_loss": 3.431125186160181, "tokens_seen": 2206597120 }, { "epoch": 0.34, "learning_rate": 0.000334630506298644, "loss": 0.0692, "theoretical_loss": 3.4311086585275645, "tokens_seen": 2206728192 }, { "epoch": 0.34, "learning_rate": 0.00033459038754713957, "loss": 0.0647, "theoretical_loss": 3.4310921321514565, "tokens_seen": 2206859264 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0005416091298684478, "objective/train/docs_used": 803600, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.312454104423523, "objective/train/original_loss": 1.3124542236328125, "objective/train/theoretical_loss": 3.4310756070316857, "objective/train/tokens_used": 577514976, "objective/train/value_avg": -0.00766754150390625, "objective/train/value_loss": 0.00019397679716348648, "objective/train/value_max": -4.7206878662109375e-05, "objective/train/value_min": -0.23828125, "objective/train/value_reward_corr": 0.6477911474751314, "objective/train/value_std": 0.0124359130859375, "objective/train/weight_avg": 1.0006273984909058, "objective/train/weighted_lm_loss": 1.3128777742385864, "objective/train/weights_max": 1.124742031097412, "objective/train/weights_min": 0.36825186014175415, "theoretical_loss": 3.4310756070316857, "tokens_seen": 2206990336 }, { "epoch": 0.34, "learning_rate": 0.0003345502687956351, "loss": 0.0683, "theoretical_loss": 3.4310756070316857, "tokens_seen": 2206990336 }, { "epoch": 0.34, "learning_rate": 0.00033451015004413065, "loss": 0.0667, "theoretical_loss": 3.4310590831680825, "tokens_seen": 2207121408 }, { "epoch": 0.34, "learning_rate": 0.00033447003129262617, "loss": 0.0666, "theoretical_loss": 3.431042560560477, "tokens_seen": 2207252480 }, { "epoch": 0.34, "learning_rate": 0.0003344299125411217, "loss": 0.0692, "theoretical_loss": 3.4310260392086986, "tokens_seen": 2207383552 }, { "epoch": 0.34, "learning_rate": 0.00033438979378961725, "loss": 0.07, "theoretical_loss": 3.431009519112578, "tokens_seen": 2207514624 }, { "epoch": 0.34, "learning_rate": 0.0003343496750381128, "loss": 0.0671, "theoretical_loss": 3.4309930002719446, "tokens_seen": 2207645696 }, { "epoch": 0.34, "learning_rate": 0.0003343095562866084, "loss": 0.0681, "theoretical_loss": 3.4309764826866287, "tokens_seen": 2207776768 }, { "epoch": 0.34, "learning_rate": 0.0003342694375351039, "loss": 0.0672, "theoretical_loss": 3.4309599663564607, "tokens_seen": 2207907840 }, { "epoch": 0.34, "learning_rate": 0.00033422931878359947, "loss": 0.0646, "theoretical_loss": 3.43094345128127, "tokens_seen": 2208038912 }, { "epoch": 0.34, "learning_rate": 0.00033418920003209504, "loss": 0.066, "theoretical_loss": 3.4309269374608875, "tokens_seen": 2208169984 }, { "epoch": 0.34, "learning_rate": 0.00033414908128059055, "loss": 0.065, "theoretical_loss": 3.430910424895143, "tokens_seen": 2208301056 }, { "epoch": 0.34, "learning_rate": 0.0003341089625290861, "loss": 0.0644, "theoretical_loss": 3.4308939135838665, "tokens_seen": 2208432128 }, { "epoch": 0.34, "learning_rate": 0.00033406884377758163, "loss": 0.0697, "theoretical_loss": 3.4308774035268885, "tokens_seen": 2208563200 }, { "epoch": 0.34, "learning_rate": 0.00033402872502607715, "loss": 0.0656, "theoretical_loss": 3.4308608947240398, "tokens_seen": 2208694272 }, { "epoch": 0.34, "learning_rate": 0.0003339886062745727, "loss": 0.0676, "theoretical_loss": 3.4308443871751497, "tokens_seen": 2208825344 }, { "epoch": 0.34, "learning_rate": 0.0003339484875230683, "loss": 0.0721, "theoretical_loss": 3.430827880880049, "tokens_seen": 2208956416 }, { "epoch": 0.34, "learning_rate": 0.00033390836877156385, "loss": 0.0683, "theoretical_loss": 3.4308113758385685, "tokens_seen": 2209087488 }, { "epoch": 0.34, "learning_rate": 0.00033386825002005937, "loss": 0.0681, "theoretical_loss": 3.430794872050538, "tokens_seen": 2209218560 }, { "epoch": 0.34, "learning_rate": 0.00033382813126855493, "loss": 0.0685, "theoretical_loss": 3.4307783695157887, "tokens_seen": 2209349632 }, { "epoch": 0.34, "learning_rate": 0.0003337880125170505, "loss": 0.0647, "theoretical_loss": 3.4307618682341503, "tokens_seen": 2209480704 }, { "epoch": 0.34, "learning_rate": 0.000333747893765546, "loss": 0.07, "theoretical_loss": 3.4307453682054536, "tokens_seen": 2209611776 }, { "epoch": 0.34, "learning_rate": 0.0003337077750140416, "loss": 0.0665, "theoretical_loss": 3.430728869429529, "tokens_seen": 2209742848 }, { "epoch": 0.34, "learning_rate": 0.0003336676562625371, "loss": 0.0691, "theoretical_loss": 3.430712371906208, "tokens_seen": 2209873920 }, { "epoch": 0.34, "learning_rate": 0.00033362753751103267, "loss": 0.065, "theoretical_loss": 3.4306958756353194, "tokens_seen": 2210004992 }, { "epoch": 0.34, "learning_rate": 0.0003335874187595282, "loss": 0.0671, "theoretical_loss": 3.4306793806166955, "tokens_seen": 2210136064 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0015833480283617973, "objective/train/docs_used": 804834, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.318640947341919, "objective/train/original_loss": 1.318640947341919, "objective/train/theoretical_loss": 3.4306628868501665, "objective/train/tokens_used": 580791776, "objective/train/value_avg": -0.00960540771484375, "objective/train/value_loss": 0.00035190529888495803, "objective/train/value_max": -6.157159805297852e-05, "objective/train/value_min": -0.970703125, "objective/train/value_reward_corr": 0.7061657302499916, "objective/train/value_std": 0.0198211669921875, "objective/train/weight_avg": 1.0017414093017578, "objective/train/weighted_lm_loss": 1.3203641176223755, "objective/train/weights_max": 1.6051334142684937, "objective/train/weights_min": 0.37063467502593994, "theoretical_loss": 3.4306628868501665, "tokens_seen": 2210267136 }, { "epoch": 0.34, "learning_rate": 0.00033354730000802375, "loss": 0.067, "theoretical_loss": 3.4306628868501665, "tokens_seen": 2210267136 }, { "epoch": 0.34, "learning_rate": 0.0003335071812565193, "loss": 0.0671, "theoretical_loss": 3.4306463943355627, "tokens_seen": 2210398208 }, { "epoch": 0.34, "learning_rate": 0.00033346706250501483, "loss": 0.0713, "theoretical_loss": 3.4306299030727154, "tokens_seen": 2210529280 }, { "epoch": 0.34, "learning_rate": 0.0003334269437535104, "loss": 0.0631, "theoretical_loss": 3.430613413061455, "tokens_seen": 2210660352 }, { "epoch": 0.34, "learning_rate": 0.00033338682500200597, "loss": 0.07, "theoretical_loss": 3.4305969243016126, "tokens_seen": 2210791424 }, { "epoch": 0.34, "learning_rate": 0.0003333467062505015, "loss": 0.0705, "theoretical_loss": 3.4305804367930186, "tokens_seen": 2210922496 }, { "epoch": 0.34, "learning_rate": 0.00033330658749899705, "loss": 0.0657, "theoretical_loss": 3.430563950535505, "tokens_seen": 2211053568 }, { "epoch": 0.34, "learning_rate": 0.0003332664687474926, "loss": 0.0655, "theoretical_loss": 3.4305474655289006, "tokens_seen": 2211184640 }, { "epoch": 0.34, "learning_rate": 0.00033322634999598814, "loss": 0.0707, "theoretical_loss": 3.4305309817730385, "tokens_seen": 2211315712 }, { "epoch": 0.34, "learning_rate": 0.00033318623124448365, "loss": 0.0702, "theoretical_loss": 3.4305144992677485, "tokens_seen": 2211446784 }, { "epoch": 0.34, "learning_rate": 0.0003331461124929792, "loss": 0.0706, "theoretical_loss": 3.4304980180128624, "tokens_seen": 2211577856 }, { "epoch": 0.34, "learning_rate": 0.0003331059937414748, "loss": 0.0735, "theoretical_loss": 3.4304815380082103, "tokens_seen": 2211708928 }, { "epoch": 0.34, "learning_rate": 0.0003330658749899703, "loss": 0.0663, "theoretical_loss": 3.4304650592536237, "tokens_seen": 2211840000 }, { "epoch": 0.34, "learning_rate": 0.00033302575623846587, "loss": 0.0676, "theoretical_loss": 3.430448581748934, "tokens_seen": 2211971072 }, { "epoch": 0.34, "learning_rate": 0.00033298563748696144, "loss": 0.07, "theoretical_loss": 3.430432105493972, "tokens_seen": 2212102144 }, { "epoch": 0.34, "learning_rate": 0.00033294551873545695, "loss": 0.0689, "theoretical_loss": 3.430415630488569, "tokens_seen": 2212233216 }, { "epoch": 0.34, "learning_rate": 0.0003329053999839525, "loss": 0.0653, "theoretical_loss": 3.4303991567325562, "tokens_seen": 2212364288 }, { "epoch": 0.34, "learning_rate": 0.0003328652812324481, "loss": 0.0673, "theoretical_loss": 3.4303826842257648, "tokens_seen": 2212495360 }, { "epoch": 0.34, "learning_rate": 0.0003328251624809436, "loss": 0.0682, "theoretical_loss": 3.4303662129680257, "tokens_seen": 2212626432 }, { "epoch": 0.34, "learning_rate": 0.0003327850437294391, "loss": 0.0687, "theoretical_loss": 3.430349742959171, "tokens_seen": 2212757504 }, { "epoch": 0.34, "learning_rate": 0.0003327449249779347, "loss": 0.0669, "theoretical_loss": 3.4303332741990316, "tokens_seen": 2212888576 }, { "epoch": 0.34, "learning_rate": 0.00033270480622643025, "loss": 0.0706, "theoretical_loss": 3.4303168066874385, "tokens_seen": 2213019648 }, { "epoch": 0.34, "learning_rate": 0.00033266468747492577, "loss": 0.0642, "theoretical_loss": 3.430300340424224, "tokens_seen": 2213150720 }, { "epoch": 0.34, "learning_rate": 0.00033262456872342134, "loss": 0.068, "theoretical_loss": 3.430283875409219, "tokens_seen": 2213281792 }, { "epoch": 0.34, "learning_rate": 0.0003325844499719169, "loss": 0.0699, "theoretical_loss": 3.430267411642255, "tokens_seen": 2213412864 }, { "epoch": 0.34, "objective/train/advantage_avg": 0.0007442680071108043, "objective/train/docs_used": 805966, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4011800289154053, "objective/train/original_loss": 1.4011802673339844, "objective/train/theoretical_loss": 3.430250949123163, "objective/train/tokens_used": 584068576, "objective/train/value_avg": -0.00624847412109375, "objective/train/value_loss": 0.00015373311180155724, "objective/train/value_max": -4.869699478149414e-05, "objective/train/value_min": -0.314208984375, "objective/train/value_reward_corr": 0.6664287255213615, "objective/train/value_std": 0.01210784912109375, "objective/train/weight_avg": 1.00081205368042, "objective/train/weighted_lm_loss": 1.4027150869369507, "objective/train/weights_max": 1.2366453409194946, "objective/train/weights_min": 0.36870554089546204, "theoretical_loss": 3.430250949123163, "tokens_seen": 2213543936 }, { "epoch": 0.34, "learning_rate": 0.0003325443312204124, "loss": 0.0661, "theoretical_loss": 3.430250949123163, "tokens_seen": 2213543936 }, { "epoch": 0.34, "learning_rate": 0.000332504212468908, "loss": 0.0714, "theoretical_loss": 3.4302344878517754, "tokens_seen": 2213675008 }, { "epoch": 0.34, "learning_rate": 0.00033246409371740355, "loss": 0.0678, "theoretical_loss": 3.430218027827924, "tokens_seen": 2213806080 }, { "epoch": 0.34, "learning_rate": 0.00033242397496589907, "loss": 0.0667, "theoretical_loss": 3.430201569051439, "tokens_seen": 2213937152 }, { "epoch": 0.34, "learning_rate": 0.0003323838562143946, "loss": 0.0708, "theoretical_loss": 3.430185111522153, "tokens_seen": 2214068224 }, { "epoch": 0.34, "learning_rate": 0.00033234373746289015, "loss": 0.0702, "theoretical_loss": 3.4301686552398976, "tokens_seen": 2214199296 }, { "epoch": 0.34, "learning_rate": 0.0003323036187113857, "loss": 0.0678, "theoretical_loss": 3.4301522002045046, "tokens_seen": 2214330368 }, { "epoch": 0.34, "learning_rate": 0.00033226349995988123, "loss": 0.0659, "theoretical_loss": 3.4301357464158055, "tokens_seen": 2214461440 }, { "epoch": 0.34, "learning_rate": 0.0003322233812083768, "loss": 0.0628, "theoretical_loss": 3.430119293873632, "tokens_seen": 2214592512 }, { "epoch": 0.34, "learning_rate": 0.00033218326245687237, "loss": 0.0677, "theoretical_loss": 3.4301028425778157, "tokens_seen": 2214723584 }, { "epoch": 0.34, "learning_rate": 0.0003321431437053679, "loss": 0.0664, "theoretical_loss": 3.4300863925281893, "tokens_seen": 2214854656 }, { "epoch": 0.34, "learning_rate": 0.00033210302495386345, "loss": 0.0677, "theoretical_loss": 3.4300699437245834, "tokens_seen": 2214985728 }, { "epoch": 0.34, "learning_rate": 0.000332062906202359, "loss": 0.0697, "theoretical_loss": 3.430053496166831, "tokens_seen": 2215116800 }, { "epoch": 0.34, "learning_rate": 0.00033202278745085454, "loss": 0.065, "theoretical_loss": 3.430037049854764, "tokens_seen": 2215247872 }, { "epoch": 0.34, "learning_rate": 0.00033198266869935005, "loss": 0.0659, "theoretical_loss": 3.4300206047882136, "tokens_seen": 2215378944 }, { "epoch": 0.34, "learning_rate": 0.0003319425499478456, "loss": 0.069, "theoretical_loss": 3.430004160967012, "tokens_seen": 2215510016 }, { "epoch": 0.34, "learning_rate": 0.0003319024311963412, "loss": 0.0697, "theoretical_loss": 3.4299877183909917, "tokens_seen": 2215641088 }, { "epoch": 0.34, "learning_rate": 0.0003318623124448367, "loss": 0.0659, "theoretical_loss": 3.429971277059984, "tokens_seen": 2215772160 }, { "epoch": 0.34, "learning_rate": 0.00033182219369333227, "loss": 0.0669, "theoretical_loss": 3.4299548369738218, "tokens_seen": 2215903232 }, { "epoch": 0.34, "learning_rate": 0.00033178207494182784, "loss": 0.0675, "theoretical_loss": 3.429938398132337, "tokens_seen": 2216034304 }, { "epoch": 0.34, "learning_rate": 0.0003317419561903234, "loss": 0.0667, "theoretical_loss": 3.429921960535361, "tokens_seen": 2216165376 }, { "epoch": 0.34, "learning_rate": 0.0003317018374388189, "loss": 0.0672, "theoretical_loss": 3.4299055241827268, "tokens_seen": 2216296448 }, { "epoch": 0.34, "learning_rate": 0.0003316617186873145, "loss": 0.0655, "theoretical_loss": 3.429889089074267, "tokens_seen": 2216427520 }, { "epoch": 0.34, "learning_rate": 0.00033162159993581, "loss": 0.0651, "theoretical_loss": 3.4298726552098127, "tokens_seen": 2216558592 }, { "epoch": 0.34, "learning_rate": 0.0003315814811843055, "loss": 0.0645, "theoretical_loss": 3.429856222589197, "tokens_seen": 2216689664 }, { "epoch": 0.34, "objective/train/advantage_avg": -0.0006817588582634926, "objective/train/docs_used": 807026, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4144872426986694, "objective/train/original_loss": 1.4144872426986694, "objective/train/theoretical_loss": 3.4298397912122516, "objective/train/tokens_used": 587345376, "objective/train/value_avg": -0.00836181640625, "objective/train/value_loss": 0.0003349633188918233, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.7109375, "objective/train/value_reward_corr": 0.7280507580972199, "objective/train/value_std": 0.016845703125, "objective/train/weight_avg": 0.9994626045227051, "objective/train/weighted_lm_loss": 1.4134851694107056, "objective/train/weights_max": 1.474948525428772, "objective/train/weights_min": 0.2354423850774765, "theoretical_loss": 3.4298397912122516, "tokens_seen": 2216820736 }, { "epoch": 0.34, "learning_rate": 0.0003315413624328011, "loss": 0.066, "theoretical_loss": 3.4298397912122516, "tokens_seen": 2216820736 }, { "epoch": 0.34, "learning_rate": 0.00033150124368129665, "loss": 0.0643, "theoretical_loss": 3.4298233610788094, "tokens_seen": 2216951808 }, { "epoch": 0.34, "learning_rate": 0.00033146112492979217, "loss": 0.0635, "theoretical_loss": 3.429806932188703, "tokens_seen": 2217082880 }, { "epoch": 0.34, "learning_rate": 0.00033142100617828774, "loss": 0.0671, "theoretical_loss": 3.429790504541764, "tokens_seen": 2217213952 }, { "epoch": 0.34, "learning_rate": 0.0003313808874267833, "loss": 0.0663, "theoretical_loss": 3.429774078137825, "tokens_seen": 2217345024 }, { "epoch": 0.34, "learning_rate": 0.0003313407686752789, "loss": 0.0679, "theoretical_loss": 3.4297576529767193, "tokens_seen": 2217476096 }, { "epoch": 0.34, "learning_rate": 0.0003313006499237744, "loss": 0.0681, "theoretical_loss": 3.429741229058279, "tokens_seen": 2217607168 }, { "epoch": 0.34, "learning_rate": 0.00033126053117226996, "loss": 0.0661, "theoretical_loss": 3.4297248063823362, "tokens_seen": 2217738240 }, { "epoch": 0.34, "learning_rate": 0.00033122041242076547, "loss": 0.0671, "theoretical_loss": 3.429708384948724, "tokens_seen": 2217869312 }, { "epoch": 0.34, "learning_rate": 0.000331180293669261, "loss": 0.069, "theoretical_loss": 3.4296919647572746, "tokens_seen": 2218000384 }, { "epoch": 0.34, "learning_rate": 0.00033114017491775655, "loss": 0.0655, "theoretical_loss": 3.4296755458078207, "tokens_seen": 2218131456 }, { "epoch": 0.34, "learning_rate": 0.0003311000561662521, "loss": 0.0657, "theoretical_loss": 3.4296591281001954, "tokens_seen": 2218262528 }, { "epoch": 0.34, "learning_rate": 0.00033105993741474764, "loss": 0.0646, "theoretical_loss": 3.429642711634231, "tokens_seen": 2218393600 }, { "epoch": 0.34, "learning_rate": 0.0003310198186632432, "loss": 0.064, "theoretical_loss": 3.42962629640976, "tokens_seen": 2218524672 }, { "epoch": 0.34, "learning_rate": 0.00033097969991173877, "loss": 0.0671, "theoretical_loss": 3.4296098824266164, "tokens_seen": 2218655744 }, { "epoch": 0.34, "learning_rate": 0.00033093958116023434, "loss": 0.0643, "theoretical_loss": 3.4295934696846313, "tokens_seen": 2218786816 }, { "epoch": 0.34, "learning_rate": 0.00033089946240872985, "loss": 0.0684, "theoretical_loss": 3.4295770581836384, "tokens_seen": 2218917888 }, { "epoch": 0.34, "learning_rate": 0.0003308593436572254, "loss": 0.0626, "theoretical_loss": 3.4295606479234713, "tokens_seen": 2219048960 }, { "epoch": 0.34, "learning_rate": 0.00033081922490572094, "loss": 0.0687, "theoretical_loss": 3.4295442389039614, "tokens_seen": 2219180032 }, { "epoch": 0.35, "learning_rate": 0.00033077910615421645, "loss": 0.0656, "theoretical_loss": 3.429527831124943, "tokens_seen": 2219311104 }, { "epoch": 0.35, "learning_rate": 0.000330738987402712, "loss": 0.0681, "theoretical_loss": 3.4295114245862477, "tokens_seen": 2219442176 }, { "epoch": 0.35, "learning_rate": 0.0003306988686512076, "loss": 0.0723, "theoretical_loss": 3.42949501928771, "tokens_seen": 2219573248 }, { "epoch": 0.35, "learning_rate": 0.0003306587498997031, "loss": 0.0649, "theoretical_loss": 3.4294786152291614, "tokens_seen": 2219704320 }, { "epoch": 0.35, "learning_rate": 0.00033061863114819867, "loss": 0.0718, "theoretical_loss": 3.4294622124104364, "tokens_seen": 2219835392 }, { "epoch": 0.35, "learning_rate": 0.00033057851239669424, "loss": 0.0673, "theoretical_loss": 3.4294458108313672, "tokens_seen": 2219966464 }, { "epoch": 0.35, "objective/train/advantage_avg": 5.409380901255645e-05, "objective/train/docs_used": 808099, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.387454867362976, "objective/train/original_loss": 1.387454867362976, "objective/train/theoretical_loss": 3.429429410491787, "objective/train/tokens_used": 590622176, "objective/train/value_avg": -0.008209228515625, "objective/train/value_loss": 0.0004157430084887892, "objective/train/value_max": -4.035234451293945e-05, "objective/train/value_min": -0.5869140625, "objective/train/value_reward_corr": 0.6879643850745054, "objective/train/value_std": 0.018951416015625, "objective/train/weight_avg": 1.000233769416809, "objective/train/weighted_lm_loss": 1.3870490789413452, "objective/train/weights_max": 1.5326557159423828, "objective/train/weights_min": 0.36859163641929626, "theoretical_loss": 3.429429410491787, "tokens_seen": 2220097536 }, { "epoch": 0.35, "learning_rate": 0.0003305383936451898, "loss": 0.0681, "theoretical_loss": 3.429429410491787, "tokens_seen": 2220097536 }, { "epoch": 0.35, "learning_rate": 0.0003304982748936853, "loss": 0.0693, "theoretical_loss": 3.4294130113915293, "tokens_seen": 2220228608 }, { "epoch": 0.35, "learning_rate": 0.0003304581561421809, "loss": 0.0668, "theoretical_loss": 3.429396613530427, "tokens_seen": 2220359680 }, { "epoch": 0.35, "learning_rate": 0.0003304180373906764, "loss": 0.0629, "theoretical_loss": 3.4293802169083136, "tokens_seen": 2220490752 }, { "epoch": 0.35, "learning_rate": 0.0003303779186391719, "loss": 0.0665, "theoretical_loss": 3.429363821525022, "tokens_seen": 2220621824 }, { "epoch": 0.35, "learning_rate": 0.0003303377998876675, "loss": 0.0729, "theoretical_loss": 3.429347427380386, "tokens_seen": 2220752896 }, { "epoch": 0.35, "learning_rate": 0.00033029768113616306, "loss": 0.0691, "theoretical_loss": 3.4293310344742385, "tokens_seen": 2220883968 }, { "epoch": 0.35, "learning_rate": 0.00033025756238465857, "loss": 0.0649, "theoretical_loss": 3.429314642806413, "tokens_seen": 2221015040 }, { "epoch": 0.35, "learning_rate": 0.00033021744363315414, "loss": 0.0672, "theoretical_loss": 3.429298252376743, "tokens_seen": 2221146112 }, { "epoch": 0.35, "learning_rate": 0.0003301773248816497, "loss": 0.0703, "theoretical_loss": 3.4292818631850612, "tokens_seen": 2221277184 }, { "epoch": 0.35, "learning_rate": 0.0003301372061301453, "loss": 0.0697, "theoretical_loss": 3.429265475231202, "tokens_seen": 2221408256 }, { "epoch": 0.35, "learning_rate": 0.0003300970873786408, "loss": 0.067, "theoretical_loss": 3.4292490885149984, "tokens_seen": 2221539328 }, { "epoch": 0.35, "learning_rate": 0.00033005696862713636, "loss": 0.0645, "theoretical_loss": 3.429232703036284, "tokens_seen": 2221670400 }, { "epoch": 0.35, "learning_rate": 0.00033001684987563187, "loss": 0.0668, "theoretical_loss": 3.4292163187948925, "tokens_seen": 2221801472 }, { "epoch": 0.35, "learning_rate": 0.0003299767311241274, "loss": 0.0681, "theoretical_loss": 3.429199935790657, "tokens_seen": 2221932544 }, { "epoch": 0.35, "learning_rate": 0.00032993661237262295, "loss": 0.0674, "theoretical_loss": 3.4291835540234117, "tokens_seen": 2222063616 }, { "epoch": 0.35, "learning_rate": 0.0003298964936211185, "loss": 0.0689, "theoretical_loss": 3.4291671734929903, "tokens_seen": 2222194688 }, { "epoch": 0.35, "learning_rate": 0.00032985637486961404, "loss": 0.0649, "theoretical_loss": 3.429150794199226, "tokens_seen": 2222325760 }, { "epoch": 0.35, "learning_rate": 0.0003298162561181096, "loss": 0.063, "theoretical_loss": 3.4291344161419524, "tokens_seen": 2222456832 }, { "epoch": 0.35, "learning_rate": 0.0003297761373666052, "loss": 0.0665, "theoretical_loss": 3.4291180393210037, "tokens_seen": 2222587904 }, { "epoch": 0.35, "learning_rate": 0.00032973601861510074, "loss": 0.0633, "theoretical_loss": 3.4291016637362137, "tokens_seen": 2222718976 }, { "epoch": 0.35, "learning_rate": 0.00032969589986359626, "loss": 0.0688, "theoretical_loss": 3.4290852893874155, "tokens_seen": 2222850048 }, { "epoch": 0.35, "learning_rate": 0.0003296557811120918, "loss": 0.066, "theoretical_loss": 3.429068916274444, "tokens_seen": 2222981120 }, { "epoch": 0.35, "learning_rate": 0.00032961566236058734, "loss": 0.0645, "theoretical_loss": 3.4290525443971323, "tokens_seen": 2223112192 }, { "epoch": 0.35, "learning_rate": 0.00032957554360908285, "loss": 0.0651, "theoretical_loss": 3.429036173755314, "tokens_seen": 2223243264 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0003406072792131454, "objective/train/docs_used": 809438, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3037582635879517, "objective/train/original_loss": 1.303758144378662, "objective/train/theoretical_loss": 3.4290198043488243, "objective/train/tokens_used": 593898976, "objective/train/value_avg": -0.00798797607421875, "objective/train/value_loss": 0.00035889988066628575, "objective/train/value_max": -3.0934810638427734e-05, "objective/train/value_min": -0.6806640625, "objective/train/value_reward_corr": 0.6432021011144358, "objective/train/value_std": 0.0167083740234375, "objective/train/weight_avg": 1.0004966259002686, "objective/train/weighted_lm_loss": 1.3039801120758057, "objective/train/weights_max": 1.6689705848693848, "objective/train/weights_min": 0.3811357617378235, "theoretical_loss": 3.4290198043488243, "tokens_seen": 2223374336 }, { "epoch": 0.35, "learning_rate": 0.0003295354248575784, "loss": 0.0671, "theoretical_loss": 3.4290198043488243, "tokens_seen": 2223374336 }, { "epoch": 0.35, "learning_rate": 0.000329495306106074, "loss": 0.0662, "theoretical_loss": 3.429003436177496, "tokens_seen": 2223505408 }, { "epoch": 0.35, "learning_rate": 0.0003294551873545695, "loss": 0.0634, "theoretical_loss": 3.4289870692411637, "tokens_seen": 2223636480 }, { "epoch": 0.35, "learning_rate": 0.00032941506860306507, "loss": 0.065, "theoretical_loss": 3.4289707035396613, "tokens_seen": 2223767552 }, { "epoch": 0.35, "learning_rate": 0.00032937494985156064, "loss": 0.0696, "theoretical_loss": 3.4289543390728228, "tokens_seen": 2223898624 }, { "epoch": 0.35, "learning_rate": 0.0003293348311000562, "loss": 0.0673, "theoretical_loss": 3.428937975840482, "tokens_seen": 2224029696 }, { "epoch": 0.35, "learning_rate": 0.0003292947123485517, "loss": 0.0707, "theoretical_loss": 3.4289216138424736, "tokens_seen": 2224160768 }, { "epoch": 0.35, "learning_rate": 0.0003292545935970473, "loss": 0.072, "theoretical_loss": 3.4289052530786313, "tokens_seen": 2224291840 }, { "epoch": 0.35, "learning_rate": 0.0003292144748455428, "loss": 0.0644, "theoretical_loss": 3.42888889354879, "tokens_seen": 2224422912 }, { "epoch": 0.35, "learning_rate": 0.0003291743560940383, "loss": 0.0625, "theoretical_loss": 3.4288725352527827, "tokens_seen": 2224553984 }, { "epoch": 0.35, "learning_rate": 0.0003291342373425339, "loss": 0.0689, "theoretical_loss": 3.4288561781904443, "tokens_seen": 2224685056 }, { "epoch": 0.35, "learning_rate": 0.00032909411859102946, "loss": 0.0694, "theoretical_loss": 3.42883982236161, "tokens_seen": 2224816128 }, { "epoch": 0.35, "learning_rate": 0.000329053999839525, "loss": 0.0658, "theoretical_loss": 3.4288234677661125, "tokens_seen": 2224947200 }, { "epoch": 0.35, "learning_rate": 0.00032901388108802054, "loss": 0.0702, "theoretical_loss": 3.428807114403787, "tokens_seen": 2225078272 }, { "epoch": 0.35, "learning_rate": 0.0003289737623365161, "loss": 0.0653, "theoretical_loss": 3.4287907622744678, "tokens_seen": 2225209344 }, { "epoch": 0.35, "learning_rate": 0.0003289336435850117, "loss": 0.066, "theoretical_loss": 3.428774411377989, "tokens_seen": 2225340416 }, { "epoch": 0.35, "learning_rate": 0.0003288935248335072, "loss": 0.0642, "theoretical_loss": 3.428758061714186, "tokens_seen": 2225471488 }, { "epoch": 0.35, "learning_rate": 0.00032885340608200276, "loss": 0.0669, "theoretical_loss": 3.4287417132828923, "tokens_seen": 2225602560 }, { "epoch": 0.35, "learning_rate": 0.00032881328733049827, "loss": 0.0668, "theoretical_loss": 3.4287253660839427, "tokens_seen": 2225733632 }, { "epoch": 0.35, "learning_rate": 0.0003287731685789938, "loss": 0.0674, "theoretical_loss": 3.428709020117172, "tokens_seen": 2225864704 }, { "epoch": 0.35, "learning_rate": 0.00032873304982748936, "loss": 0.07, "theoretical_loss": 3.4286926753824143, "tokens_seen": 2225995776 }, { "epoch": 0.35, "learning_rate": 0.0003286929310759849, "loss": 0.0617, "theoretical_loss": 3.4286763318795046, "tokens_seen": 2226126848 }, { "epoch": 0.35, "learning_rate": 0.0003286528123244805, "loss": 0.0691, "theoretical_loss": 3.4286599896082772, "tokens_seen": 2226257920 }, { "epoch": 0.35, "learning_rate": 0.000328612693572976, "loss": 0.072, "theoretical_loss": 3.428643648568567, "tokens_seen": 2226388992 }, { "epoch": 0.35, "learning_rate": 0.0003285725748214716, "loss": 0.0673, "theoretical_loss": 3.428627308760209, "tokens_seen": 2226520064 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0007055290043354034, "objective/train/docs_used": 810670, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2405948638916016, "objective/train/original_loss": 1.2405948638916016, "objective/train/theoretical_loss": 3.4286109701830374, "objective/train/tokens_used": 597175776, "objective/train/value_avg": -0.006046295166015625, "objective/train/value_loss": 0.00024628511164337397, "objective/train/value_max": -2.092123031616211e-05, "objective/train/value_min": -0.921875, "objective/train/value_reward_corr": 0.6043310844024478, "objective/train/value_std": 0.01318359375, "objective/train/weight_avg": 1.0008163452148438, "objective/train/weighted_lm_loss": 1.2412614822387695, "objective/train/weights_max": 1.8546124696731567, "objective/train/weights_min": 0.3680107891559601, "theoretical_loss": 3.4286109701830374, "tokens_seen": 2226651136 }, { "epoch": 0.35, "learning_rate": 0.00032853245606996714, "loss": 0.0667, "theoretical_loss": 3.4286109701830374, "tokens_seen": 2226651136 }, { "epoch": 0.35, "learning_rate": 0.00032849233731846266, "loss": 0.0676, "theoretical_loss": 3.4285946328368873, "tokens_seen": 2226782208 }, { "epoch": 0.35, "learning_rate": 0.0003284522185669582, "loss": 0.0705, "theoretical_loss": 3.428578296721593, "tokens_seen": 2226913280 }, { "epoch": 0.35, "learning_rate": 0.00032841209981545374, "loss": 0.0682, "theoretical_loss": 3.42856196183699, "tokens_seen": 2227044352 }, { "epoch": 0.35, "learning_rate": 0.00032837198106394925, "loss": 0.0672, "theoretical_loss": 3.4285456281829125, "tokens_seen": 2227175424 }, { "epoch": 0.35, "learning_rate": 0.0003283318623124448, "loss": 0.0662, "theoretical_loss": 3.428529295759196, "tokens_seen": 2227306496 }, { "epoch": 0.35, "learning_rate": 0.0003282917435609404, "loss": 0.0689, "theoretical_loss": 3.428512964565675, "tokens_seen": 2227437568 }, { "epoch": 0.35, "learning_rate": 0.00032825162480943596, "loss": 0.0653, "theoretical_loss": 3.428496634602185, "tokens_seen": 2227568640 }, { "epoch": 0.35, "learning_rate": 0.0003282115060579315, "loss": 0.0638, "theoretical_loss": 3.4284803058685602, "tokens_seen": 2227699712 }, { "epoch": 0.35, "learning_rate": 0.00032817138730642704, "loss": 0.0668, "theoretical_loss": 3.4284639783646362, "tokens_seen": 2227830784 }, { "epoch": 0.35, "learning_rate": 0.0003281312685549226, "loss": 0.0696, "theoretical_loss": 3.428447652090248, "tokens_seen": 2227961856 }, { "epoch": 0.35, "learning_rate": 0.0003280911498034181, "loss": 0.0679, "theoretical_loss": 3.4284313270452307, "tokens_seen": 2228092928 }, { "epoch": 0.35, "learning_rate": 0.0003280510310519137, "loss": 0.0682, "theoretical_loss": 3.4284150032294187, "tokens_seen": 2228224000 }, { "epoch": 0.35, "learning_rate": 0.0003280109123004092, "loss": 0.0645, "theoretical_loss": 3.428398680642648, "tokens_seen": 2228355072 }, { "epoch": 0.35, "learning_rate": 0.0003279707935489047, "loss": 0.0661, "theoretical_loss": 3.428382359284754, "tokens_seen": 2228486144 }, { "epoch": 0.35, "learning_rate": 0.0003279306747974003, "loss": 0.0695, "theoretical_loss": 3.428366039155571, "tokens_seen": 2228617216 }, { "epoch": 0.35, "learning_rate": 0.00032789055604589586, "loss": 0.0686, "theoretical_loss": 3.4283497202549347, "tokens_seen": 2228748288 }, { "epoch": 0.35, "learning_rate": 0.0003278504372943914, "loss": 0.0639, "theoretical_loss": 3.4283334025826804, "tokens_seen": 2228879360 }, { "epoch": 0.35, "learning_rate": 0.00032781031854288694, "loss": 0.0643, "theoretical_loss": 3.4283170861386436, "tokens_seen": 2229010432 }, { "epoch": 0.35, "learning_rate": 0.0003277701997913825, "loss": 0.0681, "theoretical_loss": 3.4283007709226587, "tokens_seen": 2229141504 }, { "epoch": 0.35, "learning_rate": 0.0003277300810398781, "loss": 0.0674, "theoretical_loss": 3.4282844569345623, "tokens_seen": 2229272576 }, { "epoch": 0.35, "learning_rate": 0.0003276899622883736, "loss": 0.0649, "theoretical_loss": 3.428268144174189, "tokens_seen": 2229403648 }, { "epoch": 0.35, "learning_rate": 0.00032764984353686916, "loss": 0.0681, "theoretical_loss": 3.428251832641375, "tokens_seen": 2229534720 }, { "epoch": 0.35, "learning_rate": 0.0003276097247853647, "loss": 0.0681, "theoretical_loss": 3.428235522335955, "tokens_seen": 2229665792 }, { "epoch": 0.35, "learning_rate": 0.0003275696060338602, "loss": 0.0687, "theoretical_loss": 3.4282192132577647, "tokens_seen": 2229796864 }, { "epoch": 0.35, "objective/train/advantage_avg": -0.000428285711677745, "objective/train/docs_used": 811859, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4422637224197388, "objective/train/original_loss": 1.4422638416290283, "objective/train/theoretical_loss": 3.4282029054066396, "objective/train/tokens_used": 600452576, "objective/train/value_avg": -0.00919342041015625, "objective/train/value_loss": 0.0002747675753198564, "objective/train/value_max": -2.8192996978759766e-05, "objective/train/value_min": -0.447265625, "objective/train/value_reward_corr": 0.7351733573439034, "objective/train/value_std": 0.0164337158203125, "objective/train/weight_avg": 0.9996934533119202, "objective/train/weighted_lm_loss": 1.4423694610595703, "objective/train/weights_max": 1.3880900144577026, "objective/train/weights_min": 0.3709147274494171, "theoretical_loss": 3.4282029054066396, "tokens_seen": 2229927936 }, { "epoch": 0.35, "learning_rate": 0.00032752948728235576, "loss": 0.0677, "theoretical_loss": 3.4282029054066396, "tokens_seen": 2229927936 }, { "epoch": 0.35, "learning_rate": 0.0003274893685308513, "loss": 0.0668, "theoretical_loss": 3.4281865987824154, "tokens_seen": 2230059008 }, { "epoch": 0.35, "learning_rate": 0.0003274492497793469, "loss": 0.0668, "theoretical_loss": 3.4281702933849276, "tokens_seen": 2230190080 }, { "epoch": 0.35, "learning_rate": 0.0003274091310278424, "loss": 0.0661, "theoretical_loss": 3.428153989214012, "tokens_seen": 2230321152 }, { "epoch": 0.35, "learning_rate": 0.000327369012276338, "loss": 0.0675, "theoretical_loss": 3.4281376862695043, "tokens_seen": 2230452224 }, { "epoch": 0.35, "learning_rate": 0.00032732889352483354, "loss": 0.0641, "theoretical_loss": 3.4281213845512397, "tokens_seen": 2230583296 }, { "epoch": 0.35, "learning_rate": 0.00032728877477332906, "loss": 0.0664, "theoretical_loss": 3.4281050840590543, "tokens_seen": 2230714368 }, { "epoch": 0.35, "learning_rate": 0.0003272486560218246, "loss": 0.07, "theoretical_loss": 3.428088784792784, "tokens_seen": 2230845440 }, { "epoch": 0.35, "learning_rate": 0.00032720853727032014, "loss": 0.0654, "theoretical_loss": 3.428072486752264, "tokens_seen": 2230976512 }, { "epoch": 0.35, "learning_rate": 0.00032716841851881565, "loss": 0.0681, "theoretical_loss": 3.4280561899373305, "tokens_seen": 2231107584 }, { "epoch": 0.35, "learning_rate": 0.0003271282997673112, "loss": 0.0721, "theoretical_loss": 3.42803989434782, "tokens_seen": 2231238656 }, { "epoch": 0.35, "learning_rate": 0.0003270881810158068, "loss": 0.0725, "theoretical_loss": 3.428023599983567, "tokens_seen": 2231369728 }, { "epoch": 0.35, "learning_rate": 0.00032704806226430236, "loss": 0.0669, "theoretical_loss": 3.4280073068444086, "tokens_seen": 2231500800 }, { "epoch": 0.35, "learning_rate": 0.0003270079435127979, "loss": 0.0707, "theoretical_loss": 3.4279910149301798, "tokens_seen": 2231631872 }, { "epoch": 0.35, "learning_rate": 0.00032696782476129344, "loss": 0.0714, "theoretical_loss": 3.427974724240717, "tokens_seen": 2231762944 }, { "epoch": 0.35, "learning_rate": 0.000326927706009789, "loss": 0.0671, "theoretical_loss": 3.4279584347758565, "tokens_seen": 2231894016 }, { "epoch": 0.35, "learning_rate": 0.0003268875872582845, "loss": 0.0676, "theoretical_loss": 3.4279421465354343, "tokens_seen": 2232025088 }, { "epoch": 0.35, "learning_rate": 0.0003268474685067801, "loss": 0.068, "theoretical_loss": 3.4279258595192856, "tokens_seen": 2232156160 }, { "epoch": 0.35, "learning_rate": 0.0003268073497552756, "loss": 0.0677, "theoretical_loss": 3.4279095737272476, "tokens_seen": 2232287232 }, { "epoch": 0.35, "learning_rate": 0.0003267672310037711, "loss": 0.0636, "theoretical_loss": 3.427893289159156, "tokens_seen": 2232418304 }, { "epoch": 0.35, "learning_rate": 0.0003267271122522667, "loss": 0.0635, "theoretical_loss": 3.4278770058148464, "tokens_seen": 2232549376 }, { "epoch": 0.35, "learning_rate": 0.00032668699350076226, "loss": 0.068, "theoretical_loss": 3.427860723694156, "tokens_seen": 2232680448 }, { "epoch": 0.35, "learning_rate": 0.0003266468747492578, "loss": 0.0645, "theoretical_loss": 3.4278444427969204, "tokens_seen": 2232811520 }, { "epoch": 0.35, "learning_rate": 0.00032660675599775334, "loss": 0.0675, "theoretical_loss": 3.427828163122976, "tokens_seen": 2232942592 }, { "epoch": 0.35, "learning_rate": 0.0003265666372462489, "loss": 0.0686, "theoretical_loss": 3.427811884672159, "tokens_seen": 2233073664 }, { "epoch": 0.35, "objective/train/advantage_avg": 0.0007900100899860263, "objective/train/docs_used": 813220, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4437288045883179, "objective/train/original_loss": 1.4437286853790283, "objective/train/theoretical_loss": 3.427795607444306, "objective/train/tokens_used": 603729376, "objective/train/value_avg": -0.006397247314453125, "objective/train/value_loss": 0.0002574238460510969, "objective/train/value_max": -3.88026237487793e-05, "objective/train/value_min": -0.362060546875, "objective/train/value_reward_corr": 0.6080838480240365, "objective/train/value_std": 0.0121612548828125, "objective/train/weight_avg": 1.0008987188339233, "objective/train/weighted_lm_loss": 1.4448599815368652, "objective/train/weights_max": 1.1987440586090088, "objective/train/weights_min": 0.36883002519607544, "theoretical_loss": 3.427795607444306, "tokens_seen": 2233204736 }, { "epoch": 0.35, "learning_rate": 0.0003265265184947445, "loss": 0.0692, "theoretical_loss": 3.427795607444306, "tokens_seen": 2233204736 }, { "epoch": 0.35, "learning_rate": 0.00032648639974324, "loss": 0.0684, "theoretical_loss": 3.427779331439253, "tokens_seen": 2233335808 }, { "epoch": 0.35, "learning_rate": 0.00032644628099173556, "loss": 0.0664, "theoretical_loss": 3.4277630566568367, "tokens_seen": 2233466880 }, { "epoch": 0.35, "learning_rate": 0.0003264061622402311, "loss": 0.0667, "theoretical_loss": 3.427746783096893, "tokens_seen": 2233597952 }, { "epoch": 0.35, "learning_rate": 0.00032636604348872664, "loss": 0.0711, "theoretical_loss": 3.4277305107592593, "tokens_seen": 2233729024 }, { "epoch": 0.35, "learning_rate": 0.00032632592473722216, "loss": 0.0675, "theoretical_loss": 3.427714239643771, "tokens_seen": 2233860096 }, { "epoch": 0.35, "learning_rate": 0.0003262858059857177, "loss": 0.0659, "theoretical_loss": 3.427697969750265, "tokens_seen": 2233991168 }, { "epoch": 0.35, "learning_rate": 0.0003262456872342133, "loss": 0.0697, "theoretical_loss": 3.4276817010785785, "tokens_seen": 2234122240 }, { "epoch": 0.35, "learning_rate": 0.0003262055684827088, "loss": 0.0655, "theoretical_loss": 3.427665433628547, "tokens_seen": 2234253312 }, { "epoch": 0.35, "learning_rate": 0.0003261654497312044, "loss": 0.0672, "theoretical_loss": 3.427649167400008, "tokens_seen": 2234384384 }, { "epoch": 0.35, "learning_rate": 0.00032612533097969995, "loss": 0.0688, "theoretical_loss": 3.427632902392797, "tokens_seen": 2234515456 }, { "epoch": 0.35, "learning_rate": 0.00032608521222819546, "loss": 0.0652, "theoretical_loss": 3.427616638606752, "tokens_seen": 2234646528 }, { "epoch": 0.35, "learning_rate": 0.00032604509347669103, "loss": 0.067, "theoretical_loss": 3.427600376041709, "tokens_seen": 2234777600 }, { "epoch": 0.35, "learning_rate": 0.00032600497472518654, "loss": 0.0659, "theoretical_loss": 3.427584114697505, "tokens_seen": 2234908672 }, { "epoch": 0.35, "learning_rate": 0.0003259648559736821, "loss": 0.0695, "theoretical_loss": 3.4275678545739763, "tokens_seen": 2235039744 }, { "epoch": 0.35, "learning_rate": 0.0003259247372221776, "loss": 0.0695, "theoretical_loss": 3.42755159567096, "tokens_seen": 2235170816 }, { "epoch": 0.35, "learning_rate": 0.0003258846184706732, "loss": 0.0666, "theoretical_loss": 3.4275353379882927, "tokens_seen": 2235301888 }, { "epoch": 0.35, "learning_rate": 0.00032584449971916876, "loss": 0.068, "theoretical_loss": 3.4275190815258116, "tokens_seen": 2235432960 }, { "epoch": 0.35, "learning_rate": 0.0003258043809676643, "loss": 0.0669, "theoretical_loss": 3.427502826283354, "tokens_seen": 2235564032 }, { "epoch": 0.35, "learning_rate": 0.00032576426221615984, "loss": 0.0637, "theoretical_loss": 3.4274865722607553, "tokens_seen": 2235695104 }, { "epoch": 0.36, "learning_rate": 0.0003257241434646554, "loss": 0.0653, "theoretical_loss": 3.427470319457854, "tokens_seen": 2235826176 }, { "epoch": 0.36, "learning_rate": 0.0003256840247131509, "loss": 0.0634, "theoretical_loss": 3.4274540678744865, "tokens_seen": 2235957248 }, { "epoch": 0.36, "learning_rate": 0.0003256439059616465, "loss": 0.0678, "theoretical_loss": 3.4274378175104894, "tokens_seen": 2236088320 }, { "epoch": 0.36, "learning_rate": 0.000325603787210142, "loss": 0.0655, "theoretical_loss": 3.4274215683657006, "tokens_seen": 2236219392 }, { "epoch": 0.36, "learning_rate": 0.0003255636684586376, "loss": 0.0681, "theoretical_loss": 3.427405320439956, "tokens_seen": 2236350464 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0005948507459834218, "objective/train/docs_used": 814455, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5140570402145386, "objective/train/original_loss": 1.5140570402145386, "objective/train/theoretical_loss": 3.427389073733094, "objective/train/tokens_used": 607006176, "objective/train/value_avg": -0.00849151611328125, "objective/train/value_loss": 0.0001455762394471094, "objective/train/value_max": -3.916025161743164e-05, "objective/train/value_min": -0.253662109375, "objective/train/value_reward_corr": 0.7543457620498487, "objective/train/value_std": 0.01392364501953125, "objective/train/weight_avg": 1.0006659030914307, "objective/train/weighted_lm_loss": 1.5134103298187256, "objective/train/weights_max": 1.2256743907928467, "objective/train/weights_min": 0.6075275540351868, "theoretical_loss": 3.427389073733094, "tokens_seen": 2236481536 }, { "epoch": 0.36, "learning_rate": 0.0003255235497071331, "loss": 0.0697, "theoretical_loss": 3.427389073733094, "tokens_seen": 2236481536 }, { "epoch": 0.36, "learning_rate": 0.00032548343095562866, "loss": 0.0702, "theoretical_loss": 3.4273728282449514, "tokens_seen": 2236612608 }, { "epoch": 0.36, "learning_rate": 0.00032544331220412423, "loss": 0.0706, "theoretical_loss": 3.4273565839753646, "tokens_seen": 2236743680 }, { "epoch": 0.36, "learning_rate": 0.00032540319345261974, "loss": 0.0686, "theoretical_loss": 3.4273403409241716, "tokens_seen": 2236874752 }, { "epoch": 0.36, "learning_rate": 0.0003253630747011153, "loss": 0.0699, "theoretical_loss": 3.427324099091209, "tokens_seen": 2237005824 }, { "epoch": 0.36, "learning_rate": 0.0003253229559496109, "loss": 0.069, "theoretical_loss": 3.427307858476315, "tokens_seen": 2237136896 }, { "epoch": 0.36, "learning_rate": 0.0003252828371981064, "loss": 0.067, "theoretical_loss": 3.4272916190793254, "tokens_seen": 2237267968 }, { "epoch": 0.36, "learning_rate": 0.00032524271844660196, "loss": 0.0698, "theoretical_loss": 3.4272753809000793, "tokens_seen": 2237399040 }, { "epoch": 0.36, "learning_rate": 0.00032520259969509753, "loss": 0.0663, "theoretical_loss": 3.4272591439384126, "tokens_seen": 2237530112 }, { "epoch": 0.36, "learning_rate": 0.00032516248094359304, "loss": 0.0703, "theoretical_loss": 3.4272429081941636, "tokens_seen": 2237661184 }, { "epoch": 0.36, "learning_rate": 0.00032512236219208856, "loss": 0.0668, "theoretical_loss": 3.4272266736671693, "tokens_seen": 2237792256 }, { "epoch": 0.36, "learning_rate": 0.0003250822434405841, "loss": 0.0707, "theoretical_loss": 3.4272104403572667, "tokens_seen": 2237923328 }, { "epoch": 0.36, "learning_rate": 0.0003250421246890797, "loss": 0.0655, "theoretical_loss": 3.4271942082642948, "tokens_seen": 2238054400 }, { "epoch": 0.36, "learning_rate": 0.0003250020059375752, "loss": 0.0688, "theoretical_loss": 3.4271779773880895, "tokens_seen": 2238185472 }, { "epoch": 0.36, "learning_rate": 0.0003249618871860708, "loss": 0.0681, "theoretical_loss": 3.427161747728489, "tokens_seen": 2238316544 }, { "epoch": 0.36, "learning_rate": 0.00032492176843456635, "loss": 0.065, "theoretical_loss": 3.427145519285331, "tokens_seen": 2238447616 }, { "epoch": 0.36, "learning_rate": 0.00032488164968306186, "loss": 0.0672, "theoretical_loss": 3.427129292058453, "tokens_seen": 2238578688 }, { "epoch": 0.36, "learning_rate": 0.00032484153093155743, "loss": 0.0708, "theoretical_loss": 3.427113066047692, "tokens_seen": 2238709760 }, { "epoch": 0.36, "learning_rate": 0.000324801412180053, "loss": 0.0664, "theoretical_loss": 3.4270968412528866, "tokens_seen": 2238840832 }, { "epoch": 0.36, "learning_rate": 0.0003247612934285485, "loss": 0.0686, "theoretical_loss": 3.4270806176738744, "tokens_seen": 2238971904 }, { "epoch": 0.36, "learning_rate": 0.000324721174677044, "loss": 0.0698, "theoretical_loss": 3.4270643953104925, "tokens_seen": 2239102976 }, { "epoch": 0.36, "learning_rate": 0.0003246810559255396, "loss": 0.0697, "theoretical_loss": 3.4270481741625796, "tokens_seen": 2239234048 }, { "epoch": 0.36, "learning_rate": 0.00032464093717403516, "loss": 0.0696, "theoretical_loss": 3.4270319542299723, "tokens_seen": 2239365120 }, { "epoch": 0.36, "learning_rate": 0.0003246008184225307, "loss": 0.0657, "theoretical_loss": 3.4270157355125095, "tokens_seen": 2239496192 }, { "epoch": 0.36, "learning_rate": 0.00032456069967102624, "loss": 0.0683, "theoretical_loss": 3.426999518010028, "tokens_seen": 2239627264 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0003331936022732407, "objective/train/docs_used": 815683, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.343484878540039, "objective/train/original_loss": 1.343484878540039, "objective/train/theoretical_loss": 3.426983301722367, "objective/train/tokens_used": 610282976, "objective/train/value_avg": -0.00795745849609375, "objective/train/value_loss": 0.0002456703514326364, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.25830078125, "objective/train/value_reward_corr": 0.6546721490741427, "objective/train/value_std": 0.01453399658203125, "objective/train/weight_avg": 1.0004451274871826, "objective/train/weighted_lm_loss": 1.343306303024292, "objective/train/weights_max": 1.2340816259384155, "objective/train/weights_min": 0.3685407042503357, "theoretical_loss": 3.426983301722367, "tokens_seen": 2239758336 }, { "epoch": 0.36, "learning_rate": 0.0003245205809195218, "loss": 0.069, "theoretical_loss": 3.426983301722367, "tokens_seen": 2239758336 }, { "epoch": 0.36, "learning_rate": 0.00032448046216801733, "loss": 0.0693, "theoretical_loss": 3.4269670866493636, "tokens_seen": 2239889408 }, { "epoch": 0.36, "learning_rate": 0.0003244403434165129, "loss": 0.0683, "theoretical_loss": 3.4269508727908553, "tokens_seen": 2240020480 }, { "epoch": 0.36, "learning_rate": 0.00032440022466500846, "loss": 0.0702, "theoretical_loss": 3.426934660146681, "tokens_seen": 2240151552 }, { "epoch": 0.36, "learning_rate": 0.000324360105913504, "loss": 0.0713, "theoretical_loss": 3.426918448716678, "tokens_seen": 2240282624 }, { "epoch": 0.36, "learning_rate": 0.0003243199871619995, "loss": 0.0678, "theoretical_loss": 3.426902238500685, "tokens_seen": 2240413696 }, { "epoch": 0.36, "learning_rate": 0.00032427986841049506, "loss": 0.0674, "theoretical_loss": 3.42688602949854, "tokens_seen": 2240544768 }, { "epoch": 0.36, "learning_rate": 0.00032423974965899063, "loss": 0.0662, "theoretical_loss": 3.4268698217100804, "tokens_seen": 2240675840 }, { "epoch": 0.36, "learning_rate": 0.00032419963090748614, "loss": 0.0709, "theoretical_loss": 3.426853615135145, "tokens_seen": 2240806912 }, { "epoch": 0.36, "learning_rate": 0.0003241595121559817, "loss": 0.0684, "theoretical_loss": 3.426837409773572, "tokens_seen": 2240937984 }, { "epoch": 0.36, "learning_rate": 0.0003241193934044773, "loss": 0.0693, "theoretical_loss": 3.426821205625199, "tokens_seen": 2241069056 }, { "epoch": 0.36, "learning_rate": 0.00032407927465297285, "loss": 0.0655, "theoretical_loss": 3.4268050026898647, "tokens_seen": 2241200128 }, { "epoch": 0.36, "learning_rate": 0.00032403915590146836, "loss": 0.0651, "theoretical_loss": 3.4267888009674072, "tokens_seen": 2241331200 }, { "epoch": 0.36, "learning_rate": 0.00032399903714996393, "loss": 0.0697, "theoretical_loss": 3.4267726004576646, "tokens_seen": 2241462272 }, { "epoch": 0.36, "learning_rate": 0.00032395891839845945, "loss": 0.0698, "theoretical_loss": 3.4267564011604756, "tokens_seen": 2241593344 }, { "epoch": 0.36, "learning_rate": 0.00032391879964695496, "loss": 0.0683, "theoretical_loss": 3.4267402030756786, "tokens_seen": 2241724416 }, { "epoch": 0.36, "learning_rate": 0.00032387868089545053, "loss": 0.0657, "theoretical_loss": 3.426724006203112, "tokens_seen": 2241855488 }, { "epoch": 0.36, "learning_rate": 0.0003238385621439461, "loss": 0.0691, "theoretical_loss": 3.426707810542613, "tokens_seen": 2241986560 }, { "epoch": 0.36, "learning_rate": 0.0003237984433924416, "loss": 0.0708, "theoretical_loss": 3.426691616094022, "tokens_seen": 2242117632 }, { "epoch": 0.36, "learning_rate": 0.0003237583246409372, "loss": 0.0685, "theoretical_loss": 3.4266754228571763, "tokens_seen": 2242248704 }, { "epoch": 0.36, "learning_rate": 0.00032371820588943275, "loss": 0.0692, "theoretical_loss": 3.4266592308319144, "tokens_seen": 2242379776 }, { "epoch": 0.36, "learning_rate": 0.0003236780871379283, "loss": 0.0672, "theoretical_loss": 3.426643040018075, "tokens_seen": 2242510848 }, { "epoch": 0.36, "learning_rate": 0.00032363796838642383, "loss": 0.0662, "theoretical_loss": 3.426626850415497, "tokens_seen": 2242641920 }, { "epoch": 0.36, "learning_rate": 0.0003235978496349194, "loss": 0.0679, "theoretical_loss": 3.4266106620240184, "tokens_seen": 2242772992 }, { "epoch": 0.36, "learning_rate": 0.0003235577308834149, "loss": 0.0658, "theoretical_loss": 3.4265944748434785, "tokens_seen": 2242904064 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0010735767427831888, "objective/train/docs_used": 816816, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4275598526000977, "objective/train/original_loss": 1.4275598526000977, "objective/train/theoretical_loss": 3.4265782888737153, "objective/train/tokens_used": 613559776, "objective/train/value_avg": -0.0075836181640625, "objective/train/value_loss": 0.00016183604020625353, "objective/train/value_max": -2.3543834686279297e-05, "objective/train/value_min": -0.2122802734375, "objective/train/value_reward_corr": 0.6980390139874054, "objective/train/value_std": 0.013275146484375, "objective/train/weight_avg": 1.0011488199234009, "objective/train/weighted_lm_loss": 1.4290755987167358, "objective/train/weights_max": 1.1709752082824707, "objective/train/weights_min": 0.3774779438972473, "theoretical_loss": 3.4265782888737153, "tokens_seen": 2243035136 }, { "epoch": 0.36, "learning_rate": 0.0003235176121319104, "loss": 0.0692, "theoretical_loss": 3.4265782888737153, "tokens_seen": 2243035136 }, { "epoch": 0.36, "learning_rate": 0.000323477493380406, "loss": 0.0728, "theoretical_loss": 3.4265621041145677, "tokens_seen": 2243166208 }, { "epoch": 0.36, "learning_rate": 0.00032343737462890156, "loss": 0.0691, "theoretical_loss": 3.426545920565875, "tokens_seen": 2243297280 }, { "epoch": 0.36, "learning_rate": 0.0003233972558773971, "loss": 0.0642, "theoretical_loss": 3.426529738227475, "tokens_seen": 2243428352 }, { "epoch": 0.36, "learning_rate": 0.00032335713712589265, "loss": 0.0677, "theoretical_loss": 3.4265135570992076, "tokens_seen": 2243559424 }, { "epoch": 0.36, "learning_rate": 0.0003233170183743882, "loss": 0.0641, "theoretical_loss": 3.4264973771809104, "tokens_seen": 2243690496 }, { "epoch": 0.36, "learning_rate": 0.0003232768996228838, "loss": 0.0653, "theoretical_loss": 3.4264811984724233, "tokens_seen": 2243821568 }, { "epoch": 0.36, "learning_rate": 0.0003232367808713793, "loss": 0.0679, "theoretical_loss": 3.426465020973584, "tokens_seen": 2243952640 }, { "epoch": 0.36, "learning_rate": 0.00032319666211987487, "loss": 0.0707, "theoretical_loss": 3.4264488446842325, "tokens_seen": 2244083712 }, { "epoch": 0.36, "learning_rate": 0.0003231565433683704, "loss": 0.0698, "theoretical_loss": 3.4264326696042073, "tokens_seen": 2244214784 }, { "epoch": 0.36, "learning_rate": 0.0003231164246168659, "loss": 0.0676, "theoretical_loss": 3.4264164957333474, "tokens_seen": 2244345856 }, { "epoch": 0.36, "learning_rate": 0.00032307630586536146, "loss": 0.066, "theoretical_loss": 3.4264003230714923, "tokens_seen": 2244476928 }, { "epoch": 0.36, "learning_rate": 0.00032303618711385703, "loss": 0.0671, "theoretical_loss": 3.42638415161848, "tokens_seen": 2244608000 }, { "epoch": 0.36, "learning_rate": 0.00032299606836235254, "loss": 0.0666, "theoretical_loss": 3.4263679813741503, "tokens_seen": 2244739072 }, { "epoch": 0.36, "learning_rate": 0.0003229559496108481, "loss": 0.0665, "theoretical_loss": 3.426351812338342, "tokens_seen": 2244870144 }, { "epoch": 0.36, "learning_rate": 0.0003229158308593437, "loss": 0.0715, "theoretical_loss": 3.4263356445108943, "tokens_seen": 2245001216 }, { "epoch": 0.36, "learning_rate": 0.00032287571210783925, "loss": 0.0717, "theoretical_loss": 3.4263194778916466, "tokens_seen": 2245132288 }, { "epoch": 0.36, "learning_rate": 0.00032283559335633476, "loss": 0.0701, "theoretical_loss": 3.426303312480438, "tokens_seen": 2245263360 }, { "epoch": 0.36, "learning_rate": 0.00032279547460483033, "loss": 0.0687, "theoretical_loss": 3.4262871482771073, "tokens_seen": 2245394432 }, { "epoch": 0.36, "learning_rate": 0.00032275535585332585, "loss": 0.0672, "theoretical_loss": 3.426270985281494, "tokens_seen": 2245525504 }, { "epoch": 0.36, "learning_rate": 0.00032271523710182136, "loss": 0.0682, "theoretical_loss": 3.426254823493437, "tokens_seen": 2245656576 }, { "epoch": 0.36, "learning_rate": 0.00032267511835031693, "loss": 0.0706, "theoretical_loss": 3.426238662912777, "tokens_seen": 2245787648 }, { "epoch": 0.36, "learning_rate": 0.0003226349995988125, "loss": 0.0685, "theoretical_loss": 3.4262225035393516, "tokens_seen": 2245918720 }, { "epoch": 0.36, "learning_rate": 0.000322594880847308, "loss": 0.0678, "theoretical_loss": 3.4262063453730014, "tokens_seen": 2246049792 }, { "epoch": 0.36, "learning_rate": 0.0003225547620958036, "loss": 0.0651, "theoretical_loss": 3.426190188413565, "tokens_seen": 2246180864 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0002643518091645092, "objective/train/docs_used": 817879, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2732058763504028, "objective/train/original_loss": 1.2732057571411133, "objective/train/theoretical_loss": 3.4261740326608825, "objective/train/tokens_used": 616836576, "objective/train/value_avg": -0.0102081298828125, "objective/train/value_loss": 0.0002971589856315404, "objective/train/value_max": -2.8192996978759766e-05, "objective/train/value_min": -0.388427734375, "objective/train/value_reward_corr": 0.7016453914558913, "objective/train/value_std": 0.01611328125, "objective/train/weight_avg": 1.0003997087478638, "objective/train/weighted_lm_loss": 1.2735238075256348, "objective/train/weights_max": 1.2262730598449707, "objective/train/weights_min": 0.37812936305999756, "theoretical_loss": 3.4261740326608825, "tokens_seen": 2246311936 }, { "epoch": 0.36, "learning_rate": 0.00032251464334429915, "loss": 0.0674, "theoretical_loss": 3.4261740326608825, "tokens_seen": 2246311936 }, { "epoch": 0.36, "learning_rate": 0.0003224745245927947, "loss": 0.0684, "theoretical_loss": 3.4261578781147923, "tokens_seen": 2246443008 }, { "epoch": 0.36, "learning_rate": 0.00032243440584129023, "loss": 0.0681, "theoretical_loss": 3.4261417247751353, "tokens_seen": 2246574080 }, { "epoch": 0.36, "learning_rate": 0.0003223942870897858, "loss": 0.0669, "theoretical_loss": 3.4261255726417503, "tokens_seen": 2246705152 }, { "epoch": 0.36, "learning_rate": 0.0003223541683382813, "loss": 0.0668, "theoretical_loss": 3.4261094217144765, "tokens_seen": 2246836224 }, { "epoch": 0.36, "learning_rate": 0.00032231404958677683, "loss": 0.0679, "theoretical_loss": 3.426093271993154, "tokens_seen": 2246967296 }, { "epoch": 0.36, "learning_rate": 0.0003222739308352724, "loss": 0.0693, "theoretical_loss": 3.4260771234776226, "tokens_seen": 2247098368 }, { "epoch": 0.36, "learning_rate": 0.00032223381208376796, "loss": 0.0668, "theoretical_loss": 3.4260609761677214, "tokens_seen": 2247229440 }, { "epoch": 0.36, "learning_rate": 0.0003221936933322635, "loss": 0.0718, "theoretical_loss": 3.4260448300632906, "tokens_seen": 2247360512 }, { "epoch": 0.36, "learning_rate": 0.00032215357458075905, "loss": 0.0686, "theoretical_loss": 3.4260286851641695, "tokens_seen": 2247491584 }, { "epoch": 0.36, "learning_rate": 0.0003221134558292546, "loss": 0.0687, "theoretical_loss": 3.4260125414701976, "tokens_seen": 2247622656 }, { "epoch": 0.36, "learning_rate": 0.0003220733370777502, "loss": 0.0671, "theoretical_loss": 3.4259963989812157, "tokens_seen": 2247753728 }, { "epoch": 0.36, "learning_rate": 0.0003220332183262457, "loss": 0.0641, "theoretical_loss": 3.4259802576970624, "tokens_seen": 2247884800 }, { "epoch": 0.36, "learning_rate": 0.00032199309957474127, "loss": 0.0716, "theoretical_loss": 3.425964117617578, "tokens_seen": 2248015872 }, { "epoch": 0.36, "learning_rate": 0.0003219529808232368, "loss": 0.0671, "theoretical_loss": 3.425947978742603, "tokens_seen": 2248146944 }, { "epoch": 0.36, "learning_rate": 0.0003219128620717323, "loss": 0.0683, "theoretical_loss": 3.425931841071976, "tokens_seen": 2248278016 }, { "epoch": 0.36, "learning_rate": 0.00032187274332022786, "loss": 0.0642, "theoretical_loss": 3.425915704605538, "tokens_seen": 2248409088 }, { "epoch": 0.36, "learning_rate": 0.00032183262456872343, "loss": 0.0671, "theoretical_loss": 3.4258995693431284, "tokens_seen": 2248540160 }, { "epoch": 0.36, "learning_rate": 0.00032179250581721895, "loss": 0.0672, "theoretical_loss": 3.4258834352845877, "tokens_seen": 2248671232 }, { "epoch": 0.36, "learning_rate": 0.0003217523870657145, "loss": 0.0688, "theoretical_loss": 3.4258673024297552, "tokens_seen": 2248802304 }, { "epoch": 0.36, "learning_rate": 0.0003217122683142101, "loss": 0.0685, "theoretical_loss": 3.425851170778472, "tokens_seen": 2248933376 }, { "epoch": 0.36, "learning_rate": 0.00032167214956270565, "loss": 0.0649, "theoretical_loss": 3.425835040330577, "tokens_seen": 2249064448 }, { "epoch": 0.36, "learning_rate": 0.00032163203081120117, "loss": 0.0676, "theoretical_loss": 3.4258189110859107, "tokens_seen": 2249195520 }, { "epoch": 0.36, "learning_rate": 0.00032159191205969673, "loss": 0.0651, "theoretical_loss": 3.425802783044314, "tokens_seen": 2249326592 }, { "epoch": 0.36, "learning_rate": 0.00032155179330819225, "loss": 0.0657, "theoretical_loss": 3.425786656205626, "tokens_seen": 2249457664 }, { "epoch": 0.36, "objective/train/advantage_avg": 0.0005281991325318813, "objective/train/docs_used": 819025, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3830480575561523, "objective/train/original_loss": 1.3830480575561523, "objective/train/theoretical_loss": 3.425770530569687, "objective/train/tokens_used": 620113376, "objective/train/value_avg": -0.00698089599609375, "objective/train/value_loss": 0.00019987949053756893, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.8095703125, "objective/train/value_reward_corr": 0.7237506609021904, "objective/train/value_std": 0.0161590576171875, "objective/train/weight_avg": 1.0006194114685059, "objective/train/weighted_lm_loss": 1.3830769062042236, "objective/train/weights_max": 1.6093494892120361, "objective/train/weights_min": 0.3690122961997986, "theoretical_loss": 3.425770530569687, "tokens_seen": 2249588736 }, { "epoch": 0.36, "learning_rate": 0.00032151167455668776, "loss": 0.0693, "theoretical_loss": 3.425770530569687, "tokens_seen": 2249588736 }, { "epoch": 0.36, "learning_rate": 0.00032147155580518333, "loss": 0.0675, "theoretical_loss": 3.425754406136338, "tokens_seen": 2249719808 }, { "epoch": 0.36, "learning_rate": 0.0003214314370536789, "loss": 0.0684, "theoretical_loss": 3.425738282905418, "tokens_seen": 2249850880 }, { "epoch": 0.36, "learning_rate": 0.00032139131830217447, "loss": 0.0736, "theoretical_loss": 3.425722160876769, "tokens_seen": 2249981952 }, { "epoch": 0.36, "learning_rate": 0.00032135119955067, "loss": 0.0693, "theoretical_loss": 3.42570604005023, "tokens_seen": 2250113024 }, { "epoch": 0.36, "learning_rate": 0.00032131108079916555, "loss": 0.0701, "theoretical_loss": 3.425689920425642, "tokens_seen": 2250244096 }, { "epoch": 0.36, "learning_rate": 0.0003212709620476611, "loss": 0.0678, "theoretical_loss": 3.425673802002845, "tokens_seen": 2250375168 }, { "epoch": 0.36, "learning_rate": 0.00032123084329615663, "loss": 0.0658, "theoretical_loss": 3.42565768478168, "tokens_seen": 2250506240 }, { "epoch": 0.36, "learning_rate": 0.0003211907245446522, "loss": 0.068, "theoretical_loss": 3.4256415687619866, "tokens_seen": 2250637312 }, { "epoch": 0.36, "learning_rate": 0.0003211506057931477, "loss": 0.0656, "theoretical_loss": 3.4256254539436055, "tokens_seen": 2250768384 }, { "epoch": 0.36, "learning_rate": 0.00032111048704164323, "loss": 0.0713, "theoretical_loss": 3.4256093403263774, "tokens_seen": 2250899456 }, { "epoch": 0.36, "learning_rate": 0.0003210703682901388, "loss": 0.0727, "theoretical_loss": 3.4255932279101433, "tokens_seen": 2251030528 }, { "epoch": 0.36, "learning_rate": 0.00032103024953863437, "loss": 0.0691, "theoretical_loss": 3.4255771166947424, "tokens_seen": 2251161600 }, { "epoch": 0.36, "learning_rate": 0.00032099013078712993, "loss": 0.0707, "theoretical_loss": 3.4255610066800166, "tokens_seen": 2251292672 }, { "epoch": 0.36, "learning_rate": 0.00032095001203562545, "loss": 0.0659, "theoretical_loss": 3.4255448978658065, "tokens_seen": 2251423744 }, { "epoch": 0.36, "learning_rate": 0.000320909893284121, "loss": 0.0643, "theoretical_loss": 3.425528790251952, "tokens_seen": 2251554816 }, { "epoch": 0.36, "learning_rate": 0.0003208697745326166, "loss": 0.0687, "theoretical_loss": 3.425512683838294, "tokens_seen": 2251685888 }, { "epoch": 0.36, "learning_rate": 0.0003208296557811121, "loss": 0.0677, "theoretical_loss": 3.4254965786246734, "tokens_seen": 2251816960 }, { "epoch": 0.36, "learning_rate": 0.00032078953702960767, "loss": 0.0683, "theoretical_loss": 3.425480474610931, "tokens_seen": 2251948032 }, { "epoch": 0.36, "learning_rate": 0.0003207494182781032, "loss": 0.0648, "theoretical_loss": 3.4254643717969073, "tokens_seen": 2252079104 }, { "epoch": 0.37, "learning_rate": 0.0003207092995265987, "loss": 0.0698, "theoretical_loss": 3.425448270182444, "tokens_seen": 2252210176 }, { "epoch": 0.37, "learning_rate": 0.00032066918077509426, "loss": 0.0706, "theoretical_loss": 3.42543216976738, "tokens_seen": 2252341248 }, { "epoch": 0.37, "learning_rate": 0.00032062906202358983, "loss": 0.0683, "theoretical_loss": 3.4254160705515577, "tokens_seen": 2252472320 }, { "epoch": 0.37, "learning_rate": 0.0003205889432720854, "loss": 0.0681, "theoretical_loss": 3.425399972534818, "tokens_seen": 2252603392 }, { "epoch": 0.37, "learning_rate": 0.0003205488245205809, "loss": 0.0659, "theoretical_loss": 3.4253838757170016, "tokens_seen": 2252734464 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0005987347103655338, "objective/train/docs_used": 820362, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3689802885055542, "objective/train/original_loss": 1.3689805269241333, "objective/train/theoretical_loss": 3.4253677800979494, "objective/train/tokens_used": 623390176, "objective/train/value_avg": -0.006153106689453125, "objective/train/value_loss": 0.00018753753101918846, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.3466796875, "objective/train/value_reward_corr": 0.6083251891108439, "objective/train/value_std": 0.0106658935546875, "objective/train/weight_avg": 1.000680685043335, "objective/train/weighted_lm_loss": 1.369352102279663, "objective/train/weights_max": 1.1423146724700928, "objective/train/weights_min": 0.37077608704566956, "theoretical_loss": 3.4253677800979494, "tokens_seen": 2252865536 }, { "epoch": 0.37, "learning_rate": 0.0003205087057690765, "loss": 0.071, "theoretical_loss": 3.4253677800979494, "tokens_seen": 2252865536 }, { "epoch": 0.37, "learning_rate": 0.00032046858701757205, "loss": 0.0656, "theoretical_loss": 3.425351685677502, "tokens_seen": 2252996608 }, { "epoch": 0.37, "learning_rate": 0.00032042846826606757, "loss": 0.0692, "theoretical_loss": 3.425335592455501, "tokens_seen": 2253127680 }, { "epoch": 0.37, "learning_rate": 0.00032038834951456313, "loss": 0.0636, "theoretical_loss": 3.4253195004317867, "tokens_seen": 2253258752 }, { "epoch": 0.37, "learning_rate": 0.00032034823076305865, "loss": 0.0654, "theoretical_loss": 3.4253034096062014, "tokens_seen": 2253389824 }, { "epoch": 0.37, "learning_rate": 0.00032030811201155416, "loss": 0.0702, "theoretical_loss": 3.4252873199785854, "tokens_seen": 2253520896 }, { "epoch": 0.37, "learning_rate": 0.00032026799326004973, "loss": 0.0631, "theoretical_loss": 3.4252712315487797, "tokens_seen": 2253651968 }, { "epoch": 0.37, "learning_rate": 0.0003202278745085453, "loss": 0.0677, "theoretical_loss": 3.4252551443166257, "tokens_seen": 2253783040 }, { "epoch": 0.37, "learning_rate": 0.00032018775575704087, "loss": 0.0699, "theoretical_loss": 3.4252390582819645, "tokens_seen": 2253914112 }, { "epoch": 0.37, "learning_rate": 0.0003201476370055364, "loss": 0.0669, "theoretical_loss": 3.425222973444638, "tokens_seen": 2254045184 }, { "epoch": 0.37, "learning_rate": 0.00032010751825403195, "loss": 0.0674, "theoretical_loss": 3.4252068898044863, "tokens_seen": 2254176256 }, { "epoch": 0.37, "learning_rate": 0.0003200673995025275, "loss": 0.0723, "theoretical_loss": 3.4251908073613517, "tokens_seen": 2254307328 }, { "epoch": 0.37, "learning_rate": 0.00032002728075102303, "loss": 0.0677, "theoretical_loss": 3.425174726115075, "tokens_seen": 2254438400 }, { "epoch": 0.37, "learning_rate": 0.0003199871619995186, "loss": 0.0668, "theoretical_loss": 3.4251586460654977, "tokens_seen": 2254569472 }, { "epoch": 0.37, "learning_rate": 0.0003199470432480141, "loss": 0.0669, "theoretical_loss": 3.425142567212461, "tokens_seen": 2254700544 }, { "epoch": 0.37, "learning_rate": 0.00031990692449650963, "loss": 0.0714, "theoretical_loss": 3.4251264895558067, "tokens_seen": 2254831616 }, { "epoch": 0.37, "learning_rate": 0.0003198668057450052, "loss": 0.0682, "theoretical_loss": 3.4251104130953762, "tokens_seen": 2254962688 }, { "epoch": 0.37, "learning_rate": 0.00031982668699350077, "loss": 0.0667, "theoretical_loss": 3.4250943378310104, "tokens_seen": 2255093760 }, { "epoch": 0.37, "learning_rate": 0.00031978656824199634, "loss": 0.0643, "theoretical_loss": 3.4250782637625514, "tokens_seen": 2255224832 }, { "epoch": 0.37, "learning_rate": 0.00031974644949049185, "loss": 0.0687, "theoretical_loss": 3.4250621908898404, "tokens_seen": 2255355904 }, { "epoch": 0.37, "learning_rate": 0.0003197063307389874, "loss": 0.0653, "theoretical_loss": 3.4250461192127193, "tokens_seen": 2255486976 }, { "epoch": 0.37, "learning_rate": 0.000319666211987483, "loss": 0.0637, "theoretical_loss": 3.4250300487310295, "tokens_seen": 2255618048 }, { "epoch": 0.37, "learning_rate": 0.0003196260932359785, "loss": 0.0661, "theoretical_loss": 3.4250139794446124, "tokens_seen": 2255749120 }, { "epoch": 0.37, "learning_rate": 0.00031958597448447407, "loss": 0.071, "theoretical_loss": 3.4249979113533096, "tokens_seen": 2255880192 }, { "epoch": 0.37, "learning_rate": 0.0003195458557329696, "loss": 0.0666, "theoretical_loss": 3.4249818444569637, "tokens_seen": 2256011264 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0001873850851552561, "objective/train/docs_used": 821597, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3023977279663086, "objective/train/original_loss": 1.302397608757019, "objective/train/theoretical_loss": 3.4249657787554155, "objective/train/tokens_used": 626666976, "objective/train/value_avg": -0.00933837890625, "objective/train/value_loss": 0.0005489669274538755, "objective/train/value_max": -1.895427703857422e-05, "objective/train/value_min": -0.73046875, "objective/train/value_reward_corr": 0.7657133466585702, "objective/train/value_std": 0.0274200439453125, "objective/train/weight_avg": 1.0004335641860962, "objective/train/weighted_lm_loss": 1.303447961807251, "objective/train/weights_max": 1.8978723287582397, "objective/train/weights_min": 0.36873507499694824, "theoretical_loss": 3.4249657787554155, "tokens_seen": 2256142336 }, { "epoch": 0.37, "learning_rate": 0.0003195057369814651, "loss": 0.0656, "theoretical_loss": 3.4249657787554155, "tokens_seen": 2256142336 }, { "epoch": 0.37, "learning_rate": 0.00031946561822996067, "loss": 0.0635, "theoretical_loss": 3.424949714248507, "tokens_seen": 2256273408 }, { "epoch": 0.37, "learning_rate": 0.00031942549947845623, "loss": 0.0701, "theoretical_loss": 3.42493365093608, "tokens_seen": 2256404480 }, { "epoch": 0.37, "learning_rate": 0.0003193853807269518, "loss": 0.0689, "theoretical_loss": 3.4249175888179764, "tokens_seen": 2256535552 }, { "epoch": 0.37, "learning_rate": 0.0003193452619754473, "loss": 0.0692, "theoretical_loss": 3.4249015278940376, "tokens_seen": 2256666624 }, { "epoch": 0.37, "learning_rate": 0.0003193051432239429, "loss": 0.0676, "theoretical_loss": 3.424885468164106, "tokens_seen": 2256797696 }, { "epoch": 0.37, "learning_rate": 0.00031926502447243845, "loss": 0.0685, "theoretical_loss": 3.424869409628023, "tokens_seen": 2256928768 }, { "epoch": 0.37, "learning_rate": 0.00031922490572093397, "loss": 0.0686, "theoretical_loss": 3.4248533522856315, "tokens_seen": 2257059840 }, { "epoch": 0.37, "learning_rate": 0.00031918478696942954, "loss": 0.069, "theoretical_loss": 3.424837296136772, "tokens_seen": 2257190912 }, { "epoch": 0.37, "learning_rate": 0.00031914466821792505, "loss": 0.0686, "theoretical_loss": 3.424821241181288, "tokens_seen": 2257321984 }, { "epoch": 0.37, "learning_rate": 0.00031910454946642056, "loss": 0.0674, "theoretical_loss": 3.4248051874190204, "tokens_seen": 2257453056 }, { "epoch": 0.37, "learning_rate": 0.00031906443071491613, "loss": 0.0664, "theoretical_loss": 3.4247891348498114, "tokens_seen": 2257584128 }, { "epoch": 0.37, "learning_rate": 0.0003190243119634117, "loss": 0.0708, "theoretical_loss": 3.4247730834735037, "tokens_seen": 2257715200 }, { "epoch": 0.37, "learning_rate": 0.00031898419321190727, "loss": 0.0664, "theoretical_loss": 3.424757033289939, "tokens_seen": 2257846272 }, { "epoch": 0.37, "learning_rate": 0.0003189440744604028, "loss": 0.0694, "theoretical_loss": 3.424740984298959, "tokens_seen": 2257977344 }, { "epoch": 0.37, "learning_rate": 0.00031890395570889835, "loss": 0.0692, "theoretical_loss": 3.424724936500407, "tokens_seen": 2258108416 }, { "epoch": 0.37, "learning_rate": 0.0003188638369573939, "loss": 0.0641, "theoretical_loss": 3.424708889894124, "tokens_seen": 2258239488 }, { "epoch": 0.37, "learning_rate": 0.00031882371820588943, "loss": 0.0732, "theoretical_loss": 3.424692844479953, "tokens_seen": 2258370560 }, { "epoch": 0.37, "learning_rate": 0.000318783599454385, "loss": 0.0709, "theoretical_loss": 3.4246768002577355, "tokens_seen": 2258501632 }, { "epoch": 0.37, "learning_rate": 0.0003187434807028805, "loss": 0.0685, "theoretical_loss": 3.424660757227315, "tokens_seen": 2258632704 }, { "epoch": 0.37, "learning_rate": 0.0003187033619513761, "loss": 0.0668, "theoretical_loss": 3.4246447153885327, "tokens_seen": 2258763776 }, { "epoch": 0.37, "learning_rate": 0.0003186632431998716, "loss": 0.0679, "theoretical_loss": 3.4246286747412316, "tokens_seen": 2258894848 }, { "epoch": 0.37, "learning_rate": 0.00031862312444836717, "loss": 0.0683, "theoretical_loss": 3.4246126352852535, "tokens_seen": 2259025920 }, { "epoch": 0.37, "learning_rate": 0.00031858300569686274, "loss": 0.0661, "theoretical_loss": 3.4245965970204413, "tokens_seen": 2259156992 }, { "epoch": 0.37, "learning_rate": 0.00031854288694535825, "loss": 0.0644, "theoretical_loss": 3.4245805599466372, "tokens_seen": 2259288064 }, { "epoch": 0.37, "objective/train/advantage_avg": -0.00012807243911083788, "objective/train/docs_used": 822867, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3509057760238647, "objective/train/original_loss": 1.3509056568145752, "objective/train/theoretical_loss": 3.4245645240636833, "objective/train/tokens_used": 629943776, "objective/train/value_avg": -0.013336181640625, "objective/train/value_loss": 0.00022282774443738163, "objective/train/value_max": -6.711483001708984e-05, "objective/train/value_min": -0.40869140625, "objective/train/value_reward_corr": 0.9352347748664648, "objective/train/value_std": 0.0382080078125, "objective/train/weight_avg": 0.9999783039093018, "objective/train/weighted_lm_loss": 1.3483792543411255, "objective/train/weights_max": 1.5048472881317139, "objective/train/weights_min": 0.372374951839447, "theoretical_loss": 3.4245645240636833, "tokens_seen": 2259419136 }, { "epoch": 0.37, "learning_rate": 0.0003185027681938538, "loss": 0.0645, "theoretical_loss": 3.4245645240636833, "tokens_seen": 2259419136 }, { "epoch": 0.37, "learning_rate": 0.0003184626494423494, "loss": 0.0673, "theoretical_loss": 3.424548489371423, "tokens_seen": 2259550208 }, { "epoch": 0.37, "learning_rate": 0.0003184225306908449, "loss": 0.0683, "theoretical_loss": 3.4245324558696986, "tokens_seen": 2259681280 }, { "epoch": 0.37, "learning_rate": 0.00031838241193934047, "loss": 0.0666, "theoretical_loss": 3.424516423558352, "tokens_seen": 2259812352 }, { "epoch": 0.37, "learning_rate": 0.000318342293187836, "loss": 0.0691, "theoretical_loss": 3.4245003924372264, "tokens_seen": 2259943424 }, { "epoch": 0.37, "learning_rate": 0.00031830217443633155, "loss": 0.0708, "theoretical_loss": 3.4244843625061643, "tokens_seen": 2260074496 }, { "epoch": 0.37, "learning_rate": 0.00031826205568482707, "loss": 0.0646, "theoretical_loss": 3.424468333765008, "tokens_seen": 2260205568 }, { "epoch": 0.37, "learning_rate": 0.00031822193693332263, "loss": 0.0636, "theoretical_loss": 3.4244523062136003, "tokens_seen": 2260336640 }, { "epoch": 0.37, "learning_rate": 0.0003181818181818182, "loss": 0.0706, "theoretical_loss": 3.4244362798517844, "tokens_seen": 2260467712 }, { "epoch": 0.37, "learning_rate": 0.0003181416994303137, "loss": 0.0695, "theoretical_loss": 3.4244202546794025, "tokens_seen": 2260598784 }, { "epoch": 0.37, "learning_rate": 0.0003181015806788093, "loss": 0.0654, "theoretical_loss": 3.4244042306962976, "tokens_seen": 2260729856 }, { "epoch": 0.37, "learning_rate": 0.00031806146192730485, "loss": 0.0653, "theoretical_loss": 3.4243882079023127, "tokens_seen": 2260860928 }, { "epoch": 0.37, "learning_rate": 0.00031802134317580037, "loss": 0.066, "theoretical_loss": 3.42437218629729, "tokens_seen": 2260992000 }, { "epoch": 0.37, "learning_rate": 0.00031798122442429594, "loss": 0.064, "theoretical_loss": 3.424356165881073, "tokens_seen": 2261123072 }, { "epoch": 0.37, "learning_rate": 0.00031794110567279145, "loss": 0.0693, "theoretical_loss": 3.4243401466535044, "tokens_seen": 2261254144 }, { "epoch": 0.37, "learning_rate": 0.000317900986921287, "loss": 0.0694, "theoretical_loss": 3.4243241286144266, "tokens_seen": 2261385216 }, { "epoch": 0.37, "learning_rate": 0.00031786086816978253, "loss": 0.0709, "theoretical_loss": 3.424308111763683, "tokens_seen": 2261516288 }, { "epoch": 0.37, "learning_rate": 0.0003178207494182781, "loss": 0.0638, "theoretical_loss": 3.4242920961011167, "tokens_seen": 2261647360 }, { "epoch": 0.37, "learning_rate": 0.00031778063066677367, "loss": 0.067, "theoretical_loss": 3.4242760816265707, "tokens_seen": 2261778432 }, { "epoch": 0.37, "learning_rate": 0.0003177405119152692, "loss": 0.0651, "theoretical_loss": 3.424260068339888, "tokens_seen": 2261909504 }, { "epoch": 0.37, "learning_rate": 0.00031770039316376475, "loss": 0.0663, "theoretical_loss": 3.4242440562409113, "tokens_seen": 2262040576 }, { "epoch": 0.37, "learning_rate": 0.0003176602744122603, "loss": 0.0704, "theoretical_loss": 3.424228045329484, "tokens_seen": 2262171648 }, { "epoch": 0.37, "learning_rate": 0.00031762015566075584, "loss": 0.0702, "theoretical_loss": 3.424212035605449, "tokens_seen": 2262302720 }, { "epoch": 0.37, "learning_rate": 0.0003175800369092514, "loss": 0.0687, "theoretical_loss": 3.42419602706865, "tokens_seen": 2262433792 }, { "epoch": 0.37, "learning_rate": 0.0003175399181577469, "loss": 0.0688, "theoretical_loss": 3.4241800197189294, "tokens_seen": 2262564864 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.0006335466750897467, "objective/train/docs_used": 824193, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1558908224105835, "objective/train/original_loss": 1.155890941619873, "objective/train/theoretical_loss": 3.4241640135561306, "objective/train/tokens_used": 633220576, "objective/train/value_avg": -0.0081329345703125, "objective/train/value_loss": 0.00021993201517034322, "objective/train/value_max": -4.398822784423828e-05, "objective/train/value_min": -0.658203125, "objective/train/value_reward_corr": 0.7136359001776789, "objective/train/value_std": 0.01558685302734375, "objective/train/weight_avg": 1.0007343292236328, "objective/train/weighted_lm_loss": 1.1559464931488037, "objective/train/weights_max": 1.2020654678344727, "objective/train/weights_min": 0.3711100220680237, "theoretical_loss": 3.4241640135561306, "tokens_seen": 2262695936 }, { "epoch": 0.37, "learning_rate": 0.0003174997994062425, "loss": 0.0634, "theoretical_loss": 3.4241640135561306, "tokens_seen": 2262695936 }, { "epoch": 0.37, "learning_rate": 0.000317459680654738, "loss": 0.0657, "theoretical_loss": 3.4241480085800977, "tokens_seen": 2262827008 }, { "epoch": 0.37, "learning_rate": 0.00031741956190323357, "loss": 0.067, "theoretical_loss": 3.424132004790673, "tokens_seen": 2262958080 }, { "epoch": 0.37, "learning_rate": 0.00031737944315172914, "loss": 0.0656, "theoretical_loss": 3.4241160021877004, "tokens_seen": 2263089152 }, { "epoch": 0.37, "learning_rate": 0.00031733932440022465, "loss": 0.0689, "theoretical_loss": 3.424100000771023, "tokens_seen": 2263220224 }, { "epoch": 0.37, "learning_rate": 0.0003172992056487202, "loss": 0.0683, "theoretical_loss": 3.424084000540484, "tokens_seen": 2263351296 }, { "epoch": 0.37, "learning_rate": 0.0003172590868972158, "loss": 0.069, "theoretical_loss": 3.424068001495927, "tokens_seen": 2263482368 }, { "epoch": 0.37, "learning_rate": 0.0003172189681457113, "loss": 0.0667, "theoretical_loss": 3.424052003637195, "tokens_seen": 2263613440 }, { "epoch": 0.37, "learning_rate": 0.00031717884939420687, "loss": 0.068, "theoretical_loss": 3.4240360069641325, "tokens_seen": 2263744512 }, { "epoch": 0.37, "learning_rate": 0.00031713873064270244, "loss": 0.068, "theoretical_loss": 3.4240200114765815, "tokens_seen": 2263875584 }, { "epoch": 0.37, "learning_rate": 0.00031709861189119795, "loss": 0.0686, "theoretical_loss": 3.424004017174387, "tokens_seen": 2264006656 }, { "epoch": 0.37, "learning_rate": 0.00031705849313969347, "loss": 0.0655, "theoretical_loss": 3.423988024057391, "tokens_seen": 2264137728 }, { "epoch": 0.37, "learning_rate": 0.00031701837438818904, "loss": 0.0672, "theoretical_loss": 3.4239720321254388, "tokens_seen": 2264268800 }, { "epoch": 0.37, "learning_rate": 0.0003169782556366846, "loss": 0.0676, "theoretical_loss": 3.423956041378373, "tokens_seen": 2264399872 }, { "epoch": 0.37, "learning_rate": 0.0003169381368851801, "loss": 0.0687, "theoretical_loss": 3.423940051816037, "tokens_seen": 2264530944 }, { "epoch": 0.37, "learning_rate": 0.0003168980181336757, "loss": 0.0662, "theoretical_loss": 3.423924063438275, "tokens_seen": 2264662016 }, { "epoch": 0.37, "learning_rate": 0.00031685789938217126, "loss": 0.0698, "theoretical_loss": 3.4239080762449303, "tokens_seen": 2264793088 }, { "epoch": 0.37, "learning_rate": 0.00031681778063066677, "loss": 0.0704, "theoretical_loss": 3.4238920902358467, "tokens_seen": 2264924160 }, { "epoch": 0.37, "learning_rate": 0.00031677766187916234, "loss": 0.0671, "theoretical_loss": 3.4238761054108684, "tokens_seen": 2265055232 }, { "epoch": 0.37, "learning_rate": 0.0003167375431276579, "loss": 0.0684, "theoretical_loss": 3.423860121769839, "tokens_seen": 2265186304 }, { "epoch": 0.37, "learning_rate": 0.0003166974243761534, "loss": 0.0704, "theoretical_loss": 3.423844139312602, "tokens_seen": 2265317376 }, { "epoch": 0.37, "learning_rate": 0.00031665730562464893, "loss": 0.0681, "theoretical_loss": 3.423828158039001, "tokens_seen": 2265448448 }, { "epoch": 0.37, "learning_rate": 0.0003166171868731445, "loss": 0.07, "theoretical_loss": 3.423812177948881, "tokens_seen": 2265579520 }, { "epoch": 0.37, "learning_rate": 0.00031657706812164007, "loss": 0.0699, "theoretical_loss": 3.4237961990420844, "tokens_seen": 2265710592 }, { "epoch": 0.37, "learning_rate": 0.0003165369493701356, "loss": 0.0637, "theoretical_loss": 3.4237802213184563, "tokens_seen": 2265841664 }, { "epoch": 0.37, "objective/train/advantage_avg": 0.00012568081729114056, "objective/train/docs_used": 825438, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2610448598861694, "objective/train/original_loss": 1.2610448598861694, "objective/train/theoretical_loss": 3.42376424477784, "objective/train/tokens_used": 636497376, "objective/train/value_avg": -0.006603240966796875, "objective/train/value_loss": 0.0001940292859217152, "objective/train/value_max": -4.1961669921875e-05, "objective/train/value_min": -0.431884765625, "objective/train/value_reward_corr": 0.705414982159267, "objective/train/value_std": 0.01117706298828125, "objective/train/weight_avg": 1.0002115964889526, "objective/train/weighted_lm_loss": 1.2611907720565796, "objective/train/weights_max": 1.2771201133728027, "objective/train/weights_min": 0.3781466484069824, "theoretical_loss": 3.42376424477784, "tokens_seen": 2265972736 }, { "epoch": 0.37, "learning_rate": 0.00031649683061863115, "loss": 0.066, "theoretical_loss": 3.42376424477784, "tokens_seen": 2265972736 }, { "epoch": 0.37, "learning_rate": 0.0003164567118671267, "loss": 0.0688, "theoretical_loss": 3.42374826942008, "tokens_seen": 2266103808 }, { "epoch": 0.37, "learning_rate": 0.0003164165931156223, "loss": 0.0668, "theoretical_loss": 3.4237322952450198, "tokens_seen": 2266234880 }, { "epoch": 0.37, "learning_rate": 0.0003163764743641178, "loss": 0.0689, "theoretical_loss": 3.423716322252504, "tokens_seen": 2266365952 }, { "epoch": 0.37, "learning_rate": 0.0003163363556126134, "loss": 0.0677, "theoretical_loss": 3.423700350442376, "tokens_seen": 2266497024 }, { "epoch": 0.37, "learning_rate": 0.0003162962368611089, "loss": 0.0681, "theoretical_loss": 3.4236843798144805, "tokens_seen": 2266628096 }, { "epoch": 0.37, "learning_rate": 0.0003162561181096044, "loss": 0.0727, "theoretical_loss": 3.4236684103686614, "tokens_seen": 2266759168 }, { "epoch": 0.37, "learning_rate": 0.00031621599935809997, "loss": 0.0681, "theoretical_loss": 3.4236524421047627, "tokens_seen": 2266890240 }, { "epoch": 0.37, "learning_rate": 0.00031617588060659554, "loss": 0.0698, "theoretical_loss": 3.423636475022629, "tokens_seen": 2267021312 }, { "epoch": 0.37, "learning_rate": 0.00031613576185509105, "loss": 0.0669, "theoretical_loss": 3.423620509122104, "tokens_seen": 2267152384 }, { "epoch": 0.37, "learning_rate": 0.0003160956431035866, "loss": 0.0699, "theoretical_loss": 3.423604544403032, "tokens_seen": 2267283456 }, { "epoch": 0.37, "learning_rate": 0.0003160555243520822, "loss": 0.0628, "theoretical_loss": 3.423588580865258, "tokens_seen": 2267414528 }, { "epoch": 0.37, "learning_rate": 0.00031601540560057776, "loss": 0.0689, "theoretical_loss": 3.4235726185086257, "tokens_seen": 2267545600 }, { "epoch": 0.37, "learning_rate": 0.00031597528684907327, "loss": 0.0643, "theoretical_loss": 3.4235566573329796, "tokens_seen": 2267676672 }, { "epoch": 0.37, "learning_rate": 0.00031593516809756884, "loss": 0.0728, "theoretical_loss": 3.423540697338164, "tokens_seen": 2267807744 }, { "epoch": 0.37, "learning_rate": 0.00031589504934606435, "loss": 0.0656, "theoretical_loss": 3.4235247385240233, "tokens_seen": 2267938816 }, { "epoch": 0.37, "learning_rate": 0.00031585493059455987, "loss": 0.0655, "theoretical_loss": 3.423508780890402, "tokens_seen": 2268069888 }, { "epoch": 0.37, "learning_rate": 0.00031581481184305544, "loss": 0.0718, "theoretical_loss": 3.4234928244371448, "tokens_seen": 2268200960 }, { "epoch": 0.37, "learning_rate": 0.000315774693091551, "loss": 0.0685, "theoretical_loss": 3.4234768691640953, "tokens_seen": 2268332032 }, { "epoch": 0.37, "learning_rate": 0.0003157345743400465, "loss": 0.0661, "theoretical_loss": 3.4234609150710993, "tokens_seen": 2268463104 }, { "epoch": 0.37, "learning_rate": 0.0003156944555885421, "loss": 0.0677, "theoretical_loss": 3.4234449621580003, "tokens_seen": 2268594176 }, { "epoch": 0.38, "learning_rate": 0.00031565433683703766, "loss": 0.069, "theoretical_loss": 3.423429010424643, "tokens_seen": 2268725248 }, { "epoch": 0.38, "learning_rate": 0.0003156142180855332, "loss": 0.069, "theoretical_loss": 3.4234130598708727, "tokens_seen": 2268856320 }, { "epoch": 0.38, "learning_rate": 0.00031557409933402874, "loss": 0.0653, "theoretical_loss": 3.4233971104965333, "tokens_seen": 2268987392 }, { "epoch": 0.38, "learning_rate": 0.0003155339805825243, "loss": 0.0646, "theoretical_loss": 3.42338116230147, "tokens_seen": 2269118464 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0005067825550213456, "objective/train/docs_used": 826474, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.409843921661377, "objective/train/original_loss": 1.409843921661377, "objective/train/theoretical_loss": 3.423365215285527, "objective/train/tokens_used": 639774176, "objective/train/value_avg": -0.006450653076171875, "objective/train/value_loss": 0.00024130572273861617, "objective/train/value_max": -4.869699478149414e-05, "objective/train/value_min": -0.254150390625, "objective/train/value_reward_corr": 0.4955745942176903, "objective/train/value_std": 0.0110626220703125, "objective/train/weight_avg": 1.0006154775619507, "objective/train/weighted_lm_loss": 1.4108905792236328, "objective/train/weights_max": 1.2532600164413452, "objective/train/weights_min": 0.3750951290130615, "theoretical_loss": 3.423365215285527, "tokens_seen": 2269249536 }, { "epoch": 0.38, "learning_rate": 0.0003154938618310198, "loss": 0.0676, "theoretical_loss": 3.423365215285527, "tokens_seen": 2269249536 }, { "epoch": 0.38, "learning_rate": 0.00031545374307951534, "loss": 0.0643, "theoretical_loss": 3.4233492694485497, "tokens_seen": 2269380608 }, { "epoch": 0.38, "learning_rate": 0.0003154136243280109, "loss": 0.0701, "theoretical_loss": 3.423333324790382, "tokens_seen": 2269511680 }, { "epoch": 0.38, "learning_rate": 0.00031537350557650647, "loss": 0.0684, "theoretical_loss": 3.4233173813108695, "tokens_seen": 2269642752 }, { "epoch": 0.38, "learning_rate": 0.000315333386825002, "loss": 0.068, "theoretical_loss": 3.4233014390098564, "tokens_seen": 2269773824 }, { "epoch": 0.38, "learning_rate": 0.00031529326807349756, "loss": 0.0613, "theoretical_loss": 3.423285497887188, "tokens_seen": 2269904896 }, { "epoch": 0.38, "learning_rate": 0.0003152531493219931, "loss": 0.0677, "theoretical_loss": 3.4232695579427084, "tokens_seen": 2270035968 }, { "epoch": 0.38, "learning_rate": 0.0003152130305704887, "loss": 0.0653, "theoretical_loss": 3.4232536191762635, "tokens_seen": 2270167040 }, { "epoch": 0.38, "learning_rate": 0.0003151729118189842, "loss": 0.0641, "theoretical_loss": 3.4232376815876977, "tokens_seen": 2270298112 }, { "epoch": 0.38, "learning_rate": 0.0003151327930674798, "loss": 0.0686, "theoretical_loss": 3.423221745176856, "tokens_seen": 2270429184 }, { "epoch": 0.38, "learning_rate": 0.0003150926743159753, "loss": 0.0695, "theoretical_loss": 3.423205809943583, "tokens_seen": 2270560256 }, { "epoch": 0.38, "learning_rate": 0.0003150525555644708, "loss": 0.0657, "theoretical_loss": 3.423189875887725, "tokens_seen": 2270691328 }, { "epoch": 0.38, "learning_rate": 0.00031501243681296637, "loss": 0.0643, "theoretical_loss": 3.4231739430091253, "tokens_seen": 2270822400 }, { "epoch": 0.38, "learning_rate": 0.00031497231806146194, "loss": 0.0675, "theoretical_loss": 3.4231580113076303, "tokens_seen": 2270953472 }, { "epoch": 0.38, "learning_rate": 0.00031493219930995745, "loss": 0.0637, "theoretical_loss": 3.4231420807830846, "tokens_seen": 2271084544 }, { "epoch": 0.38, "learning_rate": 0.000314892080558453, "loss": 0.0683, "theoretical_loss": 3.4231261514353335, "tokens_seen": 2271215616 }, { "epoch": 0.38, "learning_rate": 0.0003148519618069486, "loss": 0.0656, "theoretical_loss": 3.4231102232642217, "tokens_seen": 2271346688 }, { "epoch": 0.38, "learning_rate": 0.00031481184305544416, "loss": 0.0687, "theoretical_loss": 3.4230942962695954, "tokens_seen": 2271477760 }, { "epoch": 0.38, "learning_rate": 0.0003147717243039397, "loss": 0.0645, "theoretical_loss": 3.4230783704512984, "tokens_seen": 2271608832 }, { "epoch": 0.38, "learning_rate": 0.00031473160555243524, "loss": 0.065, "theoretical_loss": 3.4230624458091774, "tokens_seen": 2271739904 }, { "epoch": 0.38, "learning_rate": 0.00031469148680093076, "loss": 0.0648, "theoretical_loss": 3.4230465223430766, "tokens_seen": 2271870976 }, { "epoch": 0.38, "learning_rate": 0.00031465136804942627, "loss": 0.0673, "theoretical_loss": 3.423030600052842, "tokens_seen": 2272002048 }, { "epoch": 0.38, "learning_rate": 0.00031461124929792184, "loss": 0.0661, "theoretical_loss": 3.4230146789383187, "tokens_seen": 2272133120 }, { "epoch": 0.38, "learning_rate": 0.0003145711305464174, "loss": 0.0669, "theoretical_loss": 3.422998758999352, "tokens_seen": 2272264192 }, { "epoch": 0.38, "learning_rate": 0.0003145310117949129, "loss": 0.0665, "theoretical_loss": 3.422982840235787, "tokens_seen": 2272395264 }, { "epoch": 0.38, "objective/train/advantage_avg": 7.407853263430297e-05, "objective/train/docs_used": 827716, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3317867517471313, "objective/train/original_loss": 1.3317866325378418, "objective/train/theoretical_loss": 3.4229669226474697, "objective/train/tokens_used": 643050976, "objective/train/value_avg": -0.00754547119140625, "objective/train/value_loss": 0.0003144394140690565, "objective/train/value_max": -3.790855407714844e-05, "objective/train/value_min": -0.91357421875, "objective/train/value_reward_corr": 0.6635963751861984, "objective/train/value_std": 0.014129638671875, "objective/train/weight_avg": 1.0002105236053467, "objective/train/weighted_lm_loss": 1.3324928283691406, "objective/train/weights_max": 1.1615424156188965, "objective/train/weights_min": 0.3840312957763672, "theoretical_loss": 3.4229669226474697, "tokens_seen": 2272526336 }, { "epoch": 0.38, "learning_rate": 0.0003144908930434085, "loss": 0.0669, "theoretical_loss": 3.4229669226474697, "tokens_seen": 2272526336 }, { "epoch": 0.38, "learning_rate": 0.00031445077429190406, "loss": 0.063, "theoretical_loss": 3.4229510062342454, "tokens_seen": 2272657408 }, { "epoch": 0.38, "learning_rate": 0.0003144106555403996, "loss": 0.0691, "theoretical_loss": 3.422935090995959, "tokens_seen": 2272788480 }, { "epoch": 0.38, "learning_rate": 0.00031437053678889514, "loss": 0.0651, "theoretical_loss": 3.4229191769324574, "tokens_seen": 2272919552 }, { "epoch": 0.38, "learning_rate": 0.0003143304180373907, "loss": 0.0693, "theoretical_loss": 3.4229032640435846, "tokens_seen": 2273050624 }, { "epoch": 0.38, "learning_rate": 0.0003142902992858862, "loss": 0.0661, "theoretical_loss": 3.422887352329187, "tokens_seen": 2273181696 }, { "epoch": 0.38, "learning_rate": 0.00031425018053438174, "loss": 0.0678, "theoretical_loss": 3.42287144178911, "tokens_seen": 2273312768 }, { "epoch": 0.38, "learning_rate": 0.0003142100617828773, "loss": 0.0663, "theoretical_loss": 3.4228555324231995, "tokens_seen": 2273443840 }, { "epoch": 0.38, "learning_rate": 0.0003141699430313729, "loss": 0.0694, "theoretical_loss": 3.4228396242313006, "tokens_seen": 2273574912 }, { "epoch": 0.38, "learning_rate": 0.0003141298242798684, "loss": 0.0676, "theoretical_loss": 3.4228237172132596, "tokens_seen": 2273705984 }, { "epoch": 0.38, "learning_rate": 0.00031408970552836396, "loss": 0.0695, "theoretical_loss": 3.422807811368922, "tokens_seen": 2273837056 }, { "epoch": 0.38, "learning_rate": 0.0003140495867768595, "loss": 0.0683, "theoretical_loss": 3.422791906698133, "tokens_seen": 2273968128 }, { "epoch": 0.38, "learning_rate": 0.0003140094680253551, "loss": 0.0679, "theoretical_loss": 3.422776003200739, "tokens_seen": 2274099200 }, { "epoch": 0.38, "learning_rate": 0.0003139693492738506, "loss": 0.0726, "theoretical_loss": 3.422760100876586, "tokens_seen": 2274230272 }, { "epoch": 0.38, "learning_rate": 0.0003139292305223462, "loss": 0.0674, "theoretical_loss": 3.4227441997255195, "tokens_seen": 2274361344 }, { "epoch": 0.38, "learning_rate": 0.0003138891117708417, "loss": 0.0647, "theoretical_loss": 3.4227282997473853, "tokens_seen": 2274492416 }, { "epoch": 0.38, "learning_rate": 0.0003138489930193372, "loss": 0.0655, "theoretical_loss": 3.4227124009420296, "tokens_seen": 2274623488 }, { "epoch": 0.38, "learning_rate": 0.00031380887426783277, "loss": 0.0682, "theoretical_loss": 3.4226965033092975, "tokens_seen": 2274754560 }, { "epoch": 0.38, "learning_rate": 0.00031376875551632834, "loss": 0.0668, "theoretical_loss": 3.422680606849036, "tokens_seen": 2274885632 }, { "epoch": 0.38, "learning_rate": 0.0003137286367648239, "loss": 0.0694, "theoretical_loss": 3.4226647115610906, "tokens_seen": 2275016704 }, { "epoch": 0.38, "learning_rate": 0.0003136885180133194, "loss": 0.0685, "theoretical_loss": 3.4226488174453076, "tokens_seen": 2275147776 }, { "epoch": 0.38, "learning_rate": 0.000313648399261815, "loss": 0.0691, "theoretical_loss": 3.4226329245015323, "tokens_seen": 2275278848 }, { "epoch": 0.38, "learning_rate": 0.00031360828051031056, "loss": 0.0677, "theoretical_loss": 3.4226170327296113, "tokens_seen": 2275409920 }, { "epoch": 0.38, "learning_rate": 0.0003135681617588061, "loss": 0.067, "theoretical_loss": 3.422601142129391, "tokens_seen": 2275540992 }, { "epoch": 0.38, "learning_rate": 0.00031352804300730164, "loss": 0.066, "theoretical_loss": 3.422585252700717, "tokens_seen": 2275672064 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0009672704618424177, "objective/train/docs_used": 828910, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.316298484802246, "objective/train/original_loss": 1.316298484802246, "objective/train/theoretical_loss": 3.4225693644434356, "objective/train/tokens_used": 646327776, "objective/train/value_avg": -0.0091094970703125, "objective/train/value_loss": 0.0004967366112396121, "objective/train/value_max": -6.252527236938477e-05, "objective/train/value_min": -0.480712890625, "objective/train/value_reward_corr": 0.6546090059568215, "objective/train/value_std": 0.017364501953125, "objective/train/weight_avg": 1.0011730194091797, "objective/train/weighted_lm_loss": 1.3172649145126343, "objective/train/weights_max": 1.2429978847503662, "objective/train/weights_min": 0.38608771562576294, "theoretical_loss": 3.4225693644434356, "tokens_seen": 2275803136 }, { "epoch": 0.38, "learning_rate": 0.00031348792425579716, "loss": 0.067, "theoretical_loss": 3.4225693644434356, "tokens_seen": 2275803136 }, { "epoch": 0.38, "learning_rate": 0.00031344780550429267, "loss": 0.0667, "theoretical_loss": 3.422553477357393, "tokens_seen": 2275934208 }, { "epoch": 0.38, "learning_rate": 0.00031340768675278824, "loss": 0.0686, "theoretical_loss": 3.4225375914424356, "tokens_seen": 2276065280 }, { "epoch": 0.38, "learning_rate": 0.0003133675680012838, "loss": 0.0706, "theoretical_loss": 3.4225217066984093, "tokens_seen": 2276196352 }, { "epoch": 0.38, "learning_rate": 0.0003133274492497794, "loss": 0.0682, "theoretical_loss": 3.4225058231251606, "tokens_seen": 2276327424 }, { "epoch": 0.38, "learning_rate": 0.0003132873304982749, "loss": 0.0676, "theoretical_loss": 3.422489940722536, "tokens_seen": 2276458496 }, { "epoch": 0.38, "learning_rate": 0.00031324721174677046, "loss": 0.0723, "theoretical_loss": 3.422474059490381, "tokens_seen": 2276589568 }, { "epoch": 0.38, "learning_rate": 0.000313207092995266, "loss": 0.0674, "theoretical_loss": 3.422458179428543, "tokens_seen": 2276720640 }, { "epoch": 0.38, "learning_rate": 0.00031316697424376154, "loss": 0.0687, "theoretical_loss": 3.422442300536868, "tokens_seen": 2276851712 }, { "epoch": 0.38, "learning_rate": 0.0003131268554922571, "loss": 0.066, "theoretical_loss": 3.422426422815202, "tokens_seen": 2276982784 }, { "epoch": 0.38, "learning_rate": 0.0003130867367407526, "loss": 0.0648, "theoretical_loss": 3.422410546263392, "tokens_seen": 2277113856 }, { "epoch": 0.38, "learning_rate": 0.00031304661798924814, "loss": 0.069, "theoretical_loss": 3.4223946708812845, "tokens_seen": 2277244928 }, { "epoch": 0.38, "learning_rate": 0.0003130064992377437, "loss": 0.0679, "theoretical_loss": 3.422378796668726, "tokens_seen": 2277376000 }, { "epoch": 0.38, "learning_rate": 0.0003129663804862393, "loss": 0.0693, "theoretical_loss": 3.422362923625562, "tokens_seen": 2277507072 }, { "epoch": 0.38, "learning_rate": 0.00031292626173473484, "loss": 0.0632, "theoretical_loss": 3.4223470517516406, "tokens_seen": 2277638144 }, { "epoch": 0.38, "learning_rate": 0.00031288614298323036, "loss": 0.07, "theoretical_loss": 3.422331181046807, "tokens_seen": 2277769216 }, { "epoch": 0.38, "learning_rate": 0.0003128460242317259, "loss": 0.0697, "theoretical_loss": 3.422315311510909, "tokens_seen": 2277900288 }, { "epoch": 0.38, "learning_rate": 0.0003128059054802215, "loss": 0.0688, "theoretical_loss": 3.4222994431437925, "tokens_seen": 2278031360 }, { "epoch": 0.38, "learning_rate": 0.000312765786728717, "loss": 0.0662, "theoretical_loss": 3.4222835759453045, "tokens_seen": 2278162432 }, { "epoch": 0.38, "learning_rate": 0.0003127256679772126, "loss": 0.0691, "theoretical_loss": 3.4222677099152916, "tokens_seen": 2278293504 }, { "epoch": 0.38, "learning_rate": 0.0003126855492257081, "loss": 0.0677, "theoretical_loss": 3.422251845053601, "tokens_seen": 2278424576 }, { "epoch": 0.38, "learning_rate": 0.0003126454304742036, "loss": 0.0669, "theoretical_loss": 3.4222359813600782, "tokens_seen": 2278555648 }, { "epoch": 0.38, "learning_rate": 0.0003126053117226992, "loss": 0.0657, "theoretical_loss": 3.422220118834572, "tokens_seen": 2278686720 }, { "epoch": 0.38, "learning_rate": 0.00031256519297119474, "loss": 0.0633, "theoretical_loss": 3.422204257476927, "tokens_seen": 2278817792 }, { "epoch": 0.38, "learning_rate": 0.0003125250742196903, "loss": 0.0662, "theoretical_loss": 3.422188397286991, "tokens_seen": 2278948864 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0006294561317190528, "objective/train/docs_used": 830070, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4641525745391846, "objective/train/original_loss": 1.4641525745391846, "objective/train/theoretical_loss": 3.4221725382646118, "objective/train/tokens_used": 649604576, "objective/train/value_avg": -0.006549835205078125, "objective/train/value_loss": 0.0004050426068715751, "objective/train/value_max": -3.707408905029297e-05, "objective/train/value_min": -0.58837890625, "objective/train/value_reward_corr": 0.5452644983594741, "objective/train/value_std": 0.01335906982421875, "objective/train/weight_avg": 1.000802755355835, "objective/train/weighted_lm_loss": 1.4649083614349365, "objective/train/weights_max": 1.615370750427246, "objective/train/weights_min": 0.36827927827835083, "theoretical_loss": 3.4221725382646118, "tokens_seen": 2279079936 }, { "epoch": 0.38, "learning_rate": 0.0003124849554681858, "loss": 0.0673, "theoretical_loss": 3.4221725382646118, "tokens_seen": 2279079936 }, { "epoch": 0.38, "learning_rate": 0.0003124448367166814, "loss": 0.0699, "theoretical_loss": 3.4221566804096355, "tokens_seen": 2279211008 }, { "epoch": 0.38, "learning_rate": 0.00031240471796517696, "loss": 0.0664, "theoretical_loss": 3.422140823721908, "tokens_seen": 2279342080 }, { "epoch": 0.38, "learning_rate": 0.0003123645992136725, "loss": 0.0683, "theoretical_loss": 3.4221249682012784, "tokens_seen": 2279473152 }, { "epoch": 0.38, "learning_rate": 0.00031232448046216804, "loss": 0.0675, "theoretical_loss": 3.422109113847592, "tokens_seen": 2279604224 }, { "epoch": 0.38, "learning_rate": 0.00031228436171066356, "loss": 0.0647, "theoretical_loss": 3.422093260660697, "tokens_seen": 2279735296 }, { "epoch": 0.38, "learning_rate": 0.00031224424295915907, "loss": 0.0689, "theoretical_loss": 3.422077408640439, "tokens_seen": 2279866368 }, { "epoch": 0.38, "learning_rate": 0.00031220412420765464, "loss": 0.0687, "theoretical_loss": 3.4220615577866664, "tokens_seen": 2279997440 }, { "epoch": 0.38, "learning_rate": 0.0003121640054561502, "loss": 0.0684, "theoretical_loss": 3.422045708099226, "tokens_seen": 2280128512 }, { "epoch": 0.38, "learning_rate": 0.0003121238867046458, "loss": 0.0645, "theoretical_loss": 3.4220298595779646, "tokens_seen": 2280259584 }, { "epoch": 0.38, "learning_rate": 0.0003120837679531413, "loss": 0.065, "theoretical_loss": 3.42201401222273, "tokens_seen": 2280390656 }, { "epoch": 0.38, "learning_rate": 0.00031204364920163686, "loss": 0.0655, "theoretical_loss": 3.4219981660333687, "tokens_seen": 2280521728 }, { "epoch": 0.38, "learning_rate": 0.00031200353045013243, "loss": 0.0686, "theoretical_loss": 3.421982321009728, "tokens_seen": 2280652800 }, { "epoch": 0.38, "learning_rate": 0.00031196341169862794, "loss": 0.0666, "theoretical_loss": 3.4219664771516554, "tokens_seen": 2280783872 }, { "epoch": 0.38, "learning_rate": 0.0003119232929471235, "loss": 0.0638, "theoretical_loss": 3.4219506344589985, "tokens_seen": 2280914944 }, { "epoch": 0.38, "learning_rate": 0.000311883174195619, "loss": 0.0687, "theoretical_loss": 3.4219347929316037, "tokens_seen": 2281046016 }, { "epoch": 0.38, "learning_rate": 0.00031184305544411454, "loss": 0.0701, "theoretical_loss": 3.421918952569319, "tokens_seen": 2281177088 }, { "epoch": 0.38, "learning_rate": 0.0003118029366926101, "loss": 0.0697, "theoretical_loss": 3.421903113371992, "tokens_seen": 2281308160 }, { "epoch": 0.38, "learning_rate": 0.0003117628179411057, "loss": 0.0705, "theoretical_loss": 3.4218872753394702, "tokens_seen": 2281439232 }, { "epoch": 0.38, "learning_rate": 0.00031172269918960124, "loss": 0.0666, "theoretical_loss": 3.4218714384716, "tokens_seen": 2281570304 }, { "epoch": 0.38, "learning_rate": 0.00031168258043809676, "loss": 0.0701, "theoretical_loss": 3.4218556027682294, "tokens_seen": 2281701376 }, { "epoch": 0.38, "learning_rate": 0.0003116424616865923, "loss": 0.069, "theoretical_loss": 3.4218397682292063, "tokens_seen": 2281832448 }, { "epoch": 0.38, "learning_rate": 0.0003116023429350879, "loss": 0.0646, "theoretical_loss": 3.4218239348543777, "tokens_seen": 2281963520 }, { "epoch": 0.38, "learning_rate": 0.0003115622241835834, "loss": 0.0654, "theoretical_loss": 3.421808102643591, "tokens_seen": 2282094592 }, { "epoch": 0.38, "learning_rate": 0.000311522105432079, "loss": 0.0653, "theoretical_loss": 3.4217922715966944, "tokens_seen": 2282225664 }, { "epoch": 0.38, "objective/train/advantage_avg": 0.0006315877544693649, "objective/train/docs_used": 831223, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5169564485549927, "objective/train/original_loss": 1.5169565677642822, "objective/train/theoretical_loss": 3.4217764417135355, "objective/train/tokens_used": 652881376, "objective/train/value_avg": -0.005435943603515625, "objective/train/value_loss": 0.000124571452033706, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.380859375, "objective/train/value_reward_corr": 0.6908825815664393, "objective/train/value_std": 0.01141357421875, "objective/train/weight_avg": 1.0006896257400513, "objective/train/weighted_lm_loss": 1.517517328262329, "objective/train/weights_max": 1.280738115310669, "objective/train/weights_min": 0.37116101384162903, "theoretical_loss": 3.4217764417135355, "tokens_seen": 2282356736 }, { "epoch": 0.38, "learning_rate": 0.0003114819866805745, "loss": 0.0674, "theoretical_loss": 3.4217764417135355, "tokens_seen": 2282356736 }, { "epoch": 0.38, "learning_rate": 0.00031144186792907, "loss": 0.0683, "theoretical_loss": 3.421760612993961, "tokens_seen": 2282487808 }, { "epoch": 0.38, "learning_rate": 0.0003114017491775656, "loss": 0.066, "theoretical_loss": 3.4217447854378196, "tokens_seen": 2282618880 }, { "epoch": 0.38, "learning_rate": 0.00031136163042606114, "loss": 0.0681, "theoretical_loss": 3.421728959044958, "tokens_seen": 2282749952 }, { "epoch": 0.38, "learning_rate": 0.0003113215116745567, "loss": 0.0648, "theoretical_loss": 3.421713133815225, "tokens_seen": 2282881024 }, { "epoch": 0.38, "learning_rate": 0.0003112813929230522, "loss": 0.0672, "theoretical_loss": 3.4216973097484678, "tokens_seen": 2283012096 }, { "epoch": 0.38, "learning_rate": 0.0003112412741715478, "loss": 0.0671, "theoretical_loss": 3.421681486844534, "tokens_seen": 2283143168 }, { "epoch": 0.38, "learning_rate": 0.00031120115542004336, "loss": 0.07, "theoretical_loss": 3.4216656651032715, "tokens_seen": 2283274240 }, { "epoch": 0.38, "learning_rate": 0.0003111610366685389, "loss": 0.0695, "theoretical_loss": 3.4216498445245285, "tokens_seen": 2283405312 }, { "epoch": 0.38, "learning_rate": 0.00031112091791703444, "loss": 0.0675, "theoretical_loss": 3.421634025108152, "tokens_seen": 2283536384 }, { "epoch": 0.38, "learning_rate": 0.00031108079916552996, "loss": 0.0675, "theoretical_loss": 3.4216182068539913, "tokens_seen": 2283667456 }, { "epoch": 0.38, "learning_rate": 0.00031104068041402553, "loss": 0.0668, "theoretical_loss": 3.421602389761893, "tokens_seen": 2283798528 }, { "epoch": 0.38, "learning_rate": 0.00031100056166252104, "loss": 0.0668, "theoretical_loss": 3.4215865738317057, "tokens_seen": 2283929600 }, { "epoch": 0.38, "learning_rate": 0.0003109604429110166, "loss": 0.0659, "theoretical_loss": 3.421570759063277, "tokens_seen": 2284060672 }, { "epoch": 0.38, "learning_rate": 0.0003109203241595122, "loss": 0.0725, "theoretical_loss": 3.4215549454564553, "tokens_seen": 2284191744 }, { "epoch": 0.38, "learning_rate": 0.0003108802054080077, "loss": 0.0707, "theoretical_loss": 3.4215391330110885, "tokens_seen": 2284322816 }, { "epoch": 0.38, "learning_rate": 0.00031084008665650326, "loss": 0.0665, "theoretical_loss": 3.4215233217270242, "tokens_seen": 2284453888 }, { "epoch": 0.38, "learning_rate": 0.00031079996790499883, "loss": 0.0684, "theoretical_loss": 3.421507511604111, "tokens_seen": 2284584960 }, { "epoch": 0.38, "learning_rate": 0.00031075984915349434, "loss": 0.0655, "theoretical_loss": 3.4214917026421974, "tokens_seen": 2284716032 }, { "epoch": 0.38, "learning_rate": 0.0003107197304019899, "loss": 0.0671, "theoretical_loss": 3.42147589484113, "tokens_seen": 2284847104 }, { "epoch": 0.38, "learning_rate": 0.0003106796116504854, "loss": 0.0627, "theoretical_loss": 3.4214600882007593, "tokens_seen": 2284978176 }, { "epoch": 0.38, "learning_rate": 0.000310639492898981, "loss": 0.071, "theoretical_loss": 3.4214442827209313, "tokens_seen": 2285109248 }, { "epoch": 0.39, "learning_rate": 0.0003105993741474765, "loss": 0.0668, "theoretical_loss": 3.4214284784014954, "tokens_seen": 2285240320 }, { "epoch": 0.39, "learning_rate": 0.0003105592553959721, "loss": 0.0625, "theoretical_loss": 3.4214126752423, "tokens_seen": 2285371392 }, { "epoch": 0.39, "learning_rate": 0.00031051913664446765, "loss": 0.0672, "theoretical_loss": 3.4213968732431925, "tokens_seen": 2285502464 }, { "epoch": 0.39, "objective/train/advantage_avg": -0.001240852172486484, "objective/train/docs_used": 832464, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2624748945236206, "objective/train/original_loss": 1.2624748945236206, "objective/train/theoretical_loss": 3.4213810724040217, "objective/train/tokens_used": 656158176, "objective/train/value_avg": -0.00582122802734375, "objective/train/value_loss": 0.0002748845727182925, "objective/train/value_max": -2.568960189819336e-05, "objective/train/value_min": -0.1630859375, "objective/train/value_reward_corr": 0.542123787433477, "objective/train/value_std": 0.0105743408203125, "objective/train/weight_avg": 0.9988791942596436, "objective/train/weighted_lm_loss": 1.260942816734314, "objective/train/weights_max": 1.1516956090927124, "objective/train/weights_min": 0.3681437373161316, "theoretical_loss": 3.4213810724040217, "tokens_seen": 2285633536 }, { "epoch": 0.39, "learning_rate": 0.00031047901789296316, "loss": 0.0666, "theoretical_loss": 3.4213810724040217, "tokens_seen": 2285633536 }, { "epoch": 0.39, "learning_rate": 0.00031043889914145873, "loss": 0.066, "theoretical_loss": 3.4213652727246364, "tokens_seen": 2285764608 }, { "epoch": 0.39, "learning_rate": 0.0003103987803899543, "loss": 0.068, "theoretical_loss": 3.421349474204884, "tokens_seen": 2285895680 }, { "epoch": 0.39, "learning_rate": 0.0003103586616384498, "loss": 0.0695, "theoretical_loss": 3.421333676844614, "tokens_seen": 2286026752 }, { "epoch": 0.39, "learning_rate": 0.0003103185428869454, "loss": 0.0658, "theoretical_loss": 3.4213178806436737, "tokens_seen": 2286157824 }, { "epoch": 0.39, "learning_rate": 0.0003102784241354409, "loss": 0.0664, "theoretical_loss": 3.4213020856019125, "tokens_seen": 2286288896 }, { "epoch": 0.39, "learning_rate": 0.00031023830538393646, "loss": 0.0662, "theoretical_loss": 3.4212862917191784, "tokens_seen": 2286419968 }, { "epoch": 0.39, "learning_rate": 0.000310198186632432, "loss": 0.0657, "theoretical_loss": 3.4212704989953204, "tokens_seen": 2286551040 }, { "epoch": 0.39, "learning_rate": 0.00031015806788092754, "loss": 0.0696, "theoretical_loss": 3.4212547074301862, "tokens_seen": 2286682112 }, { "epoch": 0.39, "learning_rate": 0.0003101179491294231, "loss": 0.0661, "theoretical_loss": 3.421238917023625, "tokens_seen": 2286813184 }, { "epoch": 0.39, "learning_rate": 0.0003100778303779186, "loss": 0.0684, "theoretical_loss": 3.4212231277754848, "tokens_seen": 2286944256 }, { "epoch": 0.39, "learning_rate": 0.0003100377116264142, "loss": 0.0656, "theoretical_loss": 3.4212073396856155, "tokens_seen": 2287075328 }, { "epoch": 0.39, "learning_rate": 0.00030999759287490976, "loss": 0.0686, "theoretical_loss": 3.4211915527538643, "tokens_seen": 2287206400 }, { "epoch": 0.39, "learning_rate": 0.0003099574741234053, "loss": 0.0689, "theoretical_loss": 3.421175766980081, "tokens_seen": 2287337472 }, { "epoch": 0.39, "learning_rate": 0.00030991735537190085, "loss": 0.0673, "theoretical_loss": 3.4211599823641134, "tokens_seen": 2287468544 }, { "epoch": 0.39, "learning_rate": 0.00030987723662039636, "loss": 0.0674, "theoretical_loss": 3.421144198905811, "tokens_seen": 2287599616 }, { "epoch": 0.39, "learning_rate": 0.00030983711786889193, "loss": 0.0697, "theoretical_loss": 3.421128416605022, "tokens_seen": 2287730688 }, { "epoch": 0.39, "learning_rate": 0.00030979699911738744, "loss": 0.0664, "theoretical_loss": 3.4211126354615957, "tokens_seen": 2287861760 }, { "epoch": 0.39, "learning_rate": 0.000309756880365883, "loss": 0.0655, "theoretical_loss": 3.4210968554753807, "tokens_seen": 2287992832 }, { "epoch": 0.39, "learning_rate": 0.0003097167616143786, "loss": 0.0652, "theoretical_loss": 3.4210810766462254, "tokens_seen": 2288123904 }, { "epoch": 0.39, "learning_rate": 0.0003096766428628741, "loss": 0.0685, "theoretical_loss": 3.42106529897398, "tokens_seen": 2288254976 }, { "epoch": 0.39, "learning_rate": 0.00030963652411136966, "loss": 0.0668, "theoretical_loss": 3.4210495224584916, "tokens_seen": 2288386048 }, { "epoch": 0.39, "learning_rate": 0.00030959640535986523, "loss": 0.0667, "theoretical_loss": 3.4210337470996106, "tokens_seen": 2288517120 }, { "epoch": 0.39, "learning_rate": 0.00030955628660836074, "loss": 0.0645, "theoretical_loss": 3.421017972897185, "tokens_seen": 2288648192 }, { "epoch": 0.39, "learning_rate": 0.0003095161678568563, "loss": 0.069, "theoretical_loss": 3.4210021998510647, "tokens_seen": 2288779264 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.00041790769319050014, "objective/train/docs_used": 833545, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2621018886566162, "objective/train/original_loss": 1.2621017694473267, "objective/train/theoretical_loss": 3.420986427961098, "objective/train/tokens_used": 659434976, "objective/train/value_avg": -0.004253387451171875, "objective/train/value_loss": 6.406034663086757e-05, "objective/train/value_max": -5.1021575927734375e-05, "objective/train/value_min": -0.281005859375, "objective/train/value_reward_corr": 0.5231729656691123, "objective/train/value_std": 0.006252288818359375, "objective/train/weight_avg": 1.0004496574401855, "objective/train/weighted_lm_loss": 1.26314115524292, "objective/train/weights_max": 1.2031420469284058, "objective/train/weights_min": 0.7892084717750549, "theoretical_loss": 3.420986427961098, "tokens_seen": 2288910336 }, { "epoch": 0.39, "learning_rate": 0.0003094760491053519, "loss": 0.064, "theoretical_loss": 3.420986427961098, "tokens_seen": 2288910336 }, { "epoch": 0.39, "learning_rate": 0.0003094359303538474, "loss": 0.0654, "theoretical_loss": 3.4209706572271346, "tokens_seen": 2289041408 }, { "epoch": 0.39, "learning_rate": 0.0003093958116023429, "loss": 0.0658, "theoretical_loss": 3.4209548876490232, "tokens_seen": 2289172480 }, { "epoch": 0.39, "learning_rate": 0.0003093556928508385, "loss": 0.0672, "theoretical_loss": 3.4209391192266128, "tokens_seen": 2289303552 }, { "epoch": 0.39, "learning_rate": 0.00030931557409933405, "loss": 0.0701, "theoretical_loss": 3.4209233519597526, "tokens_seen": 2289434624 }, { "epoch": 0.39, "learning_rate": 0.00030927545534782956, "loss": 0.0668, "theoretical_loss": 3.420907585848292, "tokens_seen": 2289565696 }, { "epoch": 0.39, "learning_rate": 0.00030923533659632513, "loss": 0.0657, "theoretical_loss": 3.4208918208920798, "tokens_seen": 2289696768 }, { "epoch": 0.39, "learning_rate": 0.0003091952178448207, "loss": 0.0675, "theoretical_loss": 3.4208760570909655, "tokens_seen": 2289827840 }, { "epoch": 0.39, "learning_rate": 0.00030915509909331627, "loss": 0.0646, "theoretical_loss": 3.4208602944447986, "tokens_seen": 2289958912 }, { "epoch": 0.39, "learning_rate": 0.0003091149803418118, "loss": 0.0611, "theoretical_loss": 3.420844532953428, "tokens_seen": 2290089984 }, { "epoch": 0.39, "learning_rate": 0.00030907486159030735, "loss": 0.0661, "theoretical_loss": 3.420828772616703, "tokens_seen": 2290221056 }, { "epoch": 0.39, "learning_rate": 0.00030903474283880286, "loss": 0.0676, "theoretical_loss": 3.420813013434473, "tokens_seen": 2290352128 }, { "epoch": 0.39, "learning_rate": 0.0003089946240872984, "loss": 0.065, "theoretical_loss": 3.4207972554065877, "tokens_seen": 2290483200 }, { "epoch": 0.39, "learning_rate": 0.00030895450533579395, "loss": 0.0638, "theoretical_loss": 3.4207814985328957, "tokens_seen": 2290614272 }, { "epoch": 0.39, "learning_rate": 0.0003089143865842895, "loss": 0.0677, "theoretical_loss": 3.4207657428132476, "tokens_seen": 2290745344 }, { "epoch": 0.39, "learning_rate": 0.00030887426783278503, "loss": 0.0672, "theoretical_loss": 3.4207499882474917, "tokens_seen": 2290876416 }, { "epoch": 0.39, "learning_rate": 0.0003088341490812806, "loss": 0.0682, "theoretical_loss": 3.4207342348354786, "tokens_seen": 2291007488 }, { "epoch": 0.39, "learning_rate": 0.00030879403032977616, "loss": 0.0676, "theoretical_loss": 3.4207184825770565, "tokens_seen": 2291138560 }, { "epoch": 0.39, "learning_rate": 0.00030875391157827173, "loss": 0.0649, "theoretical_loss": 3.420702731472076, "tokens_seen": 2291269632 }, { "epoch": 0.39, "learning_rate": 0.00030871379282676725, "loss": 0.0692, "theoretical_loss": 3.420686981520386, "tokens_seen": 2291400704 }, { "epoch": 0.39, "learning_rate": 0.0003086736740752628, "loss": 0.0675, "theoretical_loss": 3.4206712327218365, "tokens_seen": 2291531776 }, { "epoch": 0.39, "learning_rate": 0.00030863355532375833, "loss": 0.0678, "theoretical_loss": 3.420655485076277, "tokens_seen": 2291662848 }, { "epoch": 0.39, "learning_rate": 0.00030859343657225384, "loss": 0.0634, "theoretical_loss": 3.4206397385835574, "tokens_seen": 2291793920 }, { "epoch": 0.39, "learning_rate": 0.0003085533178207494, "loss": 0.0714, "theoretical_loss": 3.420623993243527, "tokens_seen": 2291924992 }, { "epoch": 0.39, "learning_rate": 0.000308513199069245, "loss": 0.067, "theoretical_loss": 3.4206082490560354, "tokens_seen": 2292056064 }, { "epoch": 0.39, "objective/train/advantage_avg": -3.548367021721788e-05, "objective/train/docs_used": 834793, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2583752870559692, "objective/train/original_loss": 1.2583751678466797, "objective/train/theoretical_loss": 3.420592506020933, "objective/train/tokens_used": 662711776, "objective/train/value_avg": -0.0065155029296875, "objective/train/value_loss": 0.00029059298685751855, "objective/train/value_max": -2.092123031616211e-05, "objective/train/value_min": -0.99169921875, "objective/train/value_reward_corr": 0.7456121586959819, "objective/train/value_std": 0.0173797607421875, "objective/train/weight_avg": 1.000084638595581, "objective/train/weighted_lm_loss": 1.2575234174728394, "objective/train/weights_max": 1.256239414215088, "objective/train/weights_min": 0.2320682257413864, "theoretical_loss": 3.420592506020933, "tokens_seen": 2292187136 }, { "epoch": 0.39, "learning_rate": 0.0003084730803177405, "loss": 0.0645, "theoretical_loss": 3.420592506020933, "tokens_seen": 2292187136 }, { "epoch": 0.39, "learning_rate": 0.00030843296156623606, "loss": 0.0632, "theoretical_loss": 3.420576764138069, "tokens_seen": 2292318208 }, { "epoch": 0.39, "learning_rate": 0.00030839284281473163, "loss": 0.0658, "theoretical_loss": 3.420561023407293, "tokens_seen": 2292449280 }, { "epoch": 0.39, "learning_rate": 0.0003083527240632272, "loss": 0.0693, "theoretical_loss": 3.4205452838284556, "tokens_seen": 2292580352 }, { "epoch": 0.39, "learning_rate": 0.0003083126053117227, "loss": 0.0619, "theoretical_loss": 3.420529545401406, "tokens_seen": 2292711424 }, { "epoch": 0.39, "learning_rate": 0.0003082724865602183, "loss": 0.07, "theoretical_loss": 3.4205138081259943, "tokens_seen": 2292842496 }, { "epoch": 0.39, "learning_rate": 0.0003082323678087138, "loss": 0.0669, "theoretical_loss": 3.42049807200207, "tokens_seen": 2292973568 }, { "epoch": 0.39, "learning_rate": 0.0003081922490572093, "loss": 0.0676, "theoretical_loss": 3.4204823370294837, "tokens_seen": 2293104640 }, { "epoch": 0.39, "learning_rate": 0.0003081521303057049, "loss": 0.0622, "theoretical_loss": 3.420466603208085, "tokens_seen": 2293235712 }, { "epoch": 0.39, "learning_rate": 0.00030811201155420045, "loss": 0.0664, "theoretical_loss": 3.4204508705377243, "tokens_seen": 2293366784 }, { "epoch": 0.39, "learning_rate": 0.00030807189280269596, "loss": 0.0711, "theoretical_loss": 3.4204351390182506, "tokens_seen": 2293497856 }, { "epoch": 0.39, "learning_rate": 0.00030803177405119153, "loss": 0.0657, "theoretical_loss": 3.420419408649515, "tokens_seen": 2293628928 }, { "epoch": 0.39, "learning_rate": 0.0003079916552996871, "loss": 0.069, "theoretical_loss": 3.4204036794313675, "tokens_seen": 2293760000 }, { "epoch": 0.39, "learning_rate": 0.00030795153654818267, "loss": 0.0662, "theoretical_loss": 3.4203879513636575, "tokens_seen": 2293891072 }, { "epoch": 0.39, "learning_rate": 0.0003079114177966782, "loss": 0.0637, "theoretical_loss": 3.4203722244462353, "tokens_seen": 2294022144 }, { "epoch": 0.39, "learning_rate": 0.00030787129904517375, "loss": 0.0636, "theoretical_loss": 3.420356498678952, "tokens_seen": 2294153216 }, { "epoch": 0.39, "learning_rate": 0.00030783118029366926, "loss": 0.0653, "theoretical_loss": 3.4203407740616565, "tokens_seen": 2294284288 }, { "epoch": 0.39, "learning_rate": 0.0003077910615421648, "loss": 0.0676, "theoretical_loss": 3.4203250505941996, "tokens_seen": 2294415360 }, { "epoch": 0.39, "learning_rate": 0.00030775094279066035, "loss": 0.0691, "theoretical_loss": 3.4203093282764314, "tokens_seen": 2294546432 }, { "epoch": 0.39, "learning_rate": 0.0003077108240391559, "loss": 0.0661, "theoretical_loss": 3.4202936071082024, "tokens_seen": 2294677504 }, { "epoch": 0.39, "learning_rate": 0.00030767070528765143, "loss": 0.0621, "theoretical_loss": 3.420277887089363, "tokens_seen": 2294808576 }, { "epoch": 0.39, "learning_rate": 0.000307630586536147, "loss": 0.0759, "theoretical_loss": 3.420262168219763, "tokens_seen": 2294939648 }, { "epoch": 0.39, "learning_rate": 0.00030759046778464257, "loss": 0.0676, "theoretical_loss": 3.420246450499253, "tokens_seen": 2295070720 }, { "epoch": 0.39, "learning_rate": 0.00030755034903313813, "loss": 0.0713, "theoretical_loss": 3.4202307339276836, "tokens_seen": 2295201792 }, { "epoch": 0.39, "learning_rate": 0.00030751023028163365, "loss": 0.0658, "theoretical_loss": 3.420215018504905, "tokens_seen": 2295332864 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.000824108486995101, "objective/train/docs_used": 836058, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.40658700466156, "objective/train/original_loss": 1.40658700466156, "objective/train/theoretical_loss": 3.420199304230768, "objective/train/tokens_used": 665988576, "objective/train/value_avg": -0.009735107421875, "objective/train/value_loss": 0.00012434717791620642, "objective/train/value_max": -3.707408905029297e-05, "objective/train/value_min": -0.349853515625, "objective/train/value_reward_corr": 0.8015300047951283, "objective/train/value_std": 0.0160980224609375, "objective/train/weight_avg": 1.0008856058120728, "objective/train/weighted_lm_loss": 1.4073649644851685, "objective/train/weights_max": 1.2004094123840332, "objective/train/weights_min": 0.7738307118415833, "theoretical_loss": 3.420199304230768, "tokens_seen": 2295463936 }, { "epoch": 0.39, "learning_rate": 0.0003074701115301292, "loss": 0.0684, "theoretical_loss": 3.420199304230768, "tokens_seen": 2295463936 }, { "epoch": 0.39, "learning_rate": 0.00030742999277862473, "loss": 0.0687, "theoretical_loss": 3.4201835911051224, "tokens_seen": 2295595008 }, { "epoch": 0.39, "learning_rate": 0.00030738987402712024, "loss": 0.0674, "theoretical_loss": 3.420167879127819, "tokens_seen": 2295726080 }, { "epoch": 0.39, "learning_rate": 0.0003073497552756158, "loss": 0.0668, "theoretical_loss": 3.4201521682987086, "tokens_seen": 2295857152 }, { "epoch": 0.39, "learning_rate": 0.0003073096365241114, "loss": 0.069, "theoretical_loss": 3.420136458617641, "tokens_seen": 2295988224 }, { "epoch": 0.39, "learning_rate": 0.0003072695177726069, "loss": 0.066, "theoretical_loss": 3.420120750084468, "tokens_seen": 2296119296 }, { "epoch": 0.39, "learning_rate": 0.00030722939902110246, "loss": 0.0681, "theoretical_loss": 3.420105042699039, "tokens_seen": 2296250368 }, { "epoch": 0.39, "learning_rate": 0.00030718928026959803, "loss": 0.0644, "theoretical_loss": 3.4200893364612055, "tokens_seen": 2296381440 }, { "epoch": 0.39, "learning_rate": 0.0003071491615180936, "loss": 0.0609, "theoretical_loss": 3.4200736313708173, "tokens_seen": 2296512512 }, { "epoch": 0.39, "learning_rate": 0.0003071090427665891, "loss": 0.0669, "theoretical_loss": 3.4200579274277256, "tokens_seen": 2296643584 }, { "epoch": 0.39, "learning_rate": 0.0003070689240150847, "loss": 0.0693, "theoretical_loss": 3.420042224631781, "tokens_seen": 2296774656 }, { "epoch": 0.39, "learning_rate": 0.0003070288052635802, "loss": 0.0655, "theoretical_loss": 3.420026522982835, "tokens_seen": 2296905728 }, { "epoch": 0.39, "learning_rate": 0.0003069886865120757, "loss": 0.0637, "theoretical_loss": 3.4200108224807373, "tokens_seen": 2297036800 }, { "epoch": 0.39, "learning_rate": 0.0003069485677605713, "loss": 0.068, "theoretical_loss": 3.4199951231253394, "tokens_seen": 2297167872 }, { "epoch": 0.39, "learning_rate": 0.00030690844900906685, "loss": 0.0677, "theoretical_loss": 3.419979424916491, "tokens_seen": 2297298944 }, { "epoch": 0.39, "learning_rate": 0.00030686833025756236, "loss": 0.0674, "theoretical_loss": 3.4199637278540447, "tokens_seen": 2297430016 }, { "epoch": 0.39, "learning_rate": 0.00030682821150605793, "loss": 0.0688, "theoretical_loss": 3.41994803193785, "tokens_seen": 2297561088 }, { "epoch": 0.39, "learning_rate": 0.0003067880927545535, "loss": 0.0693, "theoretical_loss": 3.4199323371677584, "tokens_seen": 2297692160 }, { "epoch": 0.39, "learning_rate": 0.00030674797400304907, "loss": 0.0669, "theoretical_loss": 3.419916643543621, "tokens_seen": 2297823232 }, { "epoch": 0.39, "learning_rate": 0.0003067078552515446, "loss": 0.0697, "theoretical_loss": 3.419900951065288, "tokens_seen": 2297954304 }, { "epoch": 0.39, "learning_rate": 0.00030666773650004015, "loss": 0.0697, "theoretical_loss": 3.419885259732611, "tokens_seen": 2298085376 }, { "epoch": 0.39, "learning_rate": 0.00030662761774853566, "loss": 0.0662, "theoretical_loss": 3.419869569545441, "tokens_seen": 2298216448 }, { "epoch": 0.39, "learning_rate": 0.0003065874989970312, "loss": 0.0658, "theoretical_loss": 3.419853880503629, "tokens_seen": 2298347520 }, { "epoch": 0.39, "learning_rate": 0.00030654738024552675, "loss": 0.0647, "theoretical_loss": 3.419838192607026, "tokens_seen": 2298478592 }, { "epoch": 0.39, "learning_rate": 0.0003065072614940223, "loss": 0.0666, "theoretical_loss": 3.419822505855483, "tokens_seen": 2298609664 }, { "epoch": 0.39, "objective/train/advantage_avg": 0.0004832773411180824, "objective/train/docs_used": 837138, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3885146379470825, "objective/train/original_loss": 1.3885146379470825, "objective/train/theoretical_loss": 3.4198068202488514, "objective/train/tokens_used": 669265376, "objective/train/value_avg": -0.007579803466796875, "objective/train/value_loss": 0.0001855616719694808, "objective/train/value_max": -3.4809112548828125e-05, "objective/train/value_min": -0.358154296875, "objective/train/value_reward_corr": 0.7902511820202558, "objective/train/value_std": 0.0175933837890625, "objective/train/weight_avg": 1.0005677938461304, "objective/train/weighted_lm_loss": 1.3890827894210815, "objective/train/weights_max": 1.4122933149337769, "objective/train/weights_min": 0.28861093521118164, "theoretical_loss": 3.4198068202488514, "tokens_seen": 2298740736 }, { "epoch": 0.39, "learning_rate": 0.0003064671427425179, "loss": 0.0621, "theoretical_loss": 3.4198068202488514, "tokens_seen": 2298740736 }, { "epoch": 0.39, "learning_rate": 0.0003064270239910134, "loss": 0.0665, "theoretical_loss": 3.4197911357869826, "tokens_seen": 2298871808 }, { "epoch": 0.39, "learning_rate": 0.00030638690523950897, "loss": 0.0717, "theoretical_loss": 3.419775452469727, "tokens_seen": 2299002880 }, { "epoch": 0.39, "learning_rate": 0.00030634678648800454, "loss": 0.0671, "theoretical_loss": 3.4197597702969365, "tokens_seen": 2299133952 }, { "epoch": 0.39, "learning_rate": 0.00030630666773650005, "loss": 0.0655, "theoretical_loss": 3.4197440892684616, "tokens_seen": 2299265024 }, { "epoch": 0.39, "learning_rate": 0.0003062665489849956, "loss": 0.0666, "theoretical_loss": 3.4197284093841547, "tokens_seen": 2299396096 }, { "epoch": 0.39, "learning_rate": 0.00030622643023349113, "loss": 0.0666, "theoretical_loss": 3.4197127306438664, "tokens_seen": 2299527168 }, { "epoch": 0.39, "learning_rate": 0.00030618631148198665, "loss": 0.0677, "theoretical_loss": 3.419697053047448, "tokens_seen": 2299658240 }, { "epoch": 0.39, "learning_rate": 0.0003061461927304822, "loss": 0.0696, "theoretical_loss": 3.4196813765947507, "tokens_seen": 2299789312 }, { "epoch": 0.39, "learning_rate": 0.0003061060739789778, "loss": 0.0698, "theoretical_loss": 3.4196657012856266, "tokens_seen": 2299920384 }, { "epoch": 0.39, "learning_rate": 0.00030606595522747335, "loss": 0.0667, "theoretical_loss": 3.4196500271199266, "tokens_seen": 2300051456 }, { "epoch": 0.39, "learning_rate": 0.00030602583647596887, "loss": 0.0673, "theoretical_loss": 3.419634354097502, "tokens_seen": 2300182528 }, { "epoch": 0.39, "learning_rate": 0.00030598571772446443, "loss": 0.0642, "theoretical_loss": 3.4196186822182044, "tokens_seen": 2300313600 }, { "epoch": 0.39, "learning_rate": 0.00030594559897296, "loss": 0.0647, "theoretical_loss": 3.4196030114818856, "tokens_seen": 2300444672 }, { "epoch": 0.39, "learning_rate": 0.0003059054802214555, "loss": 0.0684, "theoretical_loss": 3.419587341888397, "tokens_seen": 2300575744 }, { "epoch": 0.39, "learning_rate": 0.0003058653614699511, "loss": 0.0669, "theoretical_loss": 3.4195716734375896, "tokens_seen": 2300706816 }, { "epoch": 0.39, "learning_rate": 0.0003058252427184466, "loss": 0.0687, "theoretical_loss": 3.4195560061293158, "tokens_seen": 2300837888 }, { "epoch": 0.39, "learning_rate": 0.0003057851239669421, "loss": 0.0648, "theoretical_loss": 3.4195403399634268, "tokens_seen": 2300968960 }, { "epoch": 0.39, "learning_rate": 0.0003057450052154377, "loss": 0.0659, "theoretical_loss": 3.4195246749397743, "tokens_seen": 2301100032 }, { "epoch": 0.39, "learning_rate": 0.00030570488646393325, "loss": 0.0679, "theoretical_loss": 3.41950901105821, "tokens_seen": 2301231104 }, { "epoch": 0.39, "learning_rate": 0.0003056647677124288, "loss": 0.0669, "theoretical_loss": 3.4194933483185856, "tokens_seen": 2301362176 }, { "epoch": 0.39, "learning_rate": 0.00030562464896092433, "loss": 0.0689, "theoretical_loss": 3.4194776867207524, "tokens_seen": 2301493248 }, { "epoch": 0.39, "learning_rate": 0.0003055845302094199, "loss": 0.0665, "theoretical_loss": 3.4194620262645627, "tokens_seen": 2301624320 }, { "epoch": 0.4, "learning_rate": 0.00030554441145791547, "loss": 0.0676, "theoretical_loss": 3.419446366949868, "tokens_seen": 2301755392 }, { "epoch": 0.4, "learning_rate": 0.000305504292706411, "loss": 0.065, "theoretical_loss": 3.4194307087765203, "tokens_seen": 2301886464 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.00032254241523332894, "objective/train/docs_used": 838318, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.441489815711975, "objective/train/original_loss": 1.4414896965026855, "objective/train/theoretical_loss": 3.419415051744371, "objective/train/tokens_used": 672542176, "objective/train/value_avg": -0.0076751708984375, "objective/train/value_loss": 0.00017329276306554675, "objective/train/value_max": -4.13060188293457e-05, "objective/train/value_min": -0.260009765625, "objective/train/value_reward_corr": 0.7374166684730615, "objective/train/value_std": 0.01448822021484375, "objective/train/weight_avg": 1.0004041194915771, "objective/train/weighted_lm_loss": 1.4406757354736328, "objective/train/weights_max": 1.1704035997390747, "objective/train/weights_min": 0.36923545598983765, "theoretical_loss": 3.419415051744371, "tokens_seen": 2302017536 }, { "epoch": 0.4, "learning_rate": 0.00030546417395490655, "loss": 0.0695, "theoretical_loss": 3.419415051744371, "tokens_seen": 2302017536 }, { "epoch": 0.4, "learning_rate": 0.00030542405520340207, "loss": 0.0627, "theoretical_loss": 3.4193993958532722, "tokens_seen": 2302148608 }, { "epoch": 0.4, "learning_rate": 0.0003053839364518976, "loss": 0.0665, "theoretical_loss": 3.4193837411030765, "tokens_seen": 2302279680 }, { "epoch": 0.4, "learning_rate": 0.00030534381770039315, "loss": 0.0651, "theoretical_loss": 3.4193680874936345, "tokens_seen": 2302410752 }, { "epoch": 0.4, "learning_rate": 0.0003053036989488887, "loss": 0.0652, "theoretical_loss": 3.4193524350247992, "tokens_seen": 2302541824 }, { "epoch": 0.4, "learning_rate": 0.0003052635801973843, "loss": 0.0699, "theoretical_loss": 3.419336783696422, "tokens_seen": 2302672896 }, { "epoch": 0.4, "learning_rate": 0.0003052234614458798, "loss": 0.0662, "theoretical_loss": 3.4193211335083546, "tokens_seen": 2302803968 }, { "epoch": 0.4, "learning_rate": 0.00030518334269437537, "loss": 0.0688, "theoretical_loss": 3.41930548446045, "tokens_seen": 2302935040 }, { "epoch": 0.4, "learning_rate": 0.00030514322394287094, "loss": 0.0686, "theoretical_loss": 3.4192898365525597, "tokens_seen": 2303066112 }, { "epoch": 0.4, "learning_rate": 0.00030510310519136645, "loss": 0.0697, "theoretical_loss": 3.419274189784536, "tokens_seen": 2303197184 }, { "epoch": 0.4, "learning_rate": 0.000305062986439862, "loss": 0.0695, "theoretical_loss": 3.4192585441562304, "tokens_seen": 2303328256 }, { "epoch": 0.4, "learning_rate": 0.00030502286768835753, "loss": 0.0693, "theoretical_loss": 3.4192428996674957, "tokens_seen": 2303459328 }, { "epoch": 0.4, "learning_rate": 0.00030498274893685305, "loss": 0.0721, "theoretical_loss": 3.419227256318184, "tokens_seen": 2303590400 }, { "epoch": 0.4, "learning_rate": 0.0003049426301853486, "loss": 0.0692, "theoretical_loss": 3.4192116141081463, "tokens_seen": 2303721472 }, { "epoch": 0.4, "learning_rate": 0.0003049025114338442, "loss": 0.0634, "theoretical_loss": 3.4191959730372368, "tokens_seen": 2303852544 }, { "epoch": 0.4, "learning_rate": 0.00030486239268233975, "loss": 0.0718, "theoretical_loss": 3.4191803331053063, "tokens_seen": 2303983616 }, { "epoch": 0.4, "learning_rate": 0.00030482227393083527, "loss": 0.0676, "theoretical_loss": 3.419164694312208, "tokens_seen": 2304114688 }, { "epoch": 0.4, "learning_rate": 0.00030478215517933083, "loss": 0.0673, "theoretical_loss": 3.419149056657793, "tokens_seen": 2304245760 }, { "epoch": 0.4, "learning_rate": 0.0003047420364278264, "loss": 0.0712, "theoretical_loss": 3.419133420141915, "tokens_seen": 2304376832 }, { "epoch": 0.4, "learning_rate": 0.0003047019176763219, "loss": 0.0653, "theoretical_loss": 3.4191177847644254, "tokens_seen": 2304507904 }, { "epoch": 0.4, "learning_rate": 0.0003046617989248175, "loss": 0.0643, "theoretical_loss": 3.419102150525177, "tokens_seen": 2304638976 }, { "epoch": 0.4, "learning_rate": 0.000304621680173313, "loss": 0.0697, "theoretical_loss": 3.419086517424022, "tokens_seen": 2304770048 }, { "epoch": 0.4, "learning_rate": 0.0003045815614218085, "loss": 0.0678, "theoretical_loss": 3.4190708854608127, "tokens_seen": 2304901120 }, { "epoch": 0.4, "learning_rate": 0.0003045414426703041, "loss": 0.0689, "theoretical_loss": 3.4190552546354023, "tokens_seen": 2305032192 }, { "epoch": 0.4, "learning_rate": 0.00030450132391879965, "loss": 0.066, "theoretical_loss": 3.4190396249476427, "tokens_seen": 2305163264 }, { "debugging/Compilability": 1.0, "debugging/distinct-1-grams": 0.7355514851310847, "debugging/entropy-1-grams": 4.948879557253174, "debugging/length": 466.625, "debugging/num_segments": 8, "debugging/raw_token_scores_avg": 0.012094466015696526, "debugging/raw_token_scores_std": 0.023884281516075134, "debugging/score": 0.005694908315944096, "debugging/score_std": 0.003567329076286955, "epoch": 0.4, "objective/train/advantage_avg": -0.0007530044531449676, "objective/train/docs_used": 839437, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3488891124725342, "objective/train/original_loss": 1.3488889932632446, "objective/train/theoretical_loss": 3.419023996397386, "objective/train/tokens_used": 675818976, "objective/train/value_avg": -0.01134490966796875, "objective/train/value_loss": 0.00024354978813789785, "objective/train/value_max": -6.0617923736572266e-05, "objective/train/value_min": -0.349365234375, "objective/train/value_reward_corr": 0.7585227256902212, "objective/train/value_std": 0.017303466796875, "objective/train/weight_avg": 0.9993655681610107, "objective/train/weighted_lm_loss": 1.3483755588531494, "objective/train/weights_max": 1.2162350416183472, "objective/train/weights_min": 0.5647739171981812, "theoretical_loss": 3.419023996397386, "tokens_seen": 2305294336 }, { "epoch": 0.4, "learning_rate": 0.0003044612051672952, "loss": 0.0645, "theoretical_loss": 3.419023996397386, "tokens_seen": 2305294336 }, { "epoch": 0.4, "learning_rate": 0.00030442108641579073, "loss": 0.0664, "theoretical_loss": 3.4190083689844855, "tokens_seen": 2305425408 }, { "epoch": 0.4, "learning_rate": 0.0003043809676642863, "loss": 0.0706, "theoretical_loss": 3.418992742708794, "tokens_seen": 2305556480 }, { "epoch": 0.4, "learning_rate": 0.00030434084891278187, "loss": 0.0681, "theoretical_loss": 3.4189771175701633, "tokens_seen": 2305687552 }, { "epoch": 0.4, "learning_rate": 0.0003043007301612774, "loss": 0.0643, "theoretical_loss": 3.418961493568446, "tokens_seen": 2305818624 }, { "epoch": 0.4, "learning_rate": 0.00030426061140977295, "loss": 0.0682, "theoretical_loss": 3.4189458707034954, "tokens_seen": 2305949696 }, { "epoch": 0.4, "learning_rate": 0.00030422049265826847, "loss": 0.0699, "theoretical_loss": 3.4189302489751636, "tokens_seen": 2306080768 }, { "epoch": 0.4, "learning_rate": 0.000304180373906764, "loss": 0.0699, "theoretical_loss": 3.418914628383304, "tokens_seen": 2306211840 }, { "epoch": 0.4, "learning_rate": 0.00030414025515525955, "loss": 0.0677, "theoretical_loss": 3.418899008927769, "tokens_seen": 2306342912 }, { "epoch": 0.4, "learning_rate": 0.0003041001364037551, "loss": 0.0679, "theoretical_loss": 3.418883390608411, "tokens_seen": 2306473984 }, { "epoch": 0.4, "learning_rate": 0.0003040600176522507, "loss": 0.0659, "theoretical_loss": 3.4188677734250836, "tokens_seen": 2306605056 }, { "epoch": 0.4, "learning_rate": 0.0003040198989007462, "loss": 0.0657, "theoretical_loss": 3.4188521573776383, "tokens_seen": 2306736128 }, { "epoch": 0.4, "learning_rate": 0.00030397978014924177, "loss": 0.066, "theoretical_loss": 3.4188365424659293, "tokens_seen": 2306867200 }, { "epoch": 0.4, "learning_rate": 0.00030393966139773734, "loss": 0.0728, "theoretical_loss": 3.418820928689809, "tokens_seen": 2306998272 }, { "epoch": 0.4, "learning_rate": 0.00030389954264623285, "loss": 0.0693, "theoretical_loss": 3.4188053160491303, "tokens_seen": 2307129344 }, { "epoch": 0.4, "learning_rate": 0.0003038594238947284, "loss": 0.0691, "theoretical_loss": 3.418789704543746, "tokens_seen": 2307260416 }, { "epoch": 0.4, "learning_rate": 0.00030381930514322393, "loss": 0.0649, "theoretical_loss": 3.418774094173509, "tokens_seen": 2307391488 }, { "epoch": 0.4, "learning_rate": 0.0003037791863917195, "loss": 0.072, "theoretical_loss": 3.418758484938272, "tokens_seen": 2307522560 }, { "epoch": 0.4, "learning_rate": 0.000303739067640215, "loss": 0.0698, "theoretical_loss": 3.4187428768378894, "tokens_seen": 2307653632 }, { "epoch": 0.4, "learning_rate": 0.0003036989488887106, "loss": 0.0688, "theoretical_loss": 3.4187272698722126, "tokens_seen": 2307784704 }, { "epoch": 0.4, "learning_rate": 0.00030365883013720615, "loss": 0.0717, "theoretical_loss": 3.4187116640410955, "tokens_seen": 2307915776 }, { "epoch": 0.4, "learning_rate": 0.00030361871138570167, "loss": 0.0683, "theoretical_loss": 3.4186960593443914, "tokens_seen": 2308046848 }, { "epoch": 0.4, "learning_rate": 0.00030357859263419724, "loss": 0.0689, "theoretical_loss": 3.418680455781953, "tokens_seen": 2308177920 }, { "epoch": 0.4, "learning_rate": 0.0003035384738826928, "loss": 0.0714, "theoretical_loss": 3.418664853353633, "tokens_seen": 2308308992 }, { "epoch": 0.4, "learning_rate": 0.0003034983551311883, "loss": 0.0668, "theoretical_loss": 3.4186492520592853, "tokens_seen": 2308440064 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0005888169398531318, "objective/train/docs_used": 840762, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2193442583084106, "objective/train/original_loss": 1.2193443775177002, "objective/train/theoretical_loss": 3.418633651898763, "objective/train/tokens_used": 679095776, "objective/train/value_avg": -0.01026153564453125, "objective/train/value_loss": 0.00029686521156691015, "objective/train/value_max": -1.996755599975586e-05, "objective/train/value_min": -0.49267578125, "objective/train/value_reward_corr": 0.6913750159345937, "objective/train/value_std": 0.0181884765625, "objective/train/weight_avg": 1.000723123550415, "objective/train/weighted_lm_loss": 1.2194254398345947, "objective/train/weights_max": 1.1580395698547363, "objective/train/weights_min": 0.3686865568161011, "theoretical_loss": 3.418633651898763, "tokens_seen": 2308571136 }, { "epoch": 0.4, "learning_rate": 0.0003034582363796839, "loss": 0.0648, "theoretical_loss": 3.418633651898763, "tokens_seen": 2308571136 }, { "epoch": 0.4, "learning_rate": 0.0003034181176281794, "loss": 0.0684, "theoretical_loss": 3.4186180528719188, "tokens_seen": 2308702208 }, { "epoch": 0.4, "learning_rate": 0.00030337799887667497, "loss": 0.0664, "theoretical_loss": 3.418602454978607, "tokens_seen": 2308833280 }, { "epoch": 0.4, "learning_rate": 0.0003033378801251705, "loss": 0.0672, "theoretical_loss": 3.41858685821868, "tokens_seen": 2308964352 }, { "epoch": 0.4, "learning_rate": 0.00030329776137366605, "loss": 0.069, "theoretical_loss": 3.4185712625919917, "tokens_seen": 2309095424 }, { "epoch": 0.4, "learning_rate": 0.0003032576426221616, "loss": 0.07, "theoretical_loss": 3.418555668098395, "tokens_seen": 2309226496 }, { "epoch": 0.4, "learning_rate": 0.00030321752387065713, "loss": 0.0706, "theoretical_loss": 3.4185400747377432, "tokens_seen": 2309357568 }, { "epoch": 0.4, "learning_rate": 0.0003031774051191527, "loss": 0.0689, "theoretical_loss": 3.41852448250989, "tokens_seen": 2309488640 }, { "epoch": 0.4, "learning_rate": 0.00030313728636764827, "loss": 0.0678, "theoretical_loss": 3.4185088914146884, "tokens_seen": 2309619712 }, { "epoch": 0.4, "learning_rate": 0.0003030971676161438, "loss": 0.0677, "theoretical_loss": 3.4184933014519925, "tokens_seen": 2309750784 }, { "epoch": 0.4, "learning_rate": 0.00030305704886463935, "loss": 0.064, "theoretical_loss": 3.4184777126216552, "tokens_seen": 2309881856 }, { "epoch": 0.4, "learning_rate": 0.00030301693011313487, "loss": 0.0763, "theoretical_loss": 3.4184621249235305, "tokens_seen": 2310012928 }, { "epoch": 0.4, "learning_rate": 0.00030297681136163044, "loss": 0.0683, "theoretical_loss": 3.418446538357472, "tokens_seen": 2310144000 }, { "epoch": 0.4, "learning_rate": 0.00030293669261012595, "loss": 0.0659, "theoretical_loss": 3.418430952923332, "tokens_seen": 2310275072 }, { "epoch": 0.4, "learning_rate": 0.0003028965738586215, "loss": 0.0656, "theoretical_loss": 3.418415368620966, "tokens_seen": 2310406144 }, { "epoch": 0.4, "learning_rate": 0.0003028564551071171, "loss": 0.0696, "theoretical_loss": 3.418399785450226, "tokens_seen": 2310537216 }, { "epoch": 0.4, "learning_rate": 0.0003028163363556126, "loss": 0.0679, "theoretical_loss": 3.418384203410967, "tokens_seen": 2310668288 }, { "epoch": 0.4, "learning_rate": 0.00030277621760410817, "loss": 0.0635, "theoretical_loss": 3.418368622503041, "tokens_seen": 2310799360 }, { "epoch": 0.4, "learning_rate": 0.00030273609885260374, "loss": 0.0677, "theoretical_loss": 3.418353042726303, "tokens_seen": 2310930432 }, { "epoch": 0.4, "learning_rate": 0.00030269598010109925, "loss": 0.0669, "theoretical_loss": 3.4183374640806066, "tokens_seen": 2311061504 }, { "epoch": 0.4, "learning_rate": 0.0003026558613495948, "loss": 0.0681, "theoretical_loss": 3.4183218865658054, "tokens_seen": 2311192576 }, { "epoch": 0.4, "learning_rate": 0.00030261574259809034, "loss": 0.0664, "theoretical_loss": 3.4183063101817526, "tokens_seen": 2311323648 }, { "epoch": 0.4, "learning_rate": 0.0003025756238465859, "loss": 0.0648, "theoretical_loss": 3.418290734928303, "tokens_seen": 2311454720 }, { "epoch": 0.4, "learning_rate": 0.0003025355050950814, "loss": 0.0656, "theoretical_loss": 3.41827516080531, "tokens_seen": 2311585792 }, { "epoch": 0.4, "learning_rate": 0.000302495386343577, "loss": 0.068, "theoretical_loss": 3.418259587812627, "tokens_seen": 2311716864 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.00033356191124767065, "objective/train/docs_used": 841951, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4150850772857666, "objective/train/original_loss": 1.415084958076477, "objective/train/theoretical_loss": 3.418244015950108, "objective/train/tokens_used": 682372576, "objective/train/value_avg": -0.005840301513671875, "objective/train/value_loss": 0.00013601088721770793, "objective/train/value_max": -3.272294998168945e-05, "objective/train/value_min": -0.2646484375, "objective/train/value_reward_corr": 0.6543302856436789, "objective/train/value_std": 0.00994873046875, "objective/train/weight_avg": 1.0003966093063354, "objective/train/weighted_lm_loss": 1.4152822494506836, "objective/train/weights_max": 1.2225364446640015, "objective/train/weights_min": 0.37184974551200867, "theoretical_loss": 3.418244015950108, "tokens_seen": 2311847936 }, { "epoch": 0.4, "learning_rate": 0.00030245526759207255, "loss": 0.0683, "theoretical_loss": 3.418244015950108, "tokens_seen": 2311847936 }, { "epoch": 0.4, "learning_rate": 0.00030241514884056807, "loss": 0.068, "theoretical_loss": 3.4182284452176077, "tokens_seen": 2311979008 }, { "epoch": 0.4, "learning_rate": 0.00030237503008906364, "loss": 0.0694, "theoretical_loss": 3.418212875614979, "tokens_seen": 2312110080 }, { "epoch": 0.4, "learning_rate": 0.0003023349113375592, "loss": 0.0657, "theoretical_loss": 3.4181973071420773, "tokens_seen": 2312241152 }, { "epoch": 0.4, "learning_rate": 0.0003022947925860547, "loss": 0.0653, "theoretical_loss": 3.4181817397987553, "tokens_seen": 2312372224 }, { "epoch": 0.4, "learning_rate": 0.0003022546738345503, "loss": 0.0664, "theoretical_loss": 3.418166173584867, "tokens_seen": 2312503296 }, { "epoch": 0.4, "learning_rate": 0.0003022145550830458, "loss": 0.0701, "theoretical_loss": 3.4181506085002673, "tokens_seen": 2312634368 }, { "epoch": 0.4, "learning_rate": 0.00030217443633154137, "loss": 0.0631, "theoretical_loss": 3.4181350445448095, "tokens_seen": 2312765440 }, { "epoch": 0.4, "learning_rate": 0.0003021343175800369, "loss": 0.0627, "theoretical_loss": 3.4181194817183487, "tokens_seen": 2312896512 }, { "epoch": 0.4, "learning_rate": 0.00030209419882853245, "loss": 0.0646, "theoretical_loss": 3.4181039200207377, "tokens_seen": 2313027584 }, { "epoch": 0.4, "learning_rate": 0.000302054080077028, "loss": 0.0697, "theoretical_loss": 3.418088359451832, "tokens_seen": 2313158656 }, { "epoch": 0.4, "learning_rate": 0.00030201396132552354, "loss": 0.0665, "theoretical_loss": 3.418072800011484, "tokens_seen": 2313289728 }, { "epoch": 0.4, "learning_rate": 0.0003019738425740191, "loss": 0.0664, "theoretical_loss": 3.41805724169955, "tokens_seen": 2313420800 }, { "epoch": 0.4, "learning_rate": 0.00030193372382251467, "loss": 0.0696, "theoretical_loss": 3.4180416845158827, "tokens_seen": 2313551872 }, { "epoch": 0.4, "learning_rate": 0.0003018936050710102, "loss": 0.0715, "theoretical_loss": 3.4180261284603373, "tokens_seen": 2313682944 }, { "epoch": 0.4, "learning_rate": 0.00030185348631950576, "loss": 0.0709, "theoretical_loss": 3.4180105735327677, "tokens_seen": 2313814016 }, { "epoch": 0.4, "learning_rate": 0.00030181336756800127, "loss": 0.068, "theoretical_loss": 3.417995019733028, "tokens_seen": 2313945088 }, { "epoch": 0.4, "learning_rate": 0.00030177324881649684, "loss": 0.0693, "theoretical_loss": 3.417979467060973, "tokens_seen": 2314076160 }, { "epoch": 0.4, "learning_rate": 0.00030173313006499235, "loss": 0.0704, "theoretical_loss": 3.4179639155164567, "tokens_seen": 2314207232 }, { "epoch": 0.4, "learning_rate": 0.0003016930113134879, "loss": 0.0643, "theoretical_loss": 3.417948365099334, "tokens_seen": 2314338304 }, { "epoch": 0.4, "learning_rate": 0.0003016528925619835, "loss": 0.0655, "theoretical_loss": 3.4179328158094586, "tokens_seen": 2314469376 }, { "epoch": 0.4, "learning_rate": 0.000301612773810479, "loss": 0.0656, "theoretical_loss": 3.4179172676466854, "tokens_seen": 2314600448 }, { "epoch": 0.4, "learning_rate": 0.00030157265505897457, "loss": 0.0652, "theoretical_loss": 3.417901720610869, "tokens_seen": 2314731520 }, { "epoch": 0.4, "learning_rate": 0.00030153253630747014, "loss": 0.0684, "theoretical_loss": 3.417886174701864, "tokens_seen": 2314862592 }, { "epoch": 0.4, "learning_rate": 0.0003014924175559657, "loss": 0.0669, "theoretical_loss": 3.417870629919524, "tokens_seen": 2314993664 }, { "epoch": 0.4, "objective/train/advantage_avg": 0.0007659764378331602, "objective/train/docs_used": 843188, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3537318706512451, "objective/train/original_loss": 1.3537318706512451, "objective/train/theoretical_loss": 3.4178550862637045, "objective/train/tokens_used": 685649376, "objective/train/value_avg": -0.005214691162109375, "objective/train/value_loss": 7.70714832469821e-05, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.299072265625, "objective/train/value_reward_corr": 0.70850119071364, "objective/train/value_std": 0.00960540771484375, "objective/train/weight_avg": 1.0008035898208618, "objective/train/weighted_lm_loss": 1.354805588722229, "objective/train/weights_max": 1.1275123357772827, "objective/train/weights_min": 0.6108494997024536, "theoretical_loss": 3.4178550862637045, "tokens_seen": 2315124736 }, { "epoch": 0.4, "learning_rate": 0.0003014522988044612, "loss": 0.0701, "theoretical_loss": 3.4178550862637045, "tokens_seen": 2315124736 }, { "epoch": 0.4, "learning_rate": 0.0003014121800529568, "loss": 0.0682, "theoretical_loss": 3.41783954373426, "tokens_seen": 2315255808 }, { "epoch": 0.4, "learning_rate": 0.0003013720613014523, "loss": 0.0673, "theoretical_loss": 3.417824002331045, "tokens_seen": 2315386880 }, { "epoch": 0.4, "learning_rate": 0.0003013319425499478, "loss": 0.0661, "theoretical_loss": 3.4178084620539138, "tokens_seen": 2315517952 }, { "epoch": 0.4, "learning_rate": 0.0003012918237984434, "loss": 0.0694, "theoretical_loss": 3.4177929229027217, "tokens_seen": 2315649024 }, { "epoch": 0.4, "learning_rate": 0.00030125170504693896, "loss": 0.0715, "theoretical_loss": 3.4177773848773225, "tokens_seen": 2315780096 }, { "epoch": 0.4, "learning_rate": 0.00030121158629543447, "loss": 0.0662, "theoretical_loss": 3.4177618479775718, "tokens_seen": 2315911168 }, { "epoch": 0.4, "learning_rate": 0.00030117146754393004, "loss": 0.0695, "theoretical_loss": 3.4177463122033243, "tokens_seen": 2316042240 }, { "epoch": 0.4, "learning_rate": 0.0003011313487924256, "loss": 0.0683, "theoretical_loss": 3.4177307775544343, "tokens_seen": 2316173312 }, { "epoch": 0.4, "learning_rate": 0.0003010912300409212, "loss": 0.068, "theoretical_loss": 3.4177152440307568, "tokens_seen": 2316304384 }, { "epoch": 0.4, "learning_rate": 0.0003010511112894167, "loss": 0.0663, "theoretical_loss": 3.417699711632147, "tokens_seen": 2316435456 }, { "epoch": 0.4, "learning_rate": 0.00030101099253791226, "loss": 0.0653, "theoretical_loss": 3.417684180358459, "tokens_seen": 2316566528 }, { "epoch": 0.4, "learning_rate": 0.00030097087378640777, "loss": 0.0679, "theoretical_loss": 3.417668650209548, "tokens_seen": 2316697600 }, { "epoch": 0.4, "learning_rate": 0.0003009307550349033, "loss": 0.0701, "theoretical_loss": 3.417653121185269, "tokens_seen": 2316828672 }, { "epoch": 0.4, "learning_rate": 0.00030089063628339885, "loss": 0.0677, "theoretical_loss": 3.4176375932854772, "tokens_seen": 2316959744 }, { "epoch": 0.4, "learning_rate": 0.0003008505175318944, "loss": 0.0646, "theoretical_loss": 3.4176220665100274, "tokens_seen": 2317090816 }, { "epoch": 0.4, "learning_rate": 0.00030081039878038994, "loss": 0.0659, "theoretical_loss": 3.417606540858774, "tokens_seen": 2317221888 }, { "epoch": 0.4, "learning_rate": 0.0003007702800288855, "loss": 0.0633, "theoretical_loss": 3.4175910163315733, "tokens_seen": 2317352960 }, { "epoch": 0.4, "learning_rate": 0.0003007301612773811, "loss": 0.0694, "theoretical_loss": 3.417575492928279, "tokens_seen": 2317484032 }, { "epoch": 0.4, "learning_rate": 0.00030069004252587664, "loss": 0.0674, "theoretical_loss": 3.417559970648747, "tokens_seen": 2317615104 }, { "epoch": 0.4, "learning_rate": 0.00030064992377437216, "loss": 0.0645, "theoretical_loss": 3.4175444494928318, "tokens_seen": 2317746176 }, { "epoch": 0.4, "learning_rate": 0.0003006098050228677, "loss": 0.0652, "theoretical_loss": 3.4175289294603894, "tokens_seen": 2317877248 }, { "epoch": 0.4, "learning_rate": 0.00030056968627136324, "loss": 0.0659, "theoretical_loss": 3.4175134105512743, "tokens_seen": 2318008320 }, { "epoch": 0.4, "learning_rate": 0.00030052956751985875, "loss": 0.0678, "theoretical_loss": 3.4174978927653417, "tokens_seen": 2318139392 }, { "epoch": 0.41, "learning_rate": 0.0003004894487683543, "loss": 0.0705, "theoretical_loss": 3.417482376102447, "tokens_seen": 2318270464 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0001618694659555331, "objective/train/docs_used": 844398, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3777817487716675, "objective/train/original_loss": 1.3777817487716675, "objective/train/theoretical_loss": 3.4174668605624454, "objective/train/tokens_used": 688926176, "objective/train/value_avg": -0.006107330322265625, "objective/train/value_loss": 0.00025282177375629544, "objective/train/value_max": -3.510713577270508e-05, "objective/train/value_min": -0.67822265625, "objective/train/value_reward_corr": 0.6012164966054482, "objective/train/value_std": 0.012359619140625, "objective/train/weight_avg": 1.000273585319519, "objective/train/weighted_lm_loss": 1.3781646490097046, "objective/train/weights_max": 1.2796441316604614, "objective/train/weights_min": 0.39421960711479187, "theoretical_loss": 3.4174668605624454, "tokens_seen": 2318401536 }, { "epoch": 0.41, "learning_rate": 0.0003004493300168499, "loss": 0.0658, "theoretical_loss": 3.4174668605624454, "tokens_seen": 2318401536 }, { "epoch": 0.41, "learning_rate": 0.0003004092112653454, "loss": 0.0689, "theoretical_loss": 3.4174513461451923, "tokens_seen": 2318532608 }, { "epoch": 0.41, "learning_rate": 0.00030036909251384097, "loss": 0.0667, "theoretical_loss": 3.4174358328505425, "tokens_seen": 2318663680 }, { "epoch": 0.41, "learning_rate": 0.00030032897376233654, "loss": 0.0678, "theoretical_loss": 3.417420320678352, "tokens_seen": 2318794752 }, { "epoch": 0.41, "learning_rate": 0.0003002888550108321, "loss": 0.0704, "theoretical_loss": 3.4174048096284757, "tokens_seen": 2318925824 }, { "epoch": 0.41, "learning_rate": 0.0003002487362593276, "loss": 0.0702, "theoretical_loss": 3.417389299700769, "tokens_seen": 2319056896 }, { "epoch": 0.41, "learning_rate": 0.0003002086175078232, "loss": 0.0674, "theoretical_loss": 3.4173737908950876, "tokens_seen": 2319187968 }, { "epoch": 0.41, "learning_rate": 0.0003001684987563187, "loss": 0.0703, "theoretical_loss": 3.417358283211286, "tokens_seen": 2319319040 }, { "epoch": 0.41, "learning_rate": 0.0003001283800048142, "loss": 0.0658, "theoretical_loss": 3.417342776649221, "tokens_seen": 2319450112 }, { "epoch": 0.41, "learning_rate": 0.0003000882612533098, "loss": 0.0654, "theoretical_loss": 3.4173272712087472, "tokens_seen": 2319581184 }, { "epoch": 0.41, "learning_rate": 0.00030004814250180536, "loss": 0.0658, "theoretical_loss": 3.4173117668897204, "tokens_seen": 2319712256 }, { "epoch": 0.41, "learning_rate": 0.00030000802375030087, "loss": 0.0673, "theoretical_loss": 3.4172962636919957, "tokens_seen": 2319843328 }, { "epoch": 0.41, "learning_rate": 0.00029996790499879644, "loss": 0.0708, "theoretical_loss": 3.4172807616154293, "tokens_seen": 2319974400 }, { "epoch": 0.41, "learning_rate": 0.000299927786247292, "loss": 0.0693, "theoretical_loss": 3.4172652606598763, "tokens_seen": 2320105472 }, { "epoch": 0.41, "learning_rate": 0.0002998876674957876, "loss": 0.0678, "theoretical_loss": 3.417249760825193, "tokens_seen": 2320236544 }, { "epoch": 0.41, "learning_rate": 0.0002998475487442831, "loss": 0.0685, "theoretical_loss": 3.417234262111234, "tokens_seen": 2320367616 }, { "epoch": 0.41, "learning_rate": 0.00029980742999277866, "loss": 0.0671, "theoretical_loss": 3.417218764517856, "tokens_seen": 2320498688 }, { "epoch": 0.41, "learning_rate": 0.00029976731124127417, "loss": 0.068, "theoretical_loss": 3.4172032680449136, "tokens_seen": 2320629760 }, { "epoch": 0.41, "learning_rate": 0.0002997271924897697, "loss": 0.0676, "theoretical_loss": 3.4171877726922633, "tokens_seen": 2320760832 }, { "epoch": 0.41, "learning_rate": 0.00029968707373826526, "loss": 0.0637, "theoretical_loss": 3.417172278459761, "tokens_seen": 2320891904 }, { "epoch": 0.41, "learning_rate": 0.0002996469549867608, "loss": 0.0659, "theoretical_loss": 3.417156785347262, "tokens_seen": 2321022976 }, { "epoch": 0.41, "learning_rate": 0.00029960683623525634, "loss": 0.0673, "theoretical_loss": 3.4171412933546224, "tokens_seen": 2321154048 }, { "epoch": 0.41, "learning_rate": 0.0002995667174837519, "loss": 0.0675, "theoretical_loss": 3.4171258024816975, "tokens_seen": 2321285120 }, { "epoch": 0.41, "learning_rate": 0.0002995265987322475, "loss": 0.0654, "theoretical_loss": 3.4171103127283433, "tokens_seen": 2321416192 }, { "epoch": 0.41, "learning_rate": 0.00029948647998074304, "loss": 0.0665, "theoretical_loss": 3.4170948240944163, "tokens_seen": 2321547264 }, { "epoch": 0.41, "objective/train/advantage_avg": -0.0007399885798804462, "objective/train/docs_used": 845155, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.456810474395752, "objective/train/original_loss": 1.456810474395752, "objective/train/theoretical_loss": 3.417079336579772, "objective/train/tokens_used": 692202976, "objective/train/value_avg": -0.00867462158203125, "objective/train/value_loss": 0.00045225609210319817, "objective/train/value_max": -6.204843521118164e-05, "objective/train/value_min": -0.986328125, "objective/train/value_reward_corr": 0.7020698491609376, "objective/train/value_std": 0.01885986328125, "objective/train/weight_avg": 0.999454915523529, "objective/train/weighted_lm_loss": 1.4555225372314453, "objective/train/weights_max": 1.6228960752487183, "objective/train/weights_min": 0.24087868630886078, "theoretical_loss": 3.417079336579772, "tokens_seen": 2321678336 }, { "epoch": 0.41, "learning_rate": 0.00029944636122923856, "loss": 0.0683, "theoretical_loss": 3.417079336579772, "tokens_seen": 2321678336 }, { "epoch": 0.41, "learning_rate": 0.0002994062424777341, "loss": 0.0644, "theoretical_loss": 3.4170638501842663, "tokens_seen": 2321809408 }, { "epoch": 0.41, "learning_rate": 0.00029936612372622964, "loss": 0.0689, "theoretical_loss": 3.417048364907755, "tokens_seen": 2321940480 }, { "epoch": 0.41, "learning_rate": 0.00029932600497472515, "loss": 0.0688, "theoretical_loss": 3.417032880750094, "tokens_seen": 2322071552 }, { "epoch": 0.41, "learning_rate": 0.0002992858862232207, "loss": 0.0685, "theoretical_loss": 3.4170173977111404, "tokens_seen": 2322202624 }, { "epoch": 0.41, "learning_rate": 0.0002992457674717163, "loss": 0.068, "theoretical_loss": 3.417001915790749, "tokens_seen": 2322333696 }, { "epoch": 0.41, "learning_rate": 0.0002992056487202118, "loss": 0.0656, "theoretical_loss": 3.416986434988776, "tokens_seen": 2322464768 }, { "epoch": 0.41, "learning_rate": 0.0002991655299687074, "loss": 0.0656, "theoretical_loss": 3.416970955305078, "tokens_seen": 2322595840 }, { "epoch": 0.41, "learning_rate": 0.00029912541121720294, "loss": 0.0638, "theoretical_loss": 3.416955476739511, "tokens_seen": 2322726912 }, { "epoch": 0.41, "learning_rate": 0.0002990852924656985, "loss": 0.0663, "theoretical_loss": 3.416939999291931, "tokens_seen": 2322857984 }, { "epoch": 0.41, "learning_rate": 0.000299045173714194, "loss": 0.0648, "theoretical_loss": 3.4169245229621943, "tokens_seen": 2322989056 }, { "epoch": 0.41, "learning_rate": 0.0002990050549626896, "loss": 0.068, "theoretical_loss": 3.416909047750157, "tokens_seen": 2323120128 }, { "epoch": 0.41, "learning_rate": 0.0002989649362111851, "loss": 0.068, "theoretical_loss": 3.4168935736556754, "tokens_seen": 2323251200 }, { "epoch": 0.41, "learning_rate": 0.0002989248174596806, "loss": 0.0648, "theoretical_loss": 3.4168781006786055, "tokens_seen": 2323382272 }, { "epoch": 0.41, "learning_rate": 0.0002988846987081762, "loss": 0.0681, "theoretical_loss": 3.4168626288188038, "tokens_seen": 2323513344 }, { "epoch": 0.41, "learning_rate": 0.00029884457995667176, "loss": 0.0712, "theoretical_loss": 3.4168471580761266, "tokens_seen": 2323644416 }, { "epoch": 0.41, "learning_rate": 0.0002988044612051673, "loss": 0.0691, "theoretical_loss": 3.41683168845043, "tokens_seen": 2323775488 }, { "epoch": 0.41, "learning_rate": 0.00029876434245366284, "loss": 0.0705, "theoretical_loss": 3.416816219941571, "tokens_seen": 2323906560 }, { "epoch": 0.41, "learning_rate": 0.0002987242237021584, "loss": 0.0675, "theoretical_loss": 3.4168007525494053, "tokens_seen": 2324037632 }, { "epoch": 0.41, "learning_rate": 0.000298684104950654, "loss": 0.0678, "theoretical_loss": 3.4167852862737895, "tokens_seen": 2324168704 }, { "epoch": 0.41, "learning_rate": 0.0002986439861991495, "loss": 0.0666, "theoretical_loss": 3.4167698211145803, "tokens_seen": 2324299776 }, { "epoch": 0.41, "learning_rate": 0.00029860386744764506, "loss": 0.069, "theoretical_loss": 3.4167543570716337, "tokens_seen": 2324430848 }, { "epoch": 0.41, "learning_rate": 0.0002985637486961406, "loss": 0.0647, "theoretical_loss": 3.4167388941448067, "tokens_seen": 2324561920 }, { "epoch": 0.41, "learning_rate": 0.0002985236299446361, "loss": 0.0688, "theoretical_loss": 3.416723432333956, "tokens_seen": 2324692992 }, { "epoch": 0.41, "learning_rate": 0.00029848351119313166, "loss": 0.0678, "theoretical_loss": 3.416707971638937, "tokens_seen": 2324824064 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0005708211101591587, "objective/train/docs_used": 846164, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1567628383636475, "objective/train/original_loss": 1.1567628383636475, "objective/train/theoretical_loss": 3.416692512059607, "objective/train/tokens_used": 695479776, "objective/train/value_avg": -0.01166534423828125, "objective/train/value_loss": 0.0002669969398993999, "objective/train/value_max": -3.1948089599609375e-05, "objective/train/value_min": -0.96728515625, "objective/train/value_reward_corr": 0.7494831591997778, "objective/train/value_std": 0.019317626953125, "objective/train/weight_avg": 1.0006908178329468, "objective/train/weighted_lm_loss": 1.1565965414047241, "objective/train/weights_max": 1.2148007154464722, "objective/train/weights_min": 0.24090808629989624, "theoretical_loss": 3.416692512059607, "tokens_seen": 2324955136 }, { "epoch": 0.41, "learning_rate": 0.0002984433924416272, "loss": 0.0671, "theoretical_loss": 3.416692512059607, "tokens_seen": 2324955136 }, { "epoch": 0.41, "learning_rate": 0.0002984032736901228, "loss": 0.0635, "theoretical_loss": 3.416677053595823, "tokens_seen": 2325086208 }, { "epoch": 0.41, "learning_rate": 0.0002983631549386183, "loss": 0.0682, "theoretical_loss": 3.416661596247441, "tokens_seen": 2325217280 }, { "epoch": 0.41, "learning_rate": 0.0002983230361871139, "loss": 0.0697, "theoretical_loss": 3.4166461400143175, "tokens_seen": 2325348352 }, { "epoch": 0.41, "learning_rate": 0.00029828291743560944, "loss": 0.0672, "theoretical_loss": 3.41663068489631, "tokens_seen": 2325479424 }, { "epoch": 0.41, "learning_rate": 0.00029824279868410496, "loss": 0.0682, "theoretical_loss": 3.416615230893275, "tokens_seen": 2325610496 }, { "epoch": 0.41, "learning_rate": 0.0002982026799326005, "loss": 0.0681, "theoretical_loss": 3.4165997780050685, "tokens_seen": 2325741568 }, { "epoch": 0.41, "learning_rate": 0.00029816256118109604, "loss": 0.0732, "theoretical_loss": 3.416584326231548, "tokens_seen": 2325872640 }, { "epoch": 0.41, "learning_rate": 0.00029812244242959156, "loss": 0.0664, "theoretical_loss": 3.41656887557257, "tokens_seen": 2326003712 }, { "epoch": 0.41, "learning_rate": 0.0002980823236780871, "loss": 0.0675, "theoretical_loss": 3.416553426027991, "tokens_seen": 2326134784 }, { "epoch": 0.41, "learning_rate": 0.0002980422049265827, "loss": 0.0667, "theoretical_loss": 3.416537977597668, "tokens_seen": 2326265856 }, { "epoch": 0.41, "learning_rate": 0.00029800208617507826, "loss": 0.0655, "theoretical_loss": 3.4165225302814584, "tokens_seen": 2326396928 }, { "epoch": 0.41, "learning_rate": 0.0002979619674235738, "loss": 0.0665, "theoretical_loss": 3.4165070840792184, "tokens_seen": 2326528000 }, { "epoch": 0.41, "learning_rate": 0.00029792184867206934, "loss": 0.0669, "theoretical_loss": 3.4164916389908058, "tokens_seen": 2326659072 }, { "epoch": 0.41, "learning_rate": 0.0002978817299205649, "loss": 0.0653, "theoretical_loss": 3.4164761950160765, "tokens_seen": 2326790144 }, { "epoch": 0.41, "learning_rate": 0.0002978416111690604, "loss": 0.0684, "theoretical_loss": 3.416460752154888, "tokens_seen": 2326921216 }, { "epoch": 0.41, "learning_rate": 0.000297801492417556, "loss": 0.0681, "theoretical_loss": 3.4164453104070973, "tokens_seen": 2327052288 }, { "epoch": 0.41, "learning_rate": 0.0002977613736660515, "loss": 0.0656, "theoretical_loss": 3.4164298697725615, "tokens_seen": 2327183360 }, { "epoch": 0.41, "learning_rate": 0.000297721254914547, "loss": 0.0692, "theoretical_loss": 3.416414430251137, "tokens_seen": 2327314432 }, { "epoch": 0.41, "learning_rate": 0.0002976811361630426, "loss": 0.0612, "theoretical_loss": 3.416398991842682, "tokens_seen": 2327445504 }, { "epoch": 0.41, "learning_rate": 0.00029764101741153816, "loss": 0.0718, "theoretical_loss": 3.4163835545470524, "tokens_seen": 2327576576 }, { "epoch": 0.41, "learning_rate": 0.00029760089866003373, "loss": 0.0662, "theoretical_loss": 3.4163681183641064, "tokens_seen": 2327707648 }, { "epoch": 0.41, "learning_rate": 0.00029756077990852924, "loss": 0.0662, "theoretical_loss": 3.4163526832937, "tokens_seen": 2327838720 }, { "epoch": 0.41, "learning_rate": 0.0002975206611570248, "loss": 0.0639, "theoretical_loss": 3.416337249335691, "tokens_seen": 2327969792 }, { "epoch": 0.41, "learning_rate": 0.0002974805424055204, "loss": 0.0718, "theoretical_loss": 3.4163218164899374, "tokens_seen": 2328100864 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.00042356978519819677, "objective/train/docs_used": 847468, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.223412275314331, "objective/train/original_loss": 1.223412275314331, "objective/train/theoretical_loss": 3.4163063847562953, "objective/train/tokens_used": 698756576, "objective/train/value_avg": -0.00667572021484375, "objective/train/value_loss": 0.00013261278218124062, "objective/train/value_max": -4.83393669128418e-05, "objective/train/value_min": -0.42333984375, "objective/train/value_reward_corr": 0.8046092212840682, "objective/train/value_std": 0.015838623046875, "objective/train/weight_avg": 1.0004891157150269, "objective/train/weighted_lm_loss": 1.224427580833435, "objective/train/weights_max": 1.4233700037002563, "objective/train/weights_min": 0.6627775430679321, "theoretical_loss": 3.4163063847562953, "tokens_seen": 2328231936 }, { "epoch": 0.41, "learning_rate": 0.0002974404236540159, "loss": 0.0652, "theoretical_loss": 3.4163063847562953, "tokens_seen": 2328231936 }, { "epoch": 0.41, "learning_rate": 0.00029740030490251146, "loss": 0.0677, "theoretical_loss": 3.416290954134622, "tokens_seen": 2328363008 }, { "epoch": 0.41, "learning_rate": 0.000297360186151007, "loss": 0.0656, "theoretical_loss": 3.416275524624776, "tokens_seen": 2328494080 }, { "epoch": 0.41, "learning_rate": 0.0002973200673995025, "loss": 0.064, "theoretical_loss": 3.4162600962266128, "tokens_seen": 2328625152 }, { "epoch": 0.41, "learning_rate": 0.00029727994864799806, "loss": 0.0707, "theoretical_loss": 3.416244668939991, "tokens_seen": 2328756224 }, { "epoch": 0.41, "learning_rate": 0.0002972398298964936, "loss": 0.0683, "theoretical_loss": 3.4162292427647674, "tokens_seen": 2328887296 }, { "epoch": 0.41, "learning_rate": 0.0002971997111449892, "loss": 0.0674, "theoretical_loss": 3.4162138177007995, "tokens_seen": 2329018368 }, { "epoch": 0.41, "learning_rate": 0.0002971595923934847, "loss": 0.0666, "theoretical_loss": 3.4161983937479454, "tokens_seen": 2329149440 }, { "epoch": 0.41, "learning_rate": 0.0002971194736419803, "loss": 0.0612, "theoretical_loss": 3.4161829709060614, "tokens_seen": 2329280512 }, { "epoch": 0.41, "learning_rate": 0.00029707935489047585, "loss": 0.0656, "theoretical_loss": 3.4161675491750056, "tokens_seen": 2329411584 }, { "epoch": 0.41, "learning_rate": 0.00029703923613897136, "loss": 0.0687, "theoretical_loss": 3.4161521285546357, "tokens_seen": 2329542656 }, { "epoch": 0.41, "learning_rate": 0.00029699911738746693, "loss": 0.0659, "theoretical_loss": 3.416136709044809, "tokens_seen": 2329673728 }, { "epoch": 0.41, "learning_rate": 0.00029695899863596244, "loss": 0.0732, "theoretical_loss": 3.416121290645383, "tokens_seen": 2329804800 }, { "epoch": 0.41, "learning_rate": 0.00029691887988445796, "loss": 0.0683, "theoretical_loss": 3.416105873356215, "tokens_seen": 2329935872 }, { "epoch": 0.41, "learning_rate": 0.0002968787611329535, "loss": 0.0635, "theoretical_loss": 3.416090457177163, "tokens_seen": 2330066944 }, { "epoch": 0.41, "learning_rate": 0.0002968386423814491, "loss": 0.0703, "theoretical_loss": 3.4160750421080843, "tokens_seen": 2330198016 }, { "epoch": 0.41, "learning_rate": 0.00029679852362994466, "loss": 0.0707, "theoretical_loss": 3.4160596281488367, "tokens_seen": 2330329088 }, { "epoch": 0.41, "learning_rate": 0.0002967584048784402, "loss": 0.0703, "theoretical_loss": 3.4160442152992783, "tokens_seen": 2330460160 }, { "epoch": 0.41, "learning_rate": 0.00029671828612693574, "loss": 0.0648, "theoretical_loss": 3.416028803559266, "tokens_seen": 2330591232 }, { "epoch": 0.41, "learning_rate": 0.0002966781673754313, "loss": 0.0694, "theoretical_loss": 3.4160133929286585, "tokens_seen": 2330722304 }, { "epoch": 0.41, "learning_rate": 0.0002966380486239268, "loss": 0.0709, "theoretical_loss": 3.415997983407313, "tokens_seen": 2330853376 }, { "epoch": 0.41, "learning_rate": 0.0002965979298724224, "loss": 0.0636, "theoretical_loss": 3.4159825749950867, "tokens_seen": 2330984448 }, { "epoch": 0.41, "learning_rate": 0.0002965578111209179, "loss": 0.0705, "theoretical_loss": 3.4159671676918384, "tokens_seen": 2331115520 }, { "epoch": 0.41, "learning_rate": 0.0002965176923694134, "loss": 0.0651, "theoretical_loss": 3.4159517614974257, "tokens_seen": 2331246592 }, { "epoch": 0.41, "learning_rate": 0.000296477573617909, "loss": 0.0646, "theoretical_loss": 3.415936356411706, "tokens_seen": 2331377664 }, { "epoch": 0.41, "objective/train/advantage_avg": 0.0010579488007351756, "objective/train/docs_used": 848727, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.52366042137146, "objective/train/original_loss": 1.52366042137146, "objective/train/theoretical_loss": 3.4159209524345373, "objective/train/tokens_used": 702033376, "objective/train/value_avg": -0.010223388671875, "objective/train/value_loss": 0.00021021277643740177, "objective/train/value_max": -3.170967102050781e-05, "objective/train/value_min": -0.8525390625, "objective/train/value_reward_corr": 0.7513121915662979, "objective/train/value_std": 0.017242431640625, "objective/train/weight_avg": 1.0011595487594604, "objective/train/weighted_lm_loss": 1.5246429443359375, "objective/train/weights_max": 1.6090333461761475, "objective/train/weights_min": 0.3933543562889099, "theoretical_loss": 3.4159209524345373, "tokens_seen": 2331508736 }, { "epoch": 0.41, "learning_rate": 0.00029643745486640456, "loss": 0.0727, "theoretical_loss": 3.4159209524345373, "tokens_seen": 2331508736 }, { "epoch": 0.41, "learning_rate": 0.00029639733611490013, "loss": 0.0652, "theoretical_loss": 3.415905549565778, "tokens_seen": 2331639808 }, { "epoch": 0.41, "learning_rate": 0.00029635721736339564, "loss": 0.0652, "theoretical_loss": 3.4158901478052863, "tokens_seen": 2331770880 }, { "epoch": 0.41, "learning_rate": 0.0002963170986118912, "loss": 0.0705, "theoretical_loss": 3.4158747471529187, "tokens_seen": 2331901952 }, { "epoch": 0.41, "learning_rate": 0.0002962769798603868, "loss": 0.0677, "theoretical_loss": 3.4158593476085346, "tokens_seen": 2332033024 }, { "epoch": 0.41, "learning_rate": 0.0002962368611088823, "loss": 0.0658, "theoretical_loss": 3.4158439491719914, "tokens_seen": 2332164096 }, { "epoch": 0.41, "learning_rate": 0.00029619674235737786, "loss": 0.0664, "theoretical_loss": 3.4158285518431475, "tokens_seen": 2332295168 }, { "epoch": 0.41, "learning_rate": 0.0002961566236058734, "loss": 0.067, "theoretical_loss": 3.4158131556218607, "tokens_seen": 2332426240 }, { "epoch": 0.41, "learning_rate": 0.00029611650485436894, "loss": 0.0668, "theoretical_loss": 3.4157977605079894, "tokens_seen": 2332557312 }, { "epoch": 0.41, "learning_rate": 0.00029607638610286446, "loss": 0.0704, "theoretical_loss": 3.415782366501391, "tokens_seen": 2332688384 }, { "epoch": 0.41, "learning_rate": 0.00029603626735136, "loss": 0.0644, "theoretical_loss": 3.415766973601924, "tokens_seen": 2332819456 }, { "epoch": 0.41, "learning_rate": 0.0002959961485998556, "loss": 0.0654, "theoretical_loss": 3.415751581809447, "tokens_seen": 2332950528 }, { "epoch": 0.41, "learning_rate": 0.0002959560298483511, "loss": 0.0672, "theoretical_loss": 3.415736191123818, "tokens_seen": 2333081600 }, { "epoch": 0.41, "learning_rate": 0.0002959159110968467, "loss": 0.067, "theoretical_loss": 3.415720801544895, "tokens_seen": 2333212672 }, { "epoch": 0.41, "learning_rate": 0.00029587579234534225, "loss": 0.0669, "theoretical_loss": 3.4157054130725366, "tokens_seen": 2333343744 }, { "epoch": 0.41, "learning_rate": 0.00029583567359383776, "loss": 0.0693, "theoretical_loss": 3.415690025706601, "tokens_seen": 2333474816 }, { "epoch": 0.41, "learning_rate": 0.00029579555484233333, "loss": 0.0646, "theoretical_loss": 3.415674639446946, "tokens_seen": 2333605888 }, { "epoch": 0.41, "learning_rate": 0.00029575543609082884, "loss": 0.0669, "theoretical_loss": 3.4156592542934305, "tokens_seen": 2333736960 }, { "epoch": 0.41, "learning_rate": 0.0002957153173393244, "loss": 0.0674, "theoretical_loss": 3.415643870245913, "tokens_seen": 2333868032 }, { "epoch": 0.41, "learning_rate": 0.0002956751985878199, "loss": 0.067, "theoretical_loss": 3.415628487304251, "tokens_seen": 2333999104 }, { "epoch": 0.41, "learning_rate": 0.0002956350798363155, "loss": 0.0691, "theoretical_loss": 3.4156131054683034, "tokens_seen": 2334130176 }, { "epoch": 0.41, "learning_rate": 0.00029559496108481106, "loss": 0.0633, "theoretical_loss": 3.415597724737929, "tokens_seen": 2334261248 }, { "epoch": 0.41, "learning_rate": 0.0002955548423333066, "loss": 0.0661, "theoretical_loss": 3.415582345112986, "tokens_seen": 2334392320 }, { "epoch": 0.41, "learning_rate": 0.00029551472358180215, "loss": 0.0667, "theoretical_loss": 3.415566966593333, "tokens_seen": 2334523392 }, { "epoch": 0.41, "learning_rate": 0.0002954746048302977, "loss": 0.0682, "theoretical_loss": 3.415551589178828, "tokens_seen": 2334654464 }, { "epoch": 0.41, "objective/train/advantage_avg": 1.4060563444218133e-05, "objective/train/docs_used": 849886, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3074522018432617, "objective/train/original_loss": 1.3074522018432617, "objective/train/theoretical_loss": 3.4155362128693305, "objective/train/tokens_used": 705310176, "objective/train/value_avg": -0.008514404296875, "objective/train/value_loss": 0.00028260028921067715, "objective/train/value_max": -4.398822784423828e-05, "objective/train/value_min": -0.9873046875, "objective/train/value_reward_corr": 0.9017063224057861, "objective/train/value_std": 0.035675048828125, "objective/train/weight_avg": 1.0001401901245117, "objective/train/weighted_lm_loss": 1.306518316268921, "objective/train/weights_max": 1.3191113471984863, "objective/train/weights_min": 0.3959558308124542, "theoretical_loss": 3.4155362128693305, "tokens_seen": 2334785536 }, { "epoch": 0.42, "learning_rate": 0.00029543448607879323, "loss": 0.069, "theoretical_loss": 3.4155362128693305, "tokens_seen": 2334785536 }, { "epoch": 0.42, "learning_rate": 0.0002953943673272888, "loss": 0.0671, "theoretical_loss": 3.415520837664698, "tokens_seen": 2334916608 }, { "epoch": 0.42, "learning_rate": 0.0002953542485757843, "loss": 0.0638, "theoretical_loss": 3.4155054635647897, "tokens_seen": 2335047680 }, { "epoch": 0.42, "learning_rate": 0.0002953141298242799, "loss": 0.0665, "theoretical_loss": 3.415490090569464, "tokens_seen": 2335178752 }, { "epoch": 0.42, "learning_rate": 0.0002952740110727754, "loss": 0.0714, "theoretical_loss": 3.41547471867858, "tokens_seen": 2335309824 }, { "epoch": 0.42, "learning_rate": 0.00029523389232127096, "loss": 0.0687, "theoretical_loss": 3.415459347891996, "tokens_seen": 2335440896 }, { "epoch": 0.42, "learning_rate": 0.00029519377356976653, "loss": 0.0666, "theoretical_loss": 3.4154439782095705, "tokens_seen": 2335571968 }, { "epoch": 0.42, "learning_rate": 0.00029515365481826204, "loss": 0.066, "theoretical_loss": 3.4154286096311623, "tokens_seen": 2335703040 }, { "epoch": 0.42, "learning_rate": 0.0002951135360667576, "loss": 0.0684, "theoretical_loss": 3.415413242156631, "tokens_seen": 2335834112 }, { "epoch": 0.42, "learning_rate": 0.0002950734173152532, "loss": 0.0688, "theoretical_loss": 3.4153978757858336, "tokens_seen": 2335965184 }, { "epoch": 0.42, "learning_rate": 0.0002950332985637487, "loss": 0.0654, "theoretical_loss": 3.4153825105186306, "tokens_seen": 2336096256 }, { "epoch": 0.42, "learning_rate": 0.00029499317981224426, "loss": 0.0678, "theoretical_loss": 3.41536714635488, "tokens_seen": 2336227328 }, { "epoch": 0.42, "learning_rate": 0.0002949530610607398, "loss": 0.0641, "theoretical_loss": 3.415351783294441, "tokens_seen": 2336358400 }, { "epoch": 0.42, "learning_rate": 0.00029491294230923535, "loss": 0.0692, "theoretical_loss": 3.4153364213371726, "tokens_seen": 2336489472 }, { "epoch": 0.42, "learning_rate": 0.00029487282355773086, "loss": 0.0649, "theoretical_loss": 3.415321060482933, "tokens_seen": 2336620544 }, { "epoch": 0.42, "learning_rate": 0.00029483270480622643, "loss": 0.0684, "theoretical_loss": 3.415305700731582, "tokens_seen": 2336751616 }, { "epoch": 0.42, "learning_rate": 0.000294792586054722, "loss": 0.0662, "theoretical_loss": 3.415290342082978, "tokens_seen": 2336882688 }, { "epoch": 0.42, "learning_rate": 0.0002947524673032175, "loss": 0.0691, "theoretical_loss": 3.41527498453698, "tokens_seen": 2337013760 }, { "epoch": 0.42, "learning_rate": 0.0002947123485517131, "loss": 0.0677, "theoretical_loss": 3.4152596280934473, "tokens_seen": 2337144832 }, { "epoch": 0.42, "learning_rate": 0.00029467222980020865, "loss": 0.0687, "theoretical_loss": 3.4152442727522385, "tokens_seen": 2337275904 }, { "epoch": 0.42, "learning_rate": 0.00029463211104870416, "loss": 0.0708, "theoretical_loss": 3.415228918513213, "tokens_seen": 2337406976 }, { "epoch": 0.42, "learning_rate": 0.00029459199229719973, "loss": 0.0685, "theoretical_loss": 3.41521356537623, "tokens_seen": 2337538048 }, { "epoch": 0.42, "learning_rate": 0.00029455187354569524, "loss": 0.0651, "theoretical_loss": 3.4151982133411485, "tokens_seen": 2337669120 }, { "epoch": 0.42, "learning_rate": 0.0002945117547941908, "loss": 0.0706, "theoretical_loss": 3.415182862407827, "tokens_seen": 2337800192 }, { "epoch": 0.42, "learning_rate": 0.0002944716360426863, "loss": 0.0704, "theoretical_loss": 3.415167512576126, "tokens_seen": 2337931264 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.0004972875467501581, "objective/train/docs_used": 851124, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.377321481704712, "objective/train/original_loss": 1.3773213624954224, "objective/train/theoretical_loss": 3.4151521638459035, "objective/train/tokens_used": 708586976, "objective/train/value_avg": -0.00743865966796875, "objective/train/value_loss": 0.0005390208680182695, "objective/train/value_max": -4.267692565917969e-05, "objective/train/value_min": -0.66748046875, "objective/train/value_reward_corr": 0.6054687264983623, "objective/train/value_std": 0.0149383544921875, "objective/train/weight_avg": 0.9997267723083496, "objective/train/weighted_lm_loss": 1.376246452331543, "objective/train/weights_max": 1.3618415594100952, "objective/train/weights_min": 0.36883002519607544, "theoretical_loss": 3.4151521638459035, "tokens_seen": 2338062336 }, { "epoch": 0.42, "learning_rate": 0.0002944315172911819, "loss": 0.0667, "theoretical_loss": 3.4151521638459035, "tokens_seen": 2338062336 }, { "epoch": 0.42, "learning_rate": 0.00029439139853967746, "loss": 0.0667, "theoretical_loss": 3.4151368162170193, "tokens_seen": 2338193408 }, { "epoch": 0.42, "learning_rate": 0.000294351279788173, "loss": 0.0639, "theoretical_loss": 3.4151214696893324, "tokens_seen": 2338324480 }, { "epoch": 0.42, "learning_rate": 0.00029431116103666855, "loss": 0.0677, "theoretical_loss": 3.415106124262702, "tokens_seen": 2338455552 }, { "epoch": 0.42, "learning_rate": 0.0002942710422851641, "loss": 0.0668, "theoretical_loss": 3.4150907799369876, "tokens_seen": 2338586624 }, { "epoch": 0.42, "learning_rate": 0.00029423092353365963, "loss": 0.0722, "theoretical_loss": 3.4150754367120486, "tokens_seen": 2338717696 }, { "epoch": 0.42, "learning_rate": 0.0002941908047821552, "loss": 0.0678, "theoretical_loss": 3.4150600945877443, "tokens_seen": 2338848768 }, { "epoch": 0.42, "learning_rate": 0.0002941506860306507, "loss": 0.0687, "theoretical_loss": 3.415044753563934, "tokens_seen": 2338979840 }, { "epoch": 0.42, "learning_rate": 0.0002941105672791463, "loss": 0.0637, "theoretical_loss": 3.415029413640477, "tokens_seen": 2339110912 }, { "epoch": 0.42, "learning_rate": 0.0002940704485276418, "loss": 0.0678, "theoretical_loss": 3.4150140748172326, "tokens_seen": 2339241984 }, { "epoch": 0.42, "learning_rate": 0.00029403032977613736, "loss": 0.0681, "theoretical_loss": 3.414998737094061, "tokens_seen": 2339373056 }, { "epoch": 0.42, "learning_rate": 0.00029399021102463293, "loss": 0.0682, "theoretical_loss": 3.4149834004708204, "tokens_seen": 2339504128 }, { "epoch": 0.42, "learning_rate": 0.00029395009227312844, "loss": 0.0658, "theoretical_loss": 3.414968064947372, "tokens_seen": 2339635200 }, { "epoch": 0.42, "learning_rate": 0.000293909973521624, "loss": 0.0637, "theoretical_loss": 3.4149527305235736, "tokens_seen": 2339766272 }, { "epoch": 0.42, "learning_rate": 0.0002938698547701196, "loss": 0.0653, "theoretical_loss": 3.4149373971992856, "tokens_seen": 2339897344 }, { "epoch": 0.42, "learning_rate": 0.00029382973601861515, "loss": 0.0677, "theoretical_loss": 3.414922064974368, "tokens_seen": 2340028416 }, { "epoch": 0.42, "learning_rate": 0.00029378961726711066, "loss": 0.0705, "theoretical_loss": 3.4149067338486794, "tokens_seen": 2340159488 }, { "epoch": 0.42, "learning_rate": 0.0002937494985156062, "loss": 0.0717, "theoretical_loss": 3.41489140382208, "tokens_seen": 2340290560 }, { "epoch": 0.42, "learning_rate": 0.00029370937976410175, "loss": 0.0669, "theoretical_loss": 3.4148760748944293, "tokens_seen": 2340421632 }, { "epoch": 0.42, "learning_rate": 0.00029366926101259726, "loss": 0.0692, "theoretical_loss": 3.414860747065587, "tokens_seen": 2340552704 }, { "epoch": 0.42, "learning_rate": 0.00029362914226109283, "loss": 0.0723, "theoretical_loss": 3.4148454203354133, "tokens_seen": 2340683776 }, { "epoch": 0.42, "learning_rate": 0.0002935890235095884, "loss": 0.0657, "theoretical_loss": 3.4148300947037677, "tokens_seen": 2340814848 }, { "epoch": 0.42, "learning_rate": 0.0002935489047580839, "loss": 0.0712, "theoretical_loss": 3.4148147701705094, "tokens_seen": 2340945920 }, { "epoch": 0.42, "learning_rate": 0.0002935087860065795, "loss": 0.0682, "theoretical_loss": 3.4147994467354987, "tokens_seen": 2341076992 }, { "epoch": 0.42, "learning_rate": 0.00029346866725507505, "loss": 0.0702, "theoretical_loss": 3.414784124398595, "tokens_seen": 2341208064 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.000883105443790555, "objective/train/docs_used": 852174, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.370391607284546, "objective/train/original_loss": 1.3703913688659668, "objective/train/theoretical_loss": 3.414768803159659, "objective/train/tokens_used": 711863776, "objective/train/value_avg": -0.005092620849609375, "objective/train/value_loss": 0.00011912696209037676, "objective/train/value_max": -4.988908767700195e-05, "objective/train/value_min": -0.2626953125, "objective/train/value_reward_corr": 0.6348136630080949, "objective/train/value_std": 0.00907135009765625, "objective/train/weight_avg": 1.000938057899475, "objective/train/weighted_lm_loss": 1.3716849088668823, "objective/train/weights_max": 1.207998514175415, "objective/train/weights_min": 0.40907374024391174, "theoretical_loss": 3.414768803159659, "tokens_seen": 2341339136 }, { "epoch": 0.42, "learning_rate": 0.0002934285485035706, "loss": 0.0677, "theoretical_loss": 3.414768803159659, "tokens_seen": 2341339136 }, { "epoch": 0.42, "learning_rate": 0.00029338842975206613, "loss": 0.0684, "theoretical_loss": 3.4147534830185493, "tokens_seen": 2341470208 }, { "epoch": 0.42, "learning_rate": 0.0002933483110005617, "loss": 0.0676, "theoretical_loss": 3.4147381639751266, "tokens_seen": 2341601280 }, { "epoch": 0.42, "learning_rate": 0.0002933081922490572, "loss": 0.0672, "theoretical_loss": 3.414722846029251, "tokens_seen": 2341732352 }, { "epoch": 0.42, "learning_rate": 0.00029326807349755273, "loss": 0.0694, "theoretical_loss": 3.414707529180782, "tokens_seen": 2341863424 }, { "epoch": 0.42, "learning_rate": 0.0002932279547460483, "loss": 0.0632, "theoretical_loss": 3.4146922134295794, "tokens_seen": 2341994496 }, { "epoch": 0.42, "learning_rate": 0.00029318783599454386, "loss": 0.0664, "theoretical_loss": 3.414676898775504, "tokens_seen": 2342125568 }, { "epoch": 0.42, "learning_rate": 0.0002931477172430394, "loss": 0.0699, "theoretical_loss": 3.414661585218415, "tokens_seen": 2342256640 }, { "epoch": 0.42, "learning_rate": 0.00029310759849153495, "loss": 0.0659, "theoretical_loss": 3.4146462727581723, "tokens_seen": 2342387712 }, { "epoch": 0.42, "learning_rate": 0.0002930674797400305, "loss": 0.0695, "theoretical_loss": 3.4146309613946366, "tokens_seen": 2342518784 }, { "epoch": 0.42, "learning_rate": 0.0002930273609885261, "loss": 0.0669, "theoretical_loss": 3.414615651127668, "tokens_seen": 2342649856 }, { "epoch": 0.42, "learning_rate": 0.0002929872422370216, "loss": 0.0699, "theoretical_loss": 3.4146003419571267, "tokens_seen": 2342780928 }, { "epoch": 0.42, "learning_rate": 0.00029294712348551717, "loss": 0.0669, "theoretical_loss": 3.4145850338828723, "tokens_seen": 2342912000 }, { "epoch": 0.42, "learning_rate": 0.0002929070047340127, "loss": 0.0698, "theoretical_loss": 3.414569726904765, "tokens_seen": 2343043072 }, { "epoch": 0.42, "learning_rate": 0.0002928668859825082, "loss": 0.0705, "theoretical_loss": 3.4145544210226655, "tokens_seen": 2343174144 }, { "epoch": 0.42, "learning_rate": 0.00029282676723100376, "loss": 0.0668, "theoretical_loss": 3.414539116236434, "tokens_seen": 2343305216 }, { "epoch": 0.42, "learning_rate": 0.00029278664847949933, "loss": 0.069, "theoretical_loss": 3.41452381254593, "tokens_seen": 2343436288 }, { "epoch": 0.42, "learning_rate": 0.00029274652972799485, "loss": 0.0673, "theoretical_loss": 3.4145085099510144, "tokens_seen": 2343567360 }, { "epoch": 0.42, "learning_rate": 0.0002927064109764904, "loss": 0.0669, "theoretical_loss": 3.4144932084515474, "tokens_seen": 2343698432 }, { "epoch": 0.42, "learning_rate": 0.000292666292224986, "loss": 0.0693, "theoretical_loss": 3.4144779080473895, "tokens_seen": 2343829504 }, { "epoch": 0.42, "learning_rate": 0.00029262617347348155, "loss": 0.0667, "theoretical_loss": 3.4144626087384005, "tokens_seen": 2343960576 }, { "epoch": 0.42, "learning_rate": 0.00029258605472197707, "loss": 0.0649, "theoretical_loss": 3.4144473105244413, "tokens_seen": 2344091648 }, { "epoch": 0.42, "learning_rate": 0.00029254593597047263, "loss": 0.0669, "theoretical_loss": 3.414432013405372, "tokens_seen": 2344222720 }, { "epoch": 0.42, "learning_rate": 0.00029250581721896815, "loss": 0.0658, "theoretical_loss": 3.4144167173810533, "tokens_seen": 2344353792 }, { "epoch": 0.42, "learning_rate": 0.00029246569846746366, "loss": 0.0686, "theoretical_loss": 3.4144014224513457, "tokens_seen": 2344484864 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.00028078421019017696, "objective/train/docs_used": 853299, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2762986421585083, "objective/train/original_loss": 1.2762985229492188, "objective/train/theoretical_loss": 3.414386128616109, "objective/train/tokens_used": 715140576, "objective/train/value_avg": -0.007602691650390625, "objective/train/value_loss": 0.00017296598525717854, "objective/train/value_max": -2.6702880859375e-05, "objective/train/value_min": -0.265380859375, "objective/train/value_reward_corr": 0.7668798459498578, "objective/train/value_std": 0.01363372802734375, "objective/train/weight_avg": 0.9997972249984741, "objective/train/weighted_lm_loss": 1.2766621112823486, "objective/train/weights_max": 1.0678812265396118, "objective/train/weights_min": 0.3732737898826599, "theoretical_loss": 3.414386128616109, "tokens_seen": 2344615936 }, { "epoch": 0.42, "learning_rate": 0.00029242557971595923, "loss": 0.065, "theoretical_loss": 3.414386128616109, "tokens_seen": 2344615936 }, { "epoch": 0.42, "learning_rate": 0.0002923854609644548, "loss": 0.0661, "theoretical_loss": 3.4143708358752045, "tokens_seen": 2344747008 }, { "epoch": 0.42, "learning_rate": 0.0002923453422129503, "loss": 0.07, "theoretical_loss": 3.414355544228492, "tokens_seen": 2344878080 }, { "epoch": 0.42, "learning_rate": 0.0002923052234614459, "loss": 0.0685, "theoretical_loss": 3.414340253675833, "tokens_seen": 2345009152 }, { "epoch": 0.42, "learning_rate": 0.00029226510470994145, "loss": 0.0674, "theoretical_loss": 3.414324964217087, "tokens_seen": 2345140224 }, { "epoch": 0.42, "learning_rate": 0.000292224985958437, "loss": 0.0643, "theoretical_loss": 3.414309675852116, "tokens_seen": 2345271296 }, { "epoch": 0.42, "learning_rate": 0.00029218486720693253, "loss": 0.0669, "theoretical_loss": 3.414294388580779, "tokens_seen": 2345402368 }, { "epoch": 0.42, "learning_rate": 0.0002921447484554281, "loss": 0.0675, "theoretical_loss": 3.4142791024029377, "tokens_seen": 2345533440 }, { "epoch": 0.42, "learning_rate": 0.0002921046297039236, "loss": 0.0669, "theoretical_loss": 3.4142638173184525, "tokens_seen": 2345664512 }, { "epoch": 0.42, "learning_rate": 0.00029206451095241913, "loss": 0.0666, "theoretical_loss": 3.4142485333271844, "tokens_seen": 2345795584 }, { "epoch": 0.42, "learning_rate": 0.0002920243922009147, "loss": 0.0653, "theoretical_loss": 3.4142332504289934, "tokens_seen": 2345926656 }, { "epoch": 0.42, "learning_rate": 0.00029198427344941027, "loss": 0.07, "theoretical_loss": 3.414217968623741, "tokens_seen": 2346057728 }, { "epoch": 0.42, "learning_rate": 0.0002919441546979058, "loss": 0.0626, "theoretical_loss": 3.414202687911288, "tokens_seen": 2346188800 }, { "epoch": 0.42, "learning_rate": 0.00029190403594640135, "loss": 0.066, "theoretical_loss": 3.4141874082914945, "tokens_seen": 2346319872 }, { "epoch": 0.42, "learning_rate": 0.0002918639171948969, "loss": 0.0705, "theoretical_loss": 3.414172129764222, "tokens_seen": 2346450944 }, { "epoch": 0.42, "learning_rate": 0.0002918237984433925, "loss": 0.0694, "theoretical_loss": 3.414156852329331, "tokens_seen": 2346582016 }, { "epoch": 0.42, "learning_rate": 0.000291783679691888, "loss": 0.0709, "theoretical_loss": 3.414141575986682, "tokens_seen": 2346713088 }, { "epoch": 0.42, "learning_rate": 0.00029174356094038357, "loss": 0.0637, "theoretical_loss": 3.4141263007361373, "tokens_seen": 2346844160 }, { "epoch": 0.42, "learning_rate": 0.0002917034421888791, "loss": 0.0673, "theoretical_loss": 3.4141110265775563, "tokens_seen": 2346975232 }, { "epoch": 0.42, "learning_rate": 0.0002916633234373746, "loss": 0.0671, "theoretical_loss": 3.414095753510801, "tokens_seen": 2347106304 }, { "epoch": 0.42, "learning_rate": 0.00029162320468587016, "loss": 0.0682, "theoretical_loss": 3.414080481535732, "tokens_seen": 2347237376 }, { "epoch": 0.42, "learning_rate": 0.00029158308593436573, "loss": 0.066, "theoretical_loss": 3.4140652106522102, "tokens_seen": 2347368448 }, { "epoch": 0.42, "learning_rate": 0.00029154296718286125, "loss": 0.0707, "theoretical_loss": 3.414049940860097, "tokens_seen": 2347499520 }, { "epoch": 0.42, "learning_rate": 0.0002915028484313568, "loss": 0.0697, "theoretical_loss": 3.4140346721592527, "tokens_seen": 2347630592 }, { "epoch": 0.42, "learning_rate": 0.0002914627296798524, "loss": 0.0731, "theoretical_loss": 3.4140194045495393, "tokens_seen": 2347761664 }, { "epoch": 0.42, "objective/train/advantage_avg": 0.0007640849216841161, "objective/train/docs_used": 854534, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3154059648513794, "objective/train/original_loss": 1.3154058456420898, "objective/train/theoretical_loss": 3.4140041380308177, "objective/train/tokens_used": 718417376, "objective/train/value_avg": -0.00811767578125, "objective/train/value_loss": 0.00021107665088493377, "objective/train/value_max": -6.455183029174805e-05, "objective/train/value_min": -0.32177734375, "objective/train/value_reward_corr": 0.7207603483721852, "objective/train/value_std": 0.01474761962890625, "objective/train/weight_avg": 1.0008602142333984, "objective/train/weighted_lm_loss": 1.3165479898452759, "objective/train/weights_max": 1.2596932649612427, "objective/train/weights_min": 0.3702559173107147, "theoretical_loss": 3.4140041380308177, "tokens_seen": 2347892736 }, { "epoch": 0.42, "learning_rate": 0.00029142261092834795, "loss": 0.0693, "theoretical_loss": 3.4140041380308177, "tokens_seen": 2347892736 }, { "epoch": 0.42, "learning_rate": 0.00029138249217684347, "loss": 0.0675, "theoretical_loss": 3.413988872602949, "tokens_seen": 2348023808 }, { "epoch": 0.42, "learning_rate": 0.00029134237342533903, "loss": 0.0645, "theoretical_loss": 3.4139736082657937, "tokens_seen": 2348154880 }, { "epoch": 0.42, "learning_rate": 0.00029130225467383455, "loss": 0.0666, "theoretical_loss": 3.4139583450192137, "tokens_seen": 2348285952 }, { "epoch": 0.42, "learning_rate": 0.00029126213592233006, "loss": 0.0651, "theoretical_loss": 3.4139430828630704, "tokens_seen": 2348417024 }, { "epoch": 0.42, "learning_rate": 0.00029122201717082563, "loss": 0.0687, "theoretical_loss": 3.413927821797224, "tokens_seen": 2348548096 }, { "epoch": 0.42, "learning_rate": 0.0002911818984193212, "loss": 0.0655, "theoretical_loss": 3.413912561821537, "tokens_seen": 2348679168 }, { "epoch": 0.42, "learning_rate": 0.00029114177966781677, "loss": 0.0677, "theoretical_loss": 3.41389730293587, "tokens_seen": 2348810240 }, { "epoch": 0.42, "learning_rate": 0.0002911016609163123, "loss": 0.0681, "theoretical_loss": 3.4138820451400846, "tokens_seen": 2348941312 }, { "epoch": 0.42, "learning_rate": 0.00029106154216480785, "loss": 0.0698, "theoretical_loss": 3.413866788434042, "tokens_seen": 2349072384 }, { "epoch": 0.42, "learning_rate": 0.0002910214234133034, "loss": 0.0689, "theoretical_loss": 3.4138515328176036, "tokens_seen": 2349203456 }, { "epoch": 0.42, "learning_rate": 0.00029098130466179893, "loss": 0.0728, "theoretical_loss": 3.413836278290631, "tokens_seen": 2349334528 }, { "epoch": 0.42, "learning_rate": 0.0002909411859102945, "loss": 0.0666, "theoretical_loss": 3.413821024852985, "tokens_seen": 2349465600 }, { "epoch": 0.42, "learning_rate": 0.00029090106715879, "loss": 0.0694, "theoretical_loss": 3.4138057725045274, "tokens_seen": 2349596672 }, { "epoch": 0.42, "learning_rate": 0.00029086094840728553, "loss": 0.0691, "theoretical_loss": 3.41379052124512, "tokens_seen": 2349727744 }, { "epoch": 0.42, "learning_rate": 0.0002908208296557811, "loss": 0.0677, "theoretical_loss": 3.413775271074624, "tokens_seen": 2349858816 }, { "epoch": 0.42, "learning_rate": 0.00029078071090427667, "loss": 0.0654, "theoretical_loss": 3.4137600219929007, "tokens_seen": 2349989888 }, { "epoch": 0.42, "learning_rate": 0.00029074059215277224, "loss": 0.0666, "theoretical_loss": 3.413744773999812, "tokens_seen": 2350120960 }, { "epoch": 0.42, "learning_rate": 0.00029070047340126775, "loss": 0.0655, "theoretical_loss": 3.41372952709522, "tokens_seen": 2350252032 }, { "epoch": 0.42, "learning_rate": 0.0002906603546497633, "loss": 0.0666, "theoretical_loss": 3.413714281278985, "tokens_seen": 2350383104 }, { "epoch": 0.42, "learning_rate": 0.0002906202358982589, "loss": 0.0634, "theoretical_loss": 3.4136990365509696, "tokens_seen": 2350514176 }, { "epoch": 0.42, "learning_rate": 0.0002905801171467544, "loss": 0.0691, "theoretical_loss": 3.413683792911035, "tokens_seen": 2350645248 }, { "epoch": 0.42, "learning_rate": 0.00029053999839524997, "loss": 0.0677, "theoretical_loss": 3.413668550359043, "tokens_seen": 2350776320 }, { "epoch": 0.42, "learning_rate": 0.0002904998796437455, "loss": 0.0669, "theoretical_loss": 3.413653308894855, "tokens_seen": 2350907392 }, { "epoch": 0.42, "learning_rate": 0.000290459760892241, "loss": 0.0639, "theoretical_loss": 3.4136380685183334, "tokens_seen": 2351038464 }, { "epoch": 0.42, "objective/train/advantage_avg": -0.0005348845152184367, "objective/train/docs_used": 855824, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.33915114402771, "objective/train/original_loss": 1.33915114402771, "objective/train/theoretical_loss": 3.4136228292293396, "objective/train/tokens_used": 721694176, "objective/train/value_avg": -0.01026153564453125, "objective/train/value_loss": 0.00046351258060894907, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.69384765625, "objective/train/value_reward_corr": 0.7368765269570093, "objective/train/value_std": 0.0191802978515625, "objective/train/weight_avg": 0.9996702075004578, "objective/train/weighted_lm_loss": 1.3367323875427246, "objective/train/weights_max": 1.6283200979232788, "objective/train/weights_min": 0.23182757198810577, "theoretical_loss": 3.4136228292293396, "tokens_seen": 2351169536 }, { "epoch": 0.42, "learning_rate": 0.00029041964214073657, "loss": 0.0669, "theoretical_loss": 3.4136228292293396, "tokens_seen": 2351169536 }, { "epoch": 0.43, "learning_rate": 0.00029037952338923213, "loss": 0.0647, "theoretical_loss": 3.413607591027735, "tokens_seen": 2351300608 }, { "epoch": 0.43, "learning_rate": 0.0002903394046377277, "loss": 0.0699, "theoretical_loss": 3.4135923539133826, "tokens_seen": 2351431680 }, { "epoch": 0.43, "learning_rate": 0.0002902992858862232, "loss": 0.0687, "theoretical_loss": 3.4135771178861427, "tokens_seen": 2351562752 }, { "epoch": 0.43, "learning_rate": 0.0002902591671347188, "loss": 0.0682, "theoretical_loss": 3.413561882945878, "tokens_seen": 2351693824 }, { "epoch": 0.43, "learning_rate": 0.00029021904838321435, "loss": 0.0674, "theoretical_loss": 3.41354664909245, "tokens_seen": 2351824896 }, { "epoch": 0.43, "learning_rate": 0.00029017892963170987, "loss": 0.07, "theoretical_loss": 3.4135314163257213, "tokens_seen": 2351955968 }, { "epoch": 0.43, "learning_rate": 0.00029013881088020544, "loss": 0.0674, "theoretical_loss": 3.413516184645553, "tokens_seen": 2352087040 }, { "epoch": 0.43, "learning_rate": 0.00029009869212870095, "loss": 0.068, "theoretical_loss": 3.4135009540518073, "tokens_seen": 2352218112 }, { "epoch": 0.43, "learning_rate": 0.00029005857337719646, "loss": 0.0682, "theoretical_loss": 3.413485724544347, "tokens_seen": 2352349184 }, { "epoch": 0.43, "learning_rate": 0.00029001845462569203, "loss": 0.0683, "theoretical_loss": 3.4134704961230327, "tokens_seen": 2352480256 }, { "epoch": 0.43, "learning_rate": 0.0002899783358741876, "loss": 0.0675, "theoretical_loss": 3.413455268787727, "tokens_seen": 2352611328 }, { "epoch": 0.43, "learning_rate": 0.00028993821712268317, "loss": 0.0682, "theoretical_loss": 3.4134400425382925, "tokens_seen": 2352742400 }, { "epoch": 0.43, "learning_rate": 0.0002898980983711787, "loss": 0.0669, "theoretical_loss": 3.4134248173745902, "tokens_seen": 2352873472 }, { "epoch": 0.43, "learning_rate": 0.00028985797961967425, "loss": 0.0687, "theoretical_loss": 3.4134095932964836, "tokens_seen": 2353004544 }, { "epoch": 0.43, "learning_rate": 0.0002898178608681698, "loss": 0.0693, "theoretical_loss": 3.4133943703038336, "tokens_seen": 2353135616 }, { "epoch": 0.43, "learning_rate": 0.00028977774211666533, "loss": 0.0666, "theoretical_loss": 3.413379148396503, "tokens_seen": 2353266688 }, { "epoch": 0.43, "learning_rate": 0.0002897376233651609, "loss": 0.0712, "theoretical_loss": 3.4133639275743537, "tokens_seen": 2353397760 }, { "epoch": 0.43, "learning_rate": 0.0002896975046136564, "loss": 0.0673, "theoretical_loss": 3.413348707837248, "tokens_seen": 2353528832 }, { "epoch": 0.43, "learning_rate": 0.00028965738586215193, "loss": 0.0657, "theoretical_loss": 3.4133334891850486, "tokens_seen": 2353659904 }, { "epoch": 0.43, "learning_rate": 0.0002896172671106475, "loss": 0.0685, "theoretical_loss": 3.4133182716176167, "tokens_seen": 2353790976 }, { "epoch": 0.43, "learning_rate": 0.00028957714835914307, "loss": 0.0704, "theoretical_loss": 3.4133030551348154, "tokens_seen": 2353922048 }, { "epoch": 0.43, "learning_rate": 0.00028953702960763864, "loss": 0.0661, "theoretical_loss": 3.4132878397365065, "tokens_seen": 2354053120 }, { "epoch": 0.43, "learning_rate": 0.00028949691085613415, "loss": 0.067, "theoretical_loss": 3.413272625422553, "tokens_seen": 2354184192 }, { "epoch": 0.43, "learning_rate": 0.0002894567921046297, "loss": 0.0675, "theoretical_loss": 3.4132574121928165, "tokens_seen": 2354315264 }, { "epoch": 0.43, "objective/train/advantage_avg": -0.0005523854633793235, "objective/train/docs_used": 856929, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3781569004058838, "objective/train/original_loss": 1.3781567811965942, "objective/train/theoretical_loss": 3.41324220004716, "objective/train/tokens_used": 724970976, "objective/train/value_avg": -0.00923919677734375, "objective/train/value_loss": 0.0006364321452565491, "objective/train/value_max": -4.13060188293457e-05, "objective/train/value_min": -0.9384765625, "objective/train/value_reward_corr": 0.82521183707931, "objective/train/value_std": 0.03277587890625, "objective/train/weight_avg": 0.9997232556343079, "objective/train/weighted_lm_loss": 1.3764467239379883, "objective/train/weights_max": 1.3826122283935547, "objective/train/weights_min": 0.3970448672771454, "theoretical_loss": 3.41324220004716, "tokens_seen": 2354446336 }, { "epoch": 0.43, "learning_rate": 0.0002894166733531253, "loss": 0.0654, "theoretical_loss": 3.41324220004716, "tokens_seen": 2354446336 }, { "epoch": 0.43, "learning_rate": 0.0002893765546016208, "loss": 0.0657, "theoretical_loss": 3.413226988985446, "tokens_seen": 2354577408 }, { "epoch": 0.43, "learning_rate": 0.00028933643585011637, "loss": 0.0657, "theoretical_loss": 3.413211779007536, "tokens_seen": 2354708480 }, { "epoch": 0.43, "learning_rate": 0.0002892963170986119, "loss": 0.0633, "theoretical_loss": 3.4131965701132927, "tokens_seen": 2354839552 }, { "epoch": 0.43, "learning_rate": 0.0002892561983471074, "loss": 0.0664, "theoretical_loss": 3.4131813623025797, "tokens_seen": 2354970624 }, { "epoch": 0.43, "learning_rate": 0.00028921607959560297, "loss": 0.0639, "theoretical_loss": 3.4131661555752584, "tokens_seen": 2355101696 }, { "epoch": 0.43, "learning_rate": 0.00028917596084409854, "loss": 0.0619, "theoretical_loss": 3.4131509499311914, "tokens_seen": 2355232768 }, { "epoch": 0.43, "learning_rate": 0.0002891358420925941, "loss": 0.0665, "theoretical_loss": 3.413135745370242, "tokens_seen": 2355363840 }, { "epoch": 0.43, "learning_rate": 0.0002890957233410896, "loss": 0.0656, "theoretical_loss": 3.413120541892272, "tokens_seen": 2355494912 }, { "epoch": 0.43, "learning_rate": 0.0002890556045895852, "loss": 0.0671, "theoretical_loss": 3.4131053394971445, "tokens_seen": 2355625984 }, { "epoch": 0.43, "learning_rate": 0.00028901548583808075, "loss": 0.0712, "theoretical_loss": 3.413090138184722, "tokens_seen": 2355757056 }, { "epoch": 0.43, "learning_rate": 0.00028897536708657627, "loss": 0.0683, "theoretical_loss": 3.4130749379548666, "tokens_seen": 2355888128 }, { "epoch": 0.43, "learning_rate": 0.00028893524833507184, "loss": 0.0699, "theoretical_loss": 3.4130597388074415, "tokens_seen": 2356019200 }, { "epoch": 0.43, "learning_rate": 0.00028889512958356735, "loss": 0.0698, "theoretical_loss": 3.41304454074231, "tokens_seen": 2356150272 }, { "epoch": 0.43, "learning_rate": 0.00028885501083206287, "loss": 0.0661, "theoretical_loss": 3.4130293437593333, "tokens_seen": 2356281344 }, { "epoch": 0.43, "learning_rate": 0.00028881489208055843, "loss": 0.0665, "theoretical_loss": 3.4130141478583758, "tokens_seen": 2356412416 }, { "epoch": 0.43, "learning_rate": 0.000288774773329054, "loss": 0.0689, "theoretical_loss": 3.412998953039299, "tokens_seen": 2356543488 }, { "epoch": 0.43, "learning_rate": 0.00028873465457754957, "loss": 0.0682, "theoretical_loss": 3.4129837593019667, "tokens_seen": 2356674560 }, { "epoch": 0.43, "learning_rate": 0.0002886945358260451, "loss": 0.0654, "theoretical_loss": 3.412968566646241, "tokens_seen": 2356805632 }, { "epoch": 0.43, "learning_rate": 0.00028865441707454065, "loss": 0.0658, "theoretical_loss": 3.412953375071985, "tokens_seen": 2356936704 }, { "epoch": 0.43, "learning_rate": 0.0002886142983230362, "loss": 0.0655, "theoretical_loss": 3.412938184579062, "tokens_seen": 2357067776 }, { "epoch": 0.43, "learning_rate": 0.00028857417957153174, "loss": 0.0638, "theoretical_loss": 3.412922995167334, "tokens_seen": 2357198848 }, { "epoch": 0.43, "learning_rate": 0.0002885340608200273, "loss": 0.0648, "theoretical_loss": 3.4129078068366647, "tokens_seen": 2357329920 }, { "epoch": 0.43, "learning_rate": 0.0002884939420685228, "loss": 0.0662, "theoretical_loss": 3.4128926195869163, "tokens_seen": 2357460992 }, { "epoch": 0.43, "learning_rate": 0.0002884538233170184, "loss": 0.0665, "theoretical_loss": 3.412877433417953, "tokens_seen": 2357592064 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.00044362532207742333, "objective/train/docs_used": 858137, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3444005250930786, "objective/train/original_loss": 1.344400405883789, "objective/train/theoretical_loss": 3.4128622483296365, "objective/train/tokens_used": 728247776, "objective/train/value_avg": -0.00603485107421875, "objective/train/value_loss": 8.898707164917141e-05, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.224609375, "objective/train/value_reward_corr": 0.7586723246646664, "objective/train/value_std": 0.0108642578125, "objective/train/weight_avg": 1.0004876852035522, "objective/train/weighted_lm_loss": 1.3445626497268677, "objective/train/weights_max": 1.1481863260269165, "objective/train/weights_min": 0.8202790021896362, "theoretical_loss": 3.4128622483296365, "tokens_seen": 2357723136 }, { "epoch": 0.43, "learning_rate": 0.0002884137045655139, "loss": 0.0652, "theoretical_loss": 3.4128622483296365, "tokens_seen": 2357723136 }, { "epoch": 0.43, "learning_rate": 0.00028837358581400947, "loss": 0.0677, "theoretical_loss": 3.412847064321831, "tokens_seen": 2357854208 }, { "epoch": 0.43, "learning_rate": 0.00028833346706250504, "loss": 0.0685, "theoretical_loss": 3.412831881394399, "tokens_seen": 2357985280 }, { "epoch": 0.43, "learning_rate": 0.00028829334831100055, "loss": 0.067, "theoretical_loss": 3.412816699547203, "tokens_seen": 2358116352 }, { "epoch": 0.43, "learning_rate": 0.0002882532295594961, "loss": 0.0691, "theoretical_loss": 3.4128015187801073, "tokens_seen": 2358247424 }, { "epoch": 0.43, "learning_rate": 0.0002882131108079917, "loss": 0.0673, "theoretical_loss": 3.412786339092974, "tokens_seen": 2358378496 }, { "epoch": 0.43, "learning_rate": 0.0002881729920564872, "loss": 0.0701, "theoretical_loss": 3.4127711604856668, "tokens_seen": 2358509568 }, { "epoch": 0.43, "learning_rate": 0.00028813287330498277, "loss": 0.0664, "theoretical_loss": 3.4127559829580485, "tokens_seen": 2358640640 }, { "epoch": 0.43, "learning_rate": 0.0002880927545534783, "loss": 0.0675, "theoretical_loss": 3.4127408065099827, "tokens_seen": 2358771712 }, { "epoch": 0.43, "learning_rate": 0.00028805263580197385, "loss": 0.0731, "theoretical_loss": 3.4127256311413325, "tokens_seen": 2358902784 }, { "epoch": 0.43, "learning_rate": 0.00028801251705046937, "loss": 0.0687, "theoretical_loss": 3.4127104568519613, "tokens_seen": 2359033856 }, { "epoch": 0.43, "learning_rate": 0.00028797239829896494, "loss": 0.0695, "theoretical_loss": 3.4126952836417317, "tokens_seen": 2359164928 }, { "epoch": 0.43, "learning_rate": 0.0002879322795474605, "loss": 0.0658, "theoretical_loss": 3.412680111510508, "tokens_seen": 2359296000 }, { "epoch": 0.43, "learning_rate": 0.000287892160795956, "loss": 0.0645, "theoretical_loss": 3.412664940458153, "tokens_seen": 2359427072 }, { "epoch": 0.43, "learning_rate": 0.0002878520420444516, "loss": 0.0678, "theoretical_loss": 3.41264977048453, "tokens_seen": 2359558144 }, { "epoch": 0.43, "learning_rate": 0.00028781192329294716, "loss": 0.0653, "theoretical_loss": 3.4126346015895024, "tokens_seen": 2359689216 }, { "epoch": 0.43, "learning_rate": 0.00028777180454144267, "loss": 0.0658, "theoretical_loss": 3.412619433772934, "tokens_seen": 2359820288 }, { "epoch": 0.43, "learning_rate": 0.00028773168578993824, "loss": 0.0642, "theoretical_loss": 3.4126042670346877, "tokens_seen": 2359951360 }, { "epoch": 0.43, "learning_rate": 0.00028769156703843375, "loss": 0.07, "theoretical_loss": 3.412589101374627, "tokens_seen": 2360082432 }, { "epoch": 0.43, "learning_rate": 0.0002876514482869293, "loss": 0.0643, "theoretical_loss": 3.412573936792616, "tokens_seen": 2360213504 }, { "epoch": 0.43, "learning_rate": 0.00028761132953542483, "loss": 0.0641, "theoretical_loss": 3.4125587732885174, "tokens_seen": 2360344576 }, { "epoch": 0.43, "learning_rate": 0.0002875712107839204, "loss": 0.0654, "theoretical_loss": 3.412543610862195, "tokens_seen": 2360475648 }, { "epoch": 0.43, "learning_rate": 0.00028753109203241597, "loss": 0.0649, "theoretical_loss": 3.4125284495135126, "tokens_seen": 2360606720 }, { "epoch": 0.43, "learning_rate": 0.0002874909732809115, "loss": 0.067, "theoretical_loss": 3.4125132892423338, "tokens_seen": 2360737792 }, { "epoch": 0.43, "learning_rate": 0.00028745085452940705, "loss": 0.0661, "theoretical_loss": 3.412498130048522, "tokens_seen": 2360868864 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.0004693961818702519, "objective/train/docs_used": 859369, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4312232732772827, "objective/train/original_loss": 1.4312232732772827, "objective/train/theoretical_loss": 3.412482971931941, "objective/train/tokens_used": 731524576, "objective/train/value_avg": -0.01153564453125, "objective/train/value_loss": 0.0006167383980937302, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.89013671875, "objective/train/value_reward_corr": 0.7208587854990354, "objective/train/value_std": 0.0234222412109375, "objective/train/weight_avg": 1.0007383823394775, "objective/train/weighted_lm_loss": 1.4322428703308105, "objective/train/weights_max": 1.5018017292022705, "objective/train/weights_min": 0.37406063079833984, "theoretical_loss": 3.412482971931941, "tokens_seen": 2360999936 }, { "epoch": 0.43, "learning_rate": 0.0002874107357779026, "loss": 0.068, "theoretical_loss": 3.412482971931941, "tokens_seen": 2360999936 }, { "epoch": 0.43, "learning_rate": 0.00028737061702639814, "loss": 0.0666, "theoretical_loss": 3.412467814892454, "tokens_seen": 2361131008 }, { "epoch": 0.43, "learning_rate": 0.0002873304982748937, "loss": 0.0651, "theoretical_loss": 3.412452658929925, "tokens_seen": 2361262080 }, { "epoch": 0.43, "learning_rate": 0.0002872903795233892, "loss": 0.0676, "theoretical_loss": 3.4124375040442176, "tokens_seen": 2361393152 }, { "epoch": 0.43, "learning_rate": 0.0002872502607718848, "loss": 0.0666, "theoretical_loss": 3.412422350235196, "tokens_seen": 2361524224 }, { "epoch": 0.43, "learning_rate": 0.0002872101420203803, "loss": 0.0681, "theoretical_loss": 3.4124071975027235, "tokens_seen": 2361655296 }, { "epoch": 0.43, "learning_rate": 0.00028717002326887587, "loss": 0.0677, "theoretical_loss": 3.412392045846664, "tokens_seen": 2361786368 }, { "epoch": 0.43, "learning_rate": 0.00028712990451737144, "loss": 0.0703, "theoretical_loss": 3.4123768952668807, "tokens_seen": 2361917440 }, { "epoch": 0.43, "learning_rate": 0.00028708978576586695, "loss": 0.0682, "theoretical_loss": 3.4123617457632385, "tokens_seen": 2362048512 }, { "epoch": 0.43, "learning_rate": 0.0002870496670143625, "loss": 0.0669, "theoretical_loss": 3.412346597335601, "tokens_seen": 2362179584 }, { "epoch": 0.43, "learning_rate": 0.0002870095482628581, "loss": 0.067, "theoretical_loss": 3.412331449983831, "tokens_seen": 2362310656 }, { "epoch": 0.43, "learning_rate": 0.0002869694295113536, "loss": 0.0702, "theoretical_loss": 3.4123163037077937, "tokens_seen": 2362441728 }, { "epoch": 0.43, "learning_rate": 0.00028692931075984917, "loss": 0.0652, "theoretical_loss": 3.4123011585073524, "tokens_seen": 2362572800 }, { "epoch": 0.43, "learning_rate": 0.0002868891920083447, "loss": 0.0668, "theoretical_loss": 3.4122860143823717, "tokens_seen": 2362703872 }, { "epoch": 0.43, "learning_rate": 0.00028684907325684025, "loss": 0.0707, "theoretical_loss": 3.4122708713327143, "tokens_seen": 2362834944 }, { "epoch": 0.43, "learning_rate": 0.00028680895450533577, "loss": 0.0662, "theoretical_loss": 3.4122557293582454, "tokens_seen": 2362966016 }, { "epoch": 0.43, "learning_rate": 0.00028676883575383134, "loss": 0.0649, "theoretical_loss": 3.412240588458829, "tokens_seen": 2363097088 }, { "epoch": 0.43, "learning_rate": 0.0002867287170023269, "loss": 0.0634, "theoretical_loss": 3.412225448634328, "tokens_seen": 2363228160 }, { "epoch": 0.43, "learning_rate": 0.0002866885982508224, "loss": 0.0671, "theoretical_loss": 3.4122103098846077, "tokens_seen": 2363359232 }, { "epoch": 0.43, "learning_rate": 0.000286648479499318, "loss": 0.0653, "theoretical_loss": 3.4121951722095316, "tokens_seen": 2363490304 }, { "epoch": 0.43, "learning_rate": 0.00028660836074781356, "loss": 0.0664, "theoretical_loss": 3.4121800356089635, "tokens_seen": 2363621376 }, { "epoch": 0.43, "learning_rate": 0.0002865682419963091, "loss": 0.0685, "theoretical_loss": 3.412164900082769, "tokens_seen": 2363752448 }, { "epoch": 0.43, "learning_rate": 0.00028652812324480464, "loss": 0.0675, "theoretical_loss": 3.4121497656308106, "tokens_seen": 2363883520 }, { "epoch": 0.43, "learning_rate": 0.00028648800449330015, "loss": 0.0662, "theoretical_loss": 3.412134632252953, "tokens_seen": 2364014592 }, { "epoch": 0.43, "learning_rate": 0.0002864478857417957, "loss": 0.0669, "theoretical_loss": 3.412119499949061, "tokens_seen": 2364145664 }, { "epoch": 0.43, "objective/train/advantage_avg": 0.00035812638816423714, "objective/train/docs_used": 860660, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3973697423934937, "objective/train/original_loss": 1.397369623184204, "objective/train/theoretical_loss": 3.4121043687189982, "objective/train/tokens_used": 734801376, "objective/train/value_avg": -0.00799560546875, "objective/train/value_loss": 0.0001277210976695642, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.525390625, "objective/train/value_reward_corr": 0.7975237396281811, "objective/train/value_std": 0.01494598388671875, "objective/train/weight_avg": 1.0004206895828247, "objective/train/weighted_lm_loss": 1.3983070850372314, "objective/train/weights_max": 1.1465057134628296, "objective/train/weights_min": 0.6073635816574097, "theoretical_loss": 3.4121043687189982, "tokens_seen": 2364276736 }, { "epoch": 0.43, "learning_rate": 0.00028640776699029124, "loss": 0.0678, "theoretical_loss": 3.4121043687189982, "tokens_seen": 2364276736 }, { "epoch": 0.43, "learning_rate": 0.0002863676482387868, "loss": 0.0655, "theoretical_loss": 3.412089238562629, "tokens_seen": 2364407808 }, { "epoch": 0.43, "learning_rate": 0.00028632752948728237, "loss": 0.0692, "theoretical_loss": 3.4120741094798186, "tokens_seen": 2364538880 }, { "epoch": 0.43, "learning_rate": 0.0002862874107357779, "loss": 0.064, "theoretical_loss": 3.41205898147043, "tokens_seen": 2364669952 }, { "epoch": 0.43, "learning_rate": 0.00028624729198427346, "loss": 0.0675, "theoretical_loss": 3.412043854534328, "tokens_seen": 2364801024 }, { "epoch": 0.43, "learning_rate": 0.000286207173232769, "loss": 0.0694, "theoretical_loss": 3.412028728671377, "tokens_seen": 2364932096 }, { "epoch": 0.43, "learning_rate": 0.0002861670544812646, "loss": 0.0653, "theoretical_loss": 3.4120136038814417, "tokens_seen": 2365063168 }, { "epoch": 0.43, "learning_rate": 0.0002861269357297601, "loss": 0.0646, "theoretical_loss": 3.411998480164386, "tokens_seen": 2365194240 }, { "epoch": 0.43, "learning_rate": 0.0002860868169782556, "loss": 0.065, "theoretical_loss": 3.411983357520075, "tokens_seen": 2365325312 }, { "epoch": 0.43, "learning_rate": 0.0002860466982267512, "loss": 0.0677, "theoretical_loss": 3.4119682359483727, "tokens_seen": 2365456384 }, { "epoch": 0.43, "learning_rate": 0.0002860065794752467, "loss": 0.0696, "theoretical_loss": 3.4119531154491436, "tokens_seen": 2365587456 }, { "epoch": 0.43, "learning_rate": 0.00028596646072374227, "loss": 0.0681, "theoretical_loss": 3.4119379960222522, "tokens_seen": 2365718528 }, { "epoch": 0.43, "learning_rate": 0.00028592634197223784, "loss": 0.0647, "theoretical_loss": 3.411922877667563, "tokens_seen": 2365849600 }, { "epoch": 0.43, "learning_rate": 0.00028588622322073335, "loss": 0.0655, "theoretical_loss": 3.411907760384941, "tokens_seen": 2365980672 }, { "epoch": 0.43, "learning_rate": 0.0002858461044692289, "loss": 0.0682, "theoretical_loss": 3.4118926441742508, "tokens_seen": 2366111744 }, { "epoch": 0.43, "learning_rate": 0.0002858059857177245, "loss": 0.0688, "theoretical_loss": 3.4118775290353565, "tokens_seen": 2366242816 }, { "epoch": 0.43, "learning_rate": 0.00028576586696622006, "loss": 0.0674, "theoretical_loss": 3.4118624149681227, "tokens_seen": 2366373888 }, { "epoch": 0.43, "learning_rate": 0.0002857257482147156, "loss": 0.0653, "theoretical_loss": 3.4118473019724145, "tokens_seen": 2366504960 }, { "epoch": 0.43, "learning_rate": 0.00028568562946321114, "loss": 0.0682, "theoretical_loss": 3.4118321900480963, "tokens_seen": 2366636032 }, { "epoch": 0.43, "learning_rate": 0.00028564551071170666, "loss": 0.0673, "theoretical_loss": 3.4118170791950333, "tokens_seen": 2366767104 }, { "epoch": 0.43, "learning_rate": 0.00028560539196020217, "loss": 0.065, "theoretical_loss": 3.4118019694130894, "tokens_seen": 2366898176 }, { "epoch": 0.43, "learning_rate": 0.00028556527320869774, "loss": 0.0668, "theoretical_loss": 3.41178686070213, "tokens_seen": 2367029248 }, { "epoch": 0.43, "learning_rate": 0.0002855251544571933, "loss": 0.0658, "theoretical_loss": 3.41177175306202, "tokens_seen": 2367160320 }, { "epoch": 0.43, "learning_rate": 0.0002854850357056888, "loss": 0.0667, "theoretical_loss": 3.411756646492624, "tokens_seen": 2367291392 }, { "epoch": 0.43, "learning_rate": 0.0002854449169541844, "loss": 0.065, "theoretical_loss": 3.4117415409938068, "tokens_seen": 2367422464 }, { "epoch": 0.43, "objective/train/advantage_avg": -1.1867542525578756e-05, "objective/train/docs_used": 861837, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3643743991851807, "objective/train/original_loss": 1.3643741607666016, "objective/train/theoretical_loss": 3.411726436565433, "objective/train/tokens_used": 738078176, "objective/train/value_avg": -0.00632476806640625, "objective/train/value_loss": 0.0001429946132702753, "objective/train/value_max": -4.4345855712890625e-05, "objective/train/value_min": -0.206298828125, "objective/train/value_reward_corr": 0.7080588819160132, "objective/train/value_std": 0.01139068603515625, "objective/train/weight_avg": 1.000055193901062, "objective/train/weighted_lm_loss": 1.3643438816070557, "objective/train/weights_max": 1.1705464124679565, "objective/train/weights_min": 0.4101487994194031, "theoretical_loss": 3.411726436565433, "tokens_seen": 2367553536 }, { "epoch": 0.43, "learning_rate": 0.00028540479820267996, "loss": 0.0621, "theoretical_loss": 3.411726436565433, "tokens_seen": 2367553536 }, { "epoch": 0.43, "learning_rate": 0.0002853646794511755, "loss": 0.0655, "theoretical_loss": 3.411711333207368, "tokens_seen": 2367684608 }, { "epoch": 0.44, "learning_rate": 0.00028532456069967104, "loss": 0.0629, "theoretical_loss": 3.411696230919476, "tokens_seen": 2367815680 }, { "epoch": 0.44, "learning_rate": 0.0002852844419481666, "loss": 0.0697, "theoretical_loss": 3.411681129701623, "tokens_seen": 2367946752 }, { "epoch": 0.44, "learning_rate": 0.0002852443231966621, "loss": 0.0675, "theoretical_loss": 3.4116660295536727, "tokens_seen": 2368077824 }, { "epoch": 0.44, "learning_rate": 0.00028520420444515764, "loss": 0.0654, "theoretical_loss": 3.411650930475491, "tokens_seen": 2368208896 }, { "epoch": 0.44, "learning_rate": 0.0002851640856936532, "loss": 0.0672, "theoretical_loss": 3.411635832466943, "tokens_seen": 2368339968 }, { "epoch": 0.44, "learning_rate": 0.0002851239669421488, "loss": 0.0675, "theoretical_loss": 3.4116207355278934, "tokens_seen": 2368471040 }, { "epoch": 0.44, "learning_rate": 0.0002850838481906443, "loss": 0.0688, "theoretical_loss": 3.411605639658207, "tokens_seen": 2368602112 }, { "epoch": 0.44, "learning_rate": 0.00028504372943913986, "loss": 0.0681, "theoretical_loss": 3.411590544857749, "tokens_seen": 2368733184 }, { "epoch": 0.44, "learning_rate": 0.0002850036106876354, "loss": 0.07, "theoretical_loss": 3.4115754511263847, "tokens_seen": 2368864256 }, { "epoch": 0.44, "learning_rate": 0.000284963491936131, "loss": 0.0665, "theoretical_loss": 3.4115603584639795, "tokens_seen": 2368995328 }, { "epoch": 0.44, "learning_rate": 0.0002849233731846265, "loss": 0.0664, "theoretical_loss": 3.411545266870398, "tokens_seen": 2369126400 }, { "epoch": 0.44, "learning_rate": 0.0002848832544331221, "loss": 0.0691, "theoretical_loss": 3.4115301763455057, "tokens_seen": 2369257472 }, { "epoch": 0.44, "learning_rate": 0.0002848431356816176, "loss": 0.0654, "theoretical_loss": 3.4115150868891675, "tokens_seen": 2369388544 }, { "epoch": 0.44, "learning_rate": 0.0002848030169301131, "loss": 0.0653, "theoretical_loss": 3.411499998501249, "tokens_seen": 2369519616 }, { "epoch": 0.44, "learning_rate": 0.00028476289817860867, "loss": 0.0659, "theoretical_loss": 3.4114849111816152, "tokens_seen": 2369650688 }, { "epoch": 0.44, "learning_rate": 0.00028472277942710424, "loss": 0.0702, "theoretical_loss": 3.4114698249301316, "tokens_seen": 2369781760 }, { "epoch": 0.44, "learning_rate": 0.00028468266067559976, "loss": 0.0673, "theoretical_loss": 3.411454739746664, "tokens_seen": 2369912832 }, { "epoch": 0.44, "learning_rate": 0.0002846425419240953, "loss": 0.0676, "theoretical_loss": 3.411439655631076, "tokens_seen": 2370043904 }, { "epoch": 0.44, "learning_rate": 0.0002846024231725909, "loss": 0.0659, "theoretical_loss": 3.411424572583235, "tokens_seen": 2370174976 }, { "epoch": 0.44, "learning_rate": 0.00028456230442108646, "loss": 0.0651, "theoretical_loss": 3.4114094906030052, "tokens_seen": 2370306048 }, { "epoch": 0.44, "learning_rate": 0.000284522185669582, "loss": 0.0662, "theoretical_loss": 3.411394409690252, "tokens_seen": 2370437120 }, { "epoch": 0.44, "learning_rate": 0.00028448206691807754, "loss": 0.065, "theoretical_loss": 3.4113793298448414, "tokens_seen": 2370568192 }, { "epoch": 0.44, "learning_rate": 0.00028444194816657306, "loss": 0.0682, "theoretical_loss": 3.411364251066638, "tokens_seen": 2370699264 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0002594470279291272, "objective/train/docs_used": 863154, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3077369928359985, "objective/train/original_loss": 1.307736873626709, "objective/train/theoretical_loss": 3.411349173355508, "objective/train/tokens_used": 741354976, "objective/train/value_avg": -0.00836944580078125, "objective/train/value_loss": 0.0002572333032730967, "objective/train/value_max": -3.737211227416992e-05, "objective/train/value_min": -0.307373046875, "objective/train/value_reward_corr": 0.6884954085479762, "objective/train/value_std": 0.0152435302734375, "objective/train/weight_avg": 1.0003738403320312, "objective/train/weighted_lm_loss": 1.3075600862503052, "objective/train/weights_max": 1.1428159475326538, "objective/train/weights_min": 0.3721363842487335, "theoretical_loss": 3.411349173355508, "tokens_seen": 2370830336 }, { "epoch": 0.44, "learning_rate": 0.00028440182941506857, "loss": 0.0666, "theoretical_loss": 3.411349173355508, "tokens_seen": 2370830336 }, { "epoch": 0.44, "learning_rate": 0.00028436171066356414, "loss": 0.0646, "theoretical_loss": 3.4113340967113173, "tokens_seen": 2370961408 }, { "epoch": 0.44, "learning_rate": 0.0002843215919120597, "loss": 0.0652, "theoretical_loss": 3.4113190211339304, "tokens_seen": 2371092480 }, { "epoch": 0.44, "learning_rate": 0.0002842814731605552, "loss": 0.0673, "theoretical_loss": 3.4113039466232133, "tokens_seen": 2371223552 }, { "epoch": 0.44, "learning_rate": 0.0002842413544090508, "loss": 0.0663, "theoretical_loss": 3.4112888731790316, "tokens_seen": 2371354624 }, { "epoch": 0.44, "learning_rate": 0.00028420123565754636, "loss": 0.0657, "theoretical_loss": 3.4112738008012506, "tokens_seen": 2371485696 }, { "epoch": 0.44, "learning_rate": 0.00028416111690604193, "loss": 0.064, "theoretical_loss": 3.4112587294897367, "tokens_seen": 2371616768 }, { "epoch": 0.44, "learning_rate": 0.00028412099815453744, "loss": 0.0648, "theoretical_loss": 3.4112436592443545, "tokens_seen": 2371747840 }, { "epoch": 0.44, "learning_rate": 0.000284080879403033, "loss": 0.0697, "theoretical_loss": 3.4112285900649706, "tokens_seen": 2371878912 }, { "epoch": 0.44, "learning_rate": 0.0002840407606515285, "loss": 0.0643, "theoretical_loss": 3.41121352195145, "tokens_seen": 2372009984 }, { "epoch": 0.44, "learning_rate": 0.00028400064190002404, "loss": 0.0661, "theoretical_loss": 3.411198454903659, "tokens_seen": 2372141056 }, { "epoch": 0.44, "learning_rate": 0.0002839605231485196, "loss": 0.0657, "theoretical_loss": 3.4111833889214624, "tokens_seen": 2372272128 }, { "epoch": 0.44, "learning_rate": 0.0002839204043970152, "loss": 0.0645, "theoretical_loss": 3.4111683240047275, "tokens_seen": 2372403200 }, { "epoch": 0.44, "learning_rate": 0.00028388028564551074, "loss": 0.0657, "theoretical_loss": 3.4111532601533185, "tokens_seen": 2372534272 }, { "epoch": 0.44, "learning_rate": 0.00028384016689400626, "loss": 0.0667, "theoretical_loss": 3.4111381973671024, "tokens_seen": 2372665344 }, { "epoch": 0.44, "learning_rate": 0.0002838000481425018, "loss": 0.0681, "theoretical_loss": 3.411123135645944, "tokens_seen": 2372796416 }, { "epoch": 0.44, "learning_rate": 0.0002837599293909974, "loss": 0.0711, "theoretical_loss": 3.4111080749897105, "tokens_seen": 2372927488 }, { "epoch": 0.44, "learning_rate": 0.0002837198106394929, "loss": 0.07, "theoretical_loss": 3.411093015398267, "tokens_seen": 2373058560 }, { "epoch": 0.44, "learning_rate": 0.0002836796918879885, "loss": 0.0668, "theoretical_loss": 3.4110779568714786, "tokens_seen": 2373189632 }, { "epoch": 0.44, "learning_rate": 0.000283639573136484, "loss": 0.0643, "theoretical_loss": 3.4110628994092127, "tokens_seen": 2373320704 }, { "epoch": 0.44, "learning_rate": 0.0002835994543849795, "loss": 0.0686, "theoretical_loss": 3.4110478430113345, "tokens_seen": 2373451776 }, { "epoch": 0.44, "learning_rate": 0.0002835593356334751, "loss": 0.068, "theoretical_loss": 3.41103278767771, "tokens_seen": 2373582848 }, { "epoch": 0.44, "learning_rate": 0.00028351921688197064, "loss": 0.0676, "theoretical_loss": 3.4110177334082055, "tokens_seen": 2373713920 }, { "epoch": 0.44, "learning_rate": 0.0002834790981304662, "loss": 0.0682, "theoretical_loss": 3.4110026802026865, "tokens_seen": 2373844992 }, { "epoch": 0.44, "learning_rate": 0.0002834389793789617, "loss": 0.0676, "theoretical_loss": 3.4109876280610196, "tokens_seen": 2373976064 }, { "epoch": 0.44, "objective/train/advantage_avg": -0.0008668807568028569, "objective/train/docs_used": 864337, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1996265649795532, "objective/train/original_loss": 1.1996266841888428, "objective/train/theoretical_loss": 3.410972576983071, "objective/train/tokens_used": 744631776, "objective/train/value_avg": -0.00832366943359375, "objective/train/value_loss": 0.0003377578395884484, "objective/train/value_max": -4.947185516357422e-05, "objective/train/value_min": -0.2188720703125, "objective/train/value_reward_corr": 0.7393972498275947, "objective/train/value_std": 0.0171966552734375, "objective/train/weight_avg": 0.999282717704773, "objective/train/weighted_lm_loss": 1.1984236240386963, "objective/train/weights_max": 1.1839783191680908, "objective/train/weights_min": 0.37812936305999756, "theoretical_loss": 3.410972576983071, "tokens_seen": 2374107136 }, { "epoch": 0.44, "learning_rate": 0.0002833988606274573, "loss": 0.0661, "theoretical_loss": 3.410972576983071, "tokens_seen": 2374107136 }, { "epoch": 0.44, "learning_rate": 0.00028335874187595286, "loss": 0.0713, "theoretical_loss": 3.4109575269687062, "tokens_seen": 2374238208 }, { "epoch": 0.44, "learning_rate": 0.0002833186231244484, "loss": 0.0649, "theoretical_loss": 3.410942478017792, "tokens_seen": 2374369280 }, { "epoch": 0.44, "learning_rate": 0.00028327850437294394, "loss": 0.0682, "theoretical_loss": 3.4109274301301937, "tokens_seen": 2374500352 }, { "epoch": 0.44, "learning_rate": 0.00028323838562143946, "loss": 0.0654, "theoretical_loss": 3.4109123833057784, "tokens_seen": 2374631424 }, { "epoch": 0.44, "learning_rate": 0.00028319826686993497, "loss": 0.0689, "theoretical_loss": 3.410897337544412, "tokens_seen": 2374762496 }, { "epoch": 0.44, "learning_rate": 0.00028315814811843054, "loss": 0.0685, "theoretical_loss": 3.41088229284596, "tokens_seen": 2374893568 }, { "epoch": 0.44, "learning_rate": 0.0002831180293669261, "loss": 0.0632, "theoretical_loss": 3.41086724921029, "tokens_seen": 2375024640 }, { "epoch": 0.44, "learning_rate": 0.0002830779106154217, "loss": 0.0656, "theoretical_loss": 3.410852206637267, "tokens_seen": 2375155712 }, { "epoch": 0.44, "learning_rate": 0.0002830377918639172, "loss": 0.0667, "theoretical_loss": 3.410837165126758, "tokens_seen": 2375286784 }, { "epoch": 0.44, "learning_rate": 0.00028299767311241276, "loss": 0.0655, "theoretical_loss": 3.410822124678629, "tokens_seen": 2375417856 }, { "epoch": 0.44, "learning_rate": 0.00028295755436090833, "loss": 0.0665, "theoretical_loss": 3.4108070852927472, "tokens_seen": 2375548928 }, { "epoch": 0.44, "learning_rate": 0.00028291743560940384, "loss": 0.0692, "theoretical_loss": 3.410792046968978, "tokens_seen": 2375680000 }, { "epoch": 0.44, "learning_rate": 0.0002828773168578994, "loss": 0.0641, "theoretical_loss": 3.4107770097071874, "tokens_seen": 2375811072 }, { "epoch": 0.44, "learning_rate": 0.0002828371981063949, "loss": 0.0646, "theoretical_loss": 3.410761973507243, "tokens_seen": 2375942144 }, { "epoch": 0.44, "learning_rate": 0.00028279707935489044, "loss": 0.0685, "theoretical_loss": 3.410746938369011, "tokens_seen": 2376073216 }, { "epoch": 0.44, "learning_rate": 0.000282756960603386, "loss": 0.0698, "theoretical_loss": 3.4107319042923576, "tokens_seen": 2376204288 }, { "epoch": 0.44, "learning_rate": 0.0002827168418518816, "loss": 0.0695, "theoretical_loss": 3.410716871277149, "tokens_seen": 2376335360 }, { "epoch": 0.44, "learning_rate": 0.00028267672310037714, "loss": 0.0693, "theoretical_loss": 3.410701839323252, "tokens_seen": 2376466432 }, { "epoch": 0.44, "learning_rate": 0.00028263660434887266, "loss": 0.0649, "theoretical_loss": 3.4106868084305337, "tokens_seen": 2376597504 }, { "epoch": 0.44, "learning_rate": 0.0002825964855973682, "loss": 0.0658, "theoretical_loss": 3.41067177859886, "tokens_seen": 2376728576 }, { "epoch": 0.44, "learning_rate": 0.0002825563668458638, "loss": 0.0683, "theoretical_loss": 3.4106567498280973, "tokens_seen": 2376859648 }, { "epoch": 0.44, "learning_rate": 0.0002825162480943593, "loss": 0.0654, "theoretical_loss": 3.410641722118113, "tokens_seen": 2376990720 }, { "epoch": 0.44, "learning_rate": 0.0002824761293428549, "loss": 0.0679, "theoretical_loss": 3.410626695468773, "tokens_seen": 2377121792 }, { "epoch": 0.44, "learning_rate": 0.0002824360105913504, "loss": 0.0672, "theoretical_loss": 3.4106116698799447, "tokens_seen": 2377252864 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0016103433445096016, "objective/train/docs_used": 865580, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4033515453338623, "objective/train/original_loss": 1.4033514261245728, "objective/train/theoretical_loss": 3.410596645351494, "objective/train/tokens_used": 747908576, "objective/train/value_avg": -0.00702667236328125, "objective/train/value_loss": 0.0002193887485191226, "objective/train/value_max": -4.398822784423828e-05, "objective/train/value_min": -0.2393798828125, "objective/train/value_reward_corr": 0.48887337622603216, "objective/train/value_std": 0.01202392578125, "objective/train/weight_avg": 1.0017119646072388, "objective/train/weighted_lm_loss": 1.4055837392807007, "objective/train/weights_max": 1.171118140220642, "objective/train/weights_min": 0.3689397871494293, "theoretical_loss": 3.410596645351494, "tokens_seen": 2377383936 }, { "epoch": 0.44, "learning_rate": 0.0002823958918398459, "loss": 0.0662, "theoretical_loss": 3.410596645351494, "tokens_seen": 2377383936 }, { "epoch": 0.44, "learning_rate": 0.0002823557730883415, "loss": 0.0677, "theoretical_loss": 3.410581621883288, "tokens_seen": 2377515008 }, { "epoch": 0.44, "learning_rate": 0.00028231565433683704, "loss": 0.0666, "theoretical_loss": 3.4105665994751933, "tokens_seen": 2377646080 }, { "epoch": 0.44, "learning_rate": 0.0002822755355853326, "loss": 0.066, "theoretical_loss": 3.4105515781270768, "tokens_seen": 2377777152 }, { "epoch": 0.44, "learning_rate": 0.0002822354168338281, "loss": 0.0661, "theoretical_loss": 3.4105365578388054, "tokens_seen": 2377908224 }, { "epoch": 0.44, "learning_rate": 0.0002821952980823237, "loss": 0.0658, "theoretical_loss": 3.410521538610246, "tokens_seen": 2378039296 }, { "epoch": 0.44, "learning_rate": 0.00028215517933081926, "loss": 0.0652, "theoretical_loss": 3.410506520441265, "tokens_seen": 2378170368 }, { "epoch": 0.44, "learning_rate": 0.0002821150605793148, "loss": 0.0682, "theoretical_loss": 3.410491503331729, "tokens_seen": 2378301440 }, { "epoch": 0.44, "learning_rate": 0.00028207494182781035, "loss": 0.0654, "theoretical_loss": 3.410476487281506, "tokens_seen": 2378432512 }, { "epoch": 0.44, "learning_rate": 0.00028203482307630586, "loss": 0.0675, "theoretical_loss": 3.410461472290462, "tokens_seen": 2378563584 }, { "epoch": 0.44, "learning_rate": 0.0002819947043248014, "loss": 0.0699, "theoretical_loss": 3.410446458358464, "tokens_seen": 2378694656 }, { "epoch": 0.44, "learning_rate": 0.00028195458557329694, "loss": 0.0639, "theoretical_loss": 3.4104314454853792, "tokens_seen": 2378825728 }, { "epoch": 0.44, "learning_rate": 0.0002819144668217925, "loss": 0.0669, "theoretical_loss": 3.4104164336710747, "tokens_seen": 2378956800 }, { "epoch": 0.44, "learning_rate": 0.0002818743480702881, "loss": 0.0651, "theoretical_loss": 3.4104014229154167, "tokens_seen": 2379087872 }, { "epoch": 0.44, "learning_rate": 0.0002818342293187836, "loss": 0.0684, "theoretical_loss": 3.4103864132182733, "tokens_seen": 2379218944 }, { "epoch": 0.44, "learning_rate": 0.00028179411056727916, "loss": 0.0672, "theoretical_loss": 3.4103714045795113, "tokens_seen": 2379350016 }, { "epoch": 0.44, "learning_rate": 0.00028175399181577473, "loss": 0.0697, "theoretical_loss": 3.410356396998997, "tokens_seen": 2379481088 }, { "epoch": 0.44, "learning_rate": 0.00028171387306427024, "loss": 0.0657, "theoretical_loss": 3.4103413904765985, "tokens_seen": 2379612160 }, { "epoch": 0.44, "learning_rate": 0.0002816737543127658, "loss": 0.0645, "theoretical_loss": 3.410326385012182, "tokens_seen": 2379743232 }, { "epoch": 0.44, "learning_rate": 0.0002816336355612613, "loss": 0.0643, "theoretical_loss": 3.4103113806056156, "tokens_seen": 2379874304 }, { "epoch": 0.44, "learning_rate": 0.00028159351680975684, "loss": 0.0703, "theoretical_loss": 3.4102963772567656, "tokens_seen": 2380005376 }, { "epoch": 0.44, "learning_rate": 0.0002815533980582524, "loss": 0.0685, "theoretical_loss": 3.4102813749655, "tokens_seen": 2380136448 }, { "epoch": 0.44, "learning_rate": 0.000281513279306748, "loss": 0.0662, "theoretical_loss": 3.4102663737316847, "tokens_seen": 2380267520 }, { "epoch": 0.44, "learning_rate": 0.00028147316055524355, "loss": 0.0662, "theoretical_loss": 3.4102513735551887, "tokens_seen": 2380398592 }, { "epoch": 0.44, "learning_rate": 0.00028143304180373906, "loss": 0.0674, "theoretical_loss": 3.410236374435878, "tokens_seen": 2380529664 }, { "epoch": 0.44, "objective/train/advantage_avg": 0.0001522019156254828, "objective/train/docs_used": 866853, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.337000846862793, "objective/train/original_loss": 1.3370006084442139, "objective/train/theoretical_loss": 3.41022137637362, "objective/train/tokens_used": 751185376, "objective/train/value_avg": -0.006931304931640625, "objective/train/value_loss": 0.00014153780648484826, "objective/train/value_max": -3.594160079956055e-05, "objective/train/value_min": -0.2076416015625, "objective/train/value_reward_corr": 0.7140933628044044, "objective/train/value_std": 0.01171112060546875, "objective/train/weight_avg": 1.0002185106277466, "objective/train/weighted_lm_loss": 1.3375684022903442, "objective/train/weights_max": 1.1860806941986084, "objective/train/weights_min": 0.37291228771209717, "theoretical_loss": 3.41022137637362, "tokens_seen": 2380660736 }, { "epoch": 0.44, "learning_rate": 0.00028139292305223463, "loss": 0.0671, "theoretical_loss": 3.41022137637362, "tokens_seen": 2380660736 }, { "epoch": 0.44, "learning_rate": 0.0002813528043007302, "loss": 0.0679, "theoretical_loss": 3.410206379368283, "tokens_seen": 2380791808 }, { "epoch": 0.44, "learning_rate": 0.0002813126855492257, "loss": 0.0657, "theoretical_loss": 3.4101913834197335, "tokens_seen": 2380922880 }, { "epoch": 0.44, "learning_rate": 0.0002812725667977213, "loss": 0.0642, "theoretical_loss": 3.4101763885278387, "tokens_seen": 2381053952 }, { "epoch": 0.44, "learning_rate": 0.0002812324480462168, "loss": 0.0633, "theoretical_loss": 3.4101613946924663, "tokens_seen": 2381185024 }, { "epoch": 0.44, "learning_rate": 0.0002811923292947123, "loss": 0.0635, "theoretical_loss": 3.410146401913484, "tokens_seen": 2381316096 }, { "epoch": 0.44, "learning_rate": 0.0002811522105432079, "loss": 0.0703, "theoretical_loss": 3.4101314101907585, "tokens_seen": 2381447168 }, { "epoch": 0.44, "learning_rate": 0.00028111209179170344, "loss": 0.0644, "theoretical_loss": 3.410116419524158, "tokens_seen": 2381578240 }, { "epoch": 0.44, "learning_rate": 0.000281071973040199, "loss": 0.0669, "theoretical_loss": 3.4101014299135493, "tokens_seen": 2381709312 }, { "epoch": 0.44, "learning_rate": 0.0002810318542886945, "loss": 0.0662, "theoretical_loss": 3.4100864413588003, "tokens_seen": 2381840384 }, { "epoch": 0.44, "learning_rate": 0.0002809917355371901, "loss": 0.0645, "theoretical_loss": 3.4100714538597785, "tokens_seen": 2381971456 }, { "epoch": 0.44, "learning_rate": 0.00028095161678568566, "loss": 0.0649, "theoretical_loss": 3.410056467416352, "tokens_seen": 2382102528 }, { "epoch": 0.44, "learning_rate": 0.0002809114980341812, "loss": 0.0661, "theoretical_loss": 3.410041482028387, "tokens_seen": 2382233600 }, { "epoch": 0.44, "learning_rate": 0.00028087137928267675, "loss": 0.0689, "theoretical_loss": 3.4100264976957524, "tokens_seen": 2382364672 }, { "epoch": 0.44, "learning_rate": 0.00028083126053117226, "loss": 0.0696, "theoretical_loss": 3.4100115144183154, "tokens_seen": 2382495744 }, { "epoch": 0.44, "learning_rate": 0.00028079114177966783, "loss": 0.0689, "theoretical_loss": 3.4099965321959433, "tokens_seen": 2382626816 }, { "epoch": 0.44, "learning_rate": 0.00028075102302816334, "loss": 0.0648, "theoretical_loss": 3.409981551028504, "tokens_seen": 2382757888 }, { "epoch": 0.44, "learning_rate": 0.0002807109042766589, "loss": 0.0628, "theoretical_loss": 3.4099665709158655, "tokens_seen": 2382888960 }, { "epoch": 0.44, "learning_rate": 0.0002806707855251545, "loss": 0.0672, "theoretical_loss": 3.409951591857895, "tokens_seen": 2383020032 }, { "epoch": 0.44, "learning_rate": 0.00028063066677365, "loss": 0.0636, "theoretical_loss": 3.409936613854461, "tokens_seen": 2383151104 }, { "epoch": 0.44, "learning_rate": 0.00028059054802214556, "loss": 0.0626, "theoretical_loss": 3.4099216369054304, "tokens_seen": 2383282176 }, { "epoch": 0.44, "learning_rate": 0.00028055042927064113, "loss": 0.0623, "theoretical_loss": 3.4099066610106714, "tokens_seen": 2383413248 }, { "epoch": 0.44, "learning_rate": 0.00028051031051913664, "loss": 0.0698, "theoretical_loss": 3.409891686170052, "tokens_seen": 2383544320 }, { "epoch": 0.44, "learning_rate": 0.0002804701917676322, "loss": 0.064, "theoretical_loss": 3.40987671238344, "tokens_seen": 2383675392 }, { "epoch": 0.44, "learning_rate": 0.00028043007301612773, "loss": 0.069, "theoretical_loss": 3.4098617396507023, "tokens_seen": 2383806464 }, { "epoch": 0.44, "objective/train/advantage_avg": -0.00031859782757237554, "objective/train/docs_used": 868029, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3157762289047241, "objective/train/original_loss": 1.3157761096954346, "objective/train/theoretical_loss": 3.4098467679717084, "objective/train/tokens_used": 754462176, "objective/train/value_avg": -0.0078277587890625, "objective/train/value_loss": 0.00017947066226042807, "objective/train/value_max": -3.4809112548828125e-05, "objective/train/value_min": -0.226318359375, "objective/train/value_reward_corr": 0.7845245826104963, "objective/train/value_std": 0.01568603515625, "objective/train/weight_avg": 0.9997673034667969, "objective/train/weighted_lm_loss": 1.3150923252105713, "objective/train/weights_max": 1.1529536247253418, "objective/train/weights_min": 0.6187294125556946, "theoretical_loss": 3.4098467679717084, "tokens_seen": 2383937536 }, { "epoch": 0.44, "learning_rate": 0.0002803899542646233, "loss": 0.0651, "theoretical_loss": 3.4098467679717084, "tokens_seen": 2383937536 }, { "epoch": 0.44, "learning_rate": 0.0002803498355131188, "loss": 0.0665, "theoretical_loss": 3.4098317973463246, "tokens_seen": 2384068608 }, { "epoch": 0.44, "learning_rate": 0.0002803097167616144, "loss": 0.0624, "theoretical_loss": 3.4098168277744203, "tokens_seen": 2384199680 }, { "epoch": 0.45, "learning_rate": 0.00028026959801010995, "loss": 0.07, "theoretical_loss": 3.4098018592558628, "tokens_seen": 2384330752 }, { "epoch": 0.45, "learning_rate": 0.00028022947925860546, "loss": 0.0677, "theoretical_loss": 3.4097868917905196, "tokens_seen": 2384461824 }, { "epoch": 0.45, "learning_rate": 0.00028018936050710103, "loss": 0.0729, "theoretical_loss": 3.4097719253782595, "tokens_seen": 2384592896 }, { "epoch": 0.45, "learning_rate": 0.0002801492417555966, "loss": 0.066, "theoretical_loss": 3.40975696001895, "tokens_seen": 2384723968 }, { "epoch": 0.45, "learning_rate": 0.0002801091230040921, "loss": 0.0692, "theoretical_loss": 3.4097419957124595, "tokens_seen": 2384855040 }, { "epoch": 0.45, "learning_rate": 0.0002800690042525877, "loss": 0.0648, "theoretical_loss": 3.4097270324586555, "tokens_seen": 2384986112 }, { "epoch": 0.45, "learning_rate": 0.0002800288855010832, "loss": 0.0703, "theoretical_loss": 3.409712070257407, "tokens_seen": 2385117184 }, { "epoch": 0.45, "learning_rate": 0.00027998876674957876, "loss": 0.0593, "theoretical_loss": 3.4096971091085817, "tokens_seen": 2385248256 }, { "epoch": 0.45, "learning_rate": 0.0002799486479980743, "loss": 0.0642, "theoretical_loss": 3.4096821490120472, "tokens_seen": 2385379328 }, { "epoch": 0.45, "learning_rate": 0.00027990852924656985, "loss": 0.0664, "theoretical_loss": 3.4096671899676725, "tokens_seen": 2385510400 }, { "epoch": 0.45, "learning_rate": 0.0002798684104950654, "loss": 0.0689, "theoretical_loss": 3.4096522319753255, "tokens_seen": 2385641472 }, { "epoch": 0.45, "learning_rate": 0.00027982829174356093, "loss": 0.0695, "theoretical_loss": 3.4096372750348745, "tokens_seen": 2385772544 }, { "epoch": 0.45, "learning_rate": 0.0002797881729920565, "loss": 0.0646, "theoretical_loss": 3.4096223191461874, "tokens_seen": 2385903616 }, { "epoch": 0.45, "learning_rate": 0.00027974805424055206, "loss": 0.0688, "theoretical_loss": 3.409607364309133, "tokens_seen": 2386034688 }, { "epoch": 0.45, "learning_rate": 0.0002797079354890476, "loss": 0.0665, "theoretical_loss": 3.4095924105235786, "tokens_seen": 2386165760 }, { "epoch": 0.45, "learning_rate": 0.00027966781673754315, "loss": 0.0677, "theoretical_loss": 3.4095774577893936, "tokens_seen": 2386296832 }, { "epoch": 0.45, "learning_rate": 0.00027962769798603866, "loss": 0.0705, "theoretical_loss": 3.4095625061064463, "tokens_seen": 2386427904 }, { "epoch": 0.45, "learning_rate": 0.00027958757923453423, "loss": 0.0679, "theoretical_loss": 3.4095475554746044, "tokens_seen": 2386558976 }, { "epoch": 0.45, "learning_rate": 0.00027954746048302974, "loss": 0.0648, "theoretical_loss": 3.4095326058937365, "tokens_seen": 2386690048 }, { "epoch": 0.45, "learning_rate": 0.0002795073417315253, "loss": 0.0661, "theoretical_loss": 3.409517657363711, "tokens_seen": 2386821120 }, { "epoch": 0.45, "learning_rate": 0.0002794672229800209, "loss": 0.0665, "theoretical_loss": 3.4095027098843964, "tokens_seen": 2386952192 }, { "epoch": 0.45, "learning_rate": 0.0002794271042285164, "loss": 0.0698, "theoretical_loss": 3.4094877634556617, "tokens_seen": 2387083264 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.0005475170910358429, "objective/train/docs_used": 869259, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.287144422531128, "objective/train/original_loss": 1.287144422531128, "objective/train/theoretical_loss": 3.4094728180773743, "objective/train/tokens_used": 757738976, "objective/train/value_avg": -0.0063629150390625, "objective/train/value_loss": 8.22754591354169e-05, "objective/train/value_max": -2.7954578399658203e-05, "objective/train/value_min": -0.533203125, "objective/train/value_reward_corr": 0.8033056291361039, "objective/train/value_std": 0.01276397705078125, "objective/train/weight_avg": 1.0005881786346436, "objective/train/weighted_lm_loss": 1.2880747318267822, "objective/train/weights_max": 1.1539610624313354, "objective/train/weights_min": 0.7061864733695984, "theoretical_loss": 3.4094728180773743, "tokens_seen": 2387214336 }, { "epoch": 0.45, "learning_rate": 0.00027938698547701196, "loss": 0.0709, "theoretical_loss": 3.4094728180773743, "tokens_seen": 2387214336 }, { "epoch": 0.45, "learning_rate": 0.00027934686672550753, "loss": 0.0664, "theoretical_loss": 3.4094578737494037, "tokens_seen": 2387345408 }, { "epoch": 0.45, "learning_rate": 0.00027930674797400305, "loss": 0.0695, "theoretical_loss": 3.409442930471618, "tokens_seen": 2387476480 }, { "epoch": 0.45, "learning_rate": 0.0002792666292224986, "loss": 0.0676, "theoretical_loss": 3.4094279882438854, "tokens_seen": 2387607552 }, { "epoch": 0.45, "learning_rate": 0.00027922651047099413, "loss": 0.0638, "theoretical_loss": 3.409413047066075, "tokens_seen": 2387738624 }, { "epoch": 0.45, "learning_rate": 0.0002791863917194897, "loss": 0.0678, "theoretical_loss": 3.409398106938055, "tokens_seen": 2387869696 }, { "epoch": 0.45, "learning_rate": 0.0002791462729679852, "loss": 0.0637, "theoretical_loss": 3.409383167859694, "tokens_seen": 2388000768 }, { "epoch": 0.45, "learning_rate": 0.0002791061542164808, "loss": 0.0664, "theoretical_loss": 3.4093682298308616, "tokens_seen": 2388131840 }, { "epoch": 0.45, "learning_rate": 0.00027906603546497635, "loss": 0.0646, "theoretical_loss": 3.4093532928514256, "tokens_seen": 2388262912 }, { "epoch": 0.45, "learning_rate": 0.00027902591671347186, "loss": 0.0682, "theoretical_loss": 3.4093383569212548, "tokens_seen": 2388393984 }, { "epoch": 0.45, "learning_rate": 0.00027898579796196743, "loss": 0.0691, "theoretical_loss": 3.409323422040218, "tokens_seen": 2388525056 }, { "epoch": 0.45, "learning_rate": 0.000278945679210463, "loss": 0.0676, "theoretical_loss": 3.4093084882081834, "tokens_seen": 2388656128 }, { "epoch": 0.45, "learning_rate": 0.00027890556045895857, "loss": 0.0707, "theoretical_loss": 3.409293555425021, "tokens_seen": 2388787200 }, { "epoch": 0.45, "learning_rate": 0.0002788654417074541, "loss": 0.0718, "theoretical_loss": 3.4092786236905983, "tokens_seen": 2388918272 }, { "epoch": 0.45, "learning_rate": 0.0002788253229559496, "loss": 0.0666, "theoretical_loss": 3.4092636930047853, "tokens_seen": 2389049344 }, { "epoch": 0.45, "learning_rate": 0.00027878520420444516, "loss": 0.0694, "theoretical_loss": 3.4092487633674495, "tokens_seen": 2389180416 }, { "epoch": 0.45, "learning_rate": 0.0002787450854529407, "loss": 0.066, "theoretical_loss": 3.409233834778461, "tokens_seen": 2389311488 }, { "epoch": 0.45, "learning_rate": 0.00027870496670143625, "loss": 0.0643, "theoretical_loss": 3.4092189072376877, "tokens_seen": 2389442560 }, { "epoch": 0.45, "learning_rate": 0.0002786648479499318, "loss": 0.0664, "theoretical_loss": 3.409203980744999, "tokens_seen": 2389573632 }, { "epoch": 0.45, "learning_rate": 0.00027862472919842733, "loss": 0.0676, "theoretical_loss": 3.4091890553002644, "tokens_seen": 2389704704 }, { "epoch": 0.45, "learning_rate": 0.0002785846104469229, "loss": 0.0681, "theoretical_loss": 3.409174130903352, "tokens_seen": 2389835776 }, { "epoch": 0.45, "learning_rate": 0.00027854449169541847, "loss": 0.0656, "theoretical_loss": 3.4091592075541306, "tokens_seen": 2389966848 }, { "epoch": 0.45, "learning_rate": 0.00027850437294391403, "loss": 0.0661, "theoretical_loss": 3.40914428525247, "tokens_seen": 2390097920 }, { "epoch": 0.45, "learning_rate": 0.00027846425419240955, "loss": 0.0645, "theoretical_loss": 3.4091293639982387, "tokens_seen": 2390228992 }, { "epoch": 0.45, "learning_rate": 0.00027842413544090506, "loss": 0.0696, "theoretical_loss": 3.4091144437913057, "tokens_seen": 2390360064 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.0013288272311910987, "objective/train/docs_used": 870459, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.28444242477417, "objective/train/original_loss": 1.2844421863555908, "objective/train/theoretical_loss": 3.4090995246315403, "objective/train/tokens_used": 761015776, "objective/train/value_avg": -0.011444091796875, "objective/train/value_loss": 0.00017145564197562635, "objective/train/value_max": -7.033348083496094e-05, "objective/train/value_min": -0.273681640625, "objective/train/value_reward_corr": 0.7769859292321705, "objective/train/value_std": 0.0173187255859375, "objective/train/weight_avg": 1.0014128684997559, "objective/train/weighted_lm_loss": 1.2850292921066284, "objective/train/weights_max": 1.2937599420547485, "objective/train/weights_min": 0.6101974248886108, "theoretical_loss": 3.4090995246315403, "tokens_seen": 2390491136 }, { "epoch": 0.45, "learning_rate": 0.00027838401668940063, "loss": 0.0663, "theoretical_loss": 3.4090995246315403, "tokens_seen": 2390491136 }, { "epoch": 0.45, "learning_rate": 0.00027834389793789615, "loss": 0.0685, "theoretical_loss": 3.4090846065188116, "tokens_seen": 2390622208 }, { "epoch": 0.45, "learning_rate": 0.0002783037791863917, "loss": 0.0694, "theoretical_loss": 3.4090696894529886, "tokens_seen": 2390753280 }, { "epoch": 0.45, "learning_rate": 0.0002782636604348873, "loss": 0.0659, "theoretical_loss": 3.409054773433941, "tokens_seen": 2390884352 }, { "epoch": 0.45, "learning_rate": 0.0002782235416833828, "loss": 0.0646, "theoretical_loss": 3.409039858461537, "tokens_seen": 2391015424 }, { "epoch": 0.45, "learning_rate": 0.00027818342293187836, "loss": 0.0645, "theoretical_loss": 3.409024944535646, "tokens_seen": 2391146496 }, { "epoch": 0.45, "learning_rate": 0.00027814330418037393, "loss": 0.0698, "theoretical_loss": 3.4090100316561385, "tokens_seen": 2391277568 }, { "epoch": 0.45, "learning_rate": 0.0002781031854288695, "loss": 0.0641, "theoretical_loss": 3.4089951198228814, "tokens_seen": 2391408640 }, { "epoch": 0.45, "learning_rate": 0.000278063066677365, "loss": 0.0684, "theoretical_loss": 3.408980209035746, "tokens_seen": 2391539712 }, { "epoch": 0.45, "learning_rate": 0.00027802294792586053, "loss": 0.0672, "theoretical_loss": 3.408965299294601, "tokens_seen": 2391670784 }, { "epoch": 0.45, "learning_rate": 0.0002779828291743561, "loss": 0.0666, "theoretical_loss": 3.4089503905993155, "tokens_seen": 2391801856 }, { "epoch": 0.45, "learning_rate": 0.0002779427104228516, "loss": 0.0691, "theoretical_loss": 3.4089354829497585, "tokens_seen": 2391932928 }, { "epoch": 0.45, "learning_rate": 0.0002779025916713472, "loss": 0.0686, "theoretical_loss": 3.4089205763458, "tokens_seen": 2392064000 }, { "epoch": 0.45, "learning_rate": 0.00027786247291984275, "loss": 0.0651, "theoretical_loss": 3.408905670787309, "tokens_seen": 2392195072 }, { "epoch": 0.45, "learning_rate": 0.00027782235416833826, "loss": 0.0676, "theoretical_loss": 3.408890766274155, "tokens_seen": 2392326144 }, { "epoch": 0.45, "learning_rate": 0.00027778223541683383, "loss": 0.0671, "theoretical_loss": 3.408875862806208, "tokens_seen": 2392457216 }, { "epoch": 0.45, "learning_rate": 0.0002777421166653294, "loss": 0.0661, "theoretical_loss": 3.408860960383336, "tokens_seen": 2392588288 }, { "epoch": 0.45, "learning_rate": 0.00027770199791382497, "loss": 0.0661, "theoretical_loss": 3.40884605900541, "tokens_seen": 2392719360 }, { "epoch": 0.45, "learning_rate": 0.0002776618791623205, "loss": 0.0635, "theoretical_loss": 3.4088311586722986, "tokens_seen": 2392850432 }, { "epoch": 0.45, "learning_rate": 0.00027762176041081605, "loss": 0.0676, "theoretical_loss": 3.4088162593838716, "tokens_seen": 2392981504 }, { "epoch": 0.45, "learning_rate": 0.00027758164165931157, "loss": 0.0697, "theoretical_loss": 3.4088013611399983, "tokens_seen": 2393112576 }, { "epoch": 0.45, "learning_rate": 0.0002775415229078071, "loss": 0.0648, "theoretical_loss": 3.4087864639405487, "tokens_seen": 2393243648 }, { "epoch": 0.45, "learning_rate": 0.00027750140415630265, "loss": 0.0659, "theoretical_loss": 3.408771567785392, "tokens_seen": 2393374720 }, { "epoch": 0.45, "learning_rate": 0.0002774612854047982, "loss": 0.0682, "theoretical_loss": 3.4087566726743983, "tokens_seen": 2393505792 }, { "epoch": 0.45, "learning_rate": 0.00027742116665329373, "loss": 0.0674, "theoretical_loss": 3.4087417786074363, "tokens_seen": 2393636864 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.0006596501334570348, "objective/train/docs_used": 871588, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2322224378585815, "objective/train/original_loss": 1.232222557067871, "objective/train/theoretical_loss": 3.4087268855843766, "objective/train/tokens_used": 764292576, "objective/train/value_avg": -0.006641387939453125, "objective/train/value_loss": 0.00020770682021975517, "objective/train/value_max": -5.829334259033203e-05, "objective/train/value_min": -0.9267578125, "objective/train/value_reward_corr": 0.7277167418212773, "objective/train/value_std": 0.018341064453125, "objective/train/weight_avg": 1.0007586479187012, "objective/train/weighted_lm_loss": 1.233628273010254, "objective/train/weights_max": 2.447364568710327, "objective/train/weights_min": 0.373849481344223, "theoretical_loss": 3.4087268855843766, "tokens_seen": 2393767936 }, { "epoch": 0.45, "learning_rate": 0.0002773810479017893, "loss": 0.0675, "theoretical_loss": 3.4087268855843766, "tokens_seen": 2393767936 }, { "epoch": 0.45, "learning_rate": 0.00027734092915028487, "loss": 0.066, "theoretical_loss": 3.4087119936050887, "tokens_seen": 2393899008 }, { "epoch": 0.45, "learning_rate": 0.00027730081039878044, "loss": 0.0637, "theoretical_loss": 3.408697102669442, "tokens_seen": 2394030080 }, { "epoch": 0.45, "learning_rate": 0.00027726069164727595, "loss": 0.0647, "theoretical_loss": 3.4086822127773067, "tokens_seen": 2394161152 }, { "epoch": 0.45, "learning_rate": 0.0002772205728957715, "loss": 0.0649, "theoretical_loss": 3.408667323928552, "tokens_seen": 2394292224 }, { "epoch": 0.45, "learning_rate": 0.00027718045414426703, "loss": 0.0712, "theoretical_loss": 3.4086524361230475, "tokens_seen": 2394423296 }, { "epoch": 0.45, "learning_rate": 0.00027714033539276255, "loss": 0.0654, "theoretical_loss": 3.4086375493606638, "tokens_seen": 2394554368 }, { "epoch": 0.45, "learning_rate": 0.0002771002166412581, "loss": 0.0654, "theoretical_loss": 3.40862266364127, "tokens_seen": 2394685440 }, { "epoch": 0.45, "learning_rate": 0.0002770600978897537, "loss": 0.0659, "theoretical_loss": 3.408607778964737, "tokens_seen": 2394816512 }, { "epoch": 0.45, "learning_rate": 0.0002770199791382492, "loss": 0.064, "theoretical_loss": 3.4085928953309335, "tokens_seen": 2394947584 }, { "epoch": 0.45, "learning_rate": 0.00027697986038674477, "loss": 0.0663, "theoretical_loss": 3.4085780127397296, "tokens_seen": 2395078656 }, { "epoch": 0.45, "learning_rate": 0.00027693974163524033, "loss": 0.0664, "theoretical_loss": 3.408563131190996, "tokens_seen": 2395209728 }, { "epoch": 0.45, "learning_rate": 0.0002768996228837359, "loss": 0.0669, "theoretical_loss": 3.408548250684601, "tokens_seen": 2395340800 }, { "epoch": 0.45, "learning_rate": 0.0002768595041322314, "loss": 0.0636, "theoretical_loss": 3.408533371220417, "tokens_seen": 2395471872 }, { "epoch": 0.45, "learning_rate": 0.000276819385380727, "loss": 0.0659, "theoretical_loss": 3.4085184927983114, "tokens_seen": 2395602944 }, { "epoch": 0.45, "learning_rate": 0.0002767792666292225, "loss": 0.0694, "theoretical_loss": 3.408503615418156, "tokens_seen": 2395734016 }, { "epoch": 0.45, "learning_rate": 0.000276739147877718, "loss": 0.0672, "theoretical_loss": 3.4084887390798206, "tokens_seen": 2395865088 }, { "epoch": 0.45, "learning_rate": 0.0002766990291262136, "loss": 0.0665, "theoretical_loss": 3.4084738637831746, "tokens_seen": 2395996160 }, { "epoch": 0.45, "learning_rate": 0.00027665891037470915, "loss": 0.0685, "theoretical_loss": 3.4084589895280883, "tokens_seen": 2396127232 }, { "epoch": 0.45, "learning_rate": 0.00027661879162320466, "loss": 0.0717, "theoretical_loss": 3.408444116314432, "tokens_seen": 2396258304 }, { "epoch": 0.45, "learning_rate": 0.00027657867287170023, "loss": 0.0678, "theoretical_loss": 3.408429244142076, "tokens_seen": 2396389376 }, { "epoch": 0.45, "learning_rate": 0.0002765385541201958, "loss": 0.0686, "theoretical_loss": 3.4084143730108893, "tokens_seen": 2396520448 }, { "epoch": 0.45, "learning_rate": 0.00027649843536869137, "loss": 0.0656, "theoretical_loss": 3.408399502920744, "tokens_seen": 2396651520 }, { "epoch": 0.45, "learning_rate": 0.0002764583166171869, "loss": 0.0689, "theoretical_loss": 3.408384633871509, "tokens_seen": 2396782592 }, { "epoch": 0.45, "learning_rate": 0.00027641819786568245, "loss": 0.0647, "theoretical_loss": 3.4083697658630543, "tokens_seen": 2396913664 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.0005234293639659882, "objective/train/docs_used": 872754, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.261536955833435, "objective/train/original_loss": 1.2615368366241455, "objective/train/theoretical_loss": 3.408354898895251, "objective/train/tokens_used": 767569376, "objective/train/value_avg": -0.007442474365234375, "objective/train/value_loss": 0.00017944452702067792, "objective/train/value_max": -4.506111145019531e-05, "objective/train/value_min": -0.2381591796875, "objective/train/value_reward_corr": 0.6822720446961085, "objective/train/value_std": 0.0137786865234375, "objective/train/weight_avg": 1.0006047487258911, "objective/train/weighted_lm_loss": 1.2623876333236694, "objective/train/weights_max": 1.1486068964004517, "objective/train/weights_min": 0.36834320425987244, "theoretical_loss": 3.408354898895251, "tokens_seen": 2397044736 }, { "epoch": 0.45, "learning_rate": 0.00027637807911417797, "loss": 0.0691, "theoretical_loss": 3.408354898895251, "tokens_seen": 2397044736 }, { "epoch": 0.45, "learning_rate": 0.0002763379603626735, "loss": 0.0679, "theoretical_loss": 3.4083400329679687, "tokens_seen": 2397175808 }, { "epoch": 0.45, "learning_rate": 0.00027629784161116905, "loss": 0.0694, "theoretical_loss": 3.408325168081078, "tokens_seen": 2397306880 }, { "epoch": 0.45, "learning_rate": 0.0002762577228596646, "loss": 0.0682, "theoretical_loss": 3.408310304234449, "tokens_seen": 2397437952 }, { "epoch": 0.45, "learning_rate": 0.0002762176041081602, "loss": 0.0673, "theoretical_loss": 3.4082954414279523, "tokens_seen": 2397569024 }, { "epoch": 0.45, "learning_rate": 0.0002761774853566557, "loss": 0.0666, "theoretical_loss": 3.4082805796614584, "tokens_seen": 2397700096 }, { "epoch": 0.45, "learning_rate": 0.00027613736660515127, "loss": 0.0677, "theoretical_loss": 3.408265718934837, "tokens_seen": 2397831168 }, { "epoch": 0.45, "learning_rate": 0.00027609724785364684, "loss": 0.0694, "theoretical_loss": 3.4082508592479597, "tokens_seen": 2397962240 }, { "epoch": 0.45, "learning_rate": 0.00027605712910214235, "loss": 0.068, "theoretical_loss": 3.4082360006006955, "tokens_seen": 2398093312 }, { "epoch": 0.45, "learning_rate": 0.0002760170103506379, "loss": 0.0633, "theoretical_loss": 3.4082211429929155, "tokens_seen": 2398224384 }, { "epoch": 0.45, "learning_rate": 0.00027597689159913343, "loss": 0.065, "theoretical_loss": 3.4082062864244906, "tokens_seen": 2398355456 }, { "epoch": 0.45, "learning_rate": 0.00027593677284762895, "loss": 0.0655, "theoretical_loss": 3.4081914308952905, "tokens_seen": 2398486528 }, { "epoch": 0.45, "learning_rate": 0.0002758966540961245, "loss": 0.0671, "theoretical_loss": 3.4081765764051863, "tokens_seen": 2398617600 }, { "epoch": 0.45, "learning_rate": 0.0002758565353446201, "loss": 0.0679, "theoretical_loss": 3.4081617229540484, "tokens_seen": 2398748672 }, { "epoch": 0.45, "learning_rate": 0.00027581641659311565, "loss": 0.0656, "theoretical_loss": 3.4081468705417475, "tokens_seen": 2398879744 }, { "epoch": 0.45, "learning_rate": 0.00027577629784161117, "loss": 0.0642, "theoretical_loss": 3.4081320191681534, "tokens_seen": 2399010816 }, { "epoch": 0.45, "learning_rate": 0.00027573617909010674, "loss": 0.0632, "theoretical_loss": 3.408117168833138, "tokens_seen": 2399141888 }, { "epoch": 0.45, "learning_rate": 0.0002756960603386023, "loss": 0.0638, "theoretical_loss": 3.408102319536571, "tokens_seen": 2399272960 }, { "epoch": 0.45, "learning_rate": 0.0002756559415870978, "loss": 0.0686, "theoretical_loss": 3.408087471278323, "tokens_seen": 2399404032 }, { "epoch": 0.45, "learning_rate": 0.0002756158228355934, "loss": 0.0665, "theoretical_loss": 3.4080726240582653, "tokens_seen": 2399535104 }, { "epoch": 0.45, "learning_rate": 0.0002755757040840889, "loss": 0.0664, "theoretical_loss": 3.4080577778762686, "tokens_seen": 2399666176 }, { "epoch": 0.45, "learning_rate": 0.0002755355853325844, "loss": 0.0676, "theoretical_loss": 3.4080429327322026, "tokens_seen": 2399797248 }, { "epoch": 0.45, "learning_rate": 0.00027549546658108, "loss": 0.0671, "theoretical_loss": 3.4080280886259393, "tokens_seen": 2399928320 }, { "epoch": 0.45, "learning_rate": 0.00027545534782957555, "loss": 0.0641, "theoretical_loss": 3.408013245557349, "tokens_seen": 2400059392 }, { "epoch": 0.45, "learning_rate": 0.0002754152290780711, "loss": 0.0687, "theoretical_loss": 3.4079984035263022, "tokens_seen": 2400190464 }, { "epoch": 0.45, "objective/train/advantage_avg": 0.0008174733375199139, "objective/train/docs_used": 873951, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3719667196273804, "objective/train/original_loss": 1.3719666004180908, "objective/train/theoretical_loss": 3.40798356253267, "objective/train/tokens_used": 770846176, "objective/train/value_avg": -0.00582122802734375, "objective/train/value_loss": 7.961410301504657e-05, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.1878662109375, "objective/train/value_reward_corr": 0.7513360226641863, "objective/train/value_std": 0.0098114013671875, "objective/train/weight_avg": 1.000856876373291, "objective/train/weighted_lm_loss": 1.3737499713897705, "objective/train/weights_max": 1.1281108856201172, "objective/train/weights_min": 0.7803025245666504, "theoretical_loss": 3.40798356253267, "tokens_seen": 2400321536 }, { "epoch": 0.45, "learning_rate": 0.00027537511032656663, "loss": 0.0712, "theoretical_loss": 3.40798356253267, "tokens_seen": 2400321536 }, { "epoch": 0.45, "learning_rate": 0.0002753349915750622, "loss": 0.0663, "theoretical_loss": 3.4079687225763236, "tokens_seen": 2400452608 }, { "epoch": 0.45, "learning_rate": 0.00027529487282355777, "loss": 0.0689, "theoretical_loss": 3.4079538836571333, "tokens_seen": 2400583680 }, { "epoch": 0.46, "learning_rate": 0.0002752547540720533, "loss": 0.07, "theoretical_loss": 3.40793904577497, "tokens_seen": 2400714752 }, { "epoch": 0.46, "learning_rate": 0.00027521463532054885, "loss": 0.0678, "theoretical_loss": 3.407924208929705, "tokens_seen": 2400845824 }, { "epoch": 0.46, "learning_rate": 0.00027517451656904437, "loss": 0.0645, "theoretical_loss": 3.4079093731212087, "tokens_seen": 2400976896 }, { "epoch": 0.46, "learning_rate": 0.0002751343978175399, "loss": 0.0674, "theoretical_loss": 3.4078945383493524, "tokens_seen": 2401107968 }, { "epoch": 0.46, "learning_rate": 0.00027509427906603545, "loss": 0.0671, "theoretical_loss": 3.4078797046140075, "tokens_seen": 2401239040 }, { "epoch": 0.46, "learning_rate": 0.000275054160314531, "loss": 0.0681, "theoretical_loss": 3.407864871915044, "tokens_seen": 2401370112 }, { "epoch": 0.46, "learning_rate": 0.0002750140415630266, "loss": 0.0675, "theoretical_loss": 3.4078500402523346, "tokens_seen": 2401501184 }, { "epoch": 0.46, "learning_rate": 0.0002749739228115221, "loss": 0.0679, "theoretical_loss": 3.4078352096257483, "tokens_seen": 2401632256 }, { "epoch": 0.46, "learning_rate": 0.00027493380406001767, "loss": 0.0663, "theoretical_loss": 3.407820380035157, "tokens_seen": 2401763328 }, { "epoch": 0.46, "learning_rate": 0.00027489368530851324, "loss": 0.0684, "theoretical_loss": 3.4078055514804326, "tokens_seen": 2401894400 }, { "epoch": 0.46, "learning_rate": 0.00027485356655700875, "loss": 0.0669, "theoretical_loss": 3.4077907239614453, "tokens_seen": 2402025472 }, { "epoch": 0.46, "learning_rate": 0.0002748134478055043, "loss": 0.0692, "theoretical_loss": 3.407775897478067, "tokens_seen": 2402156544 }, { "epoch": 0.46, "learning_rate": 0.00027477332905399983, "loss": 0.0738, "theoretical_loss": 3.4077610720301674, "tokens_seen": 2402287616 }, { "epoch": 0.46, "learning_rate": 0.00027473321030249535, "loss": 0.0736, "theoretical_loss": 3.4077462476176192, "tokens_seen": 2402418688 }, { "epoch": 0.46, "learning_rate": 0.0002746930915509909, "loss": 0.0662, "theoretical_loss": 3.407731424240293, "tokens_seen": 2402549760 }, { "epoch": 0.46, "learning_rate": 0.0002746529727994865, "loss": 0.0701, "theoretical_loss": 3.4077166018980605, "tokens_seen": 2402680832 }, { "epoch": 0.46, "learning_rate": 0.00027461285404798205, "loss": 0.0678, "theoretical_loss": 3.4077017805907923, "tokens_seen": 2402811904 }, { "epoch": 0.46, "learning_rate": 0.00027457273529647757, "loss": 0.065, "theoretical_loss": 3.40768696031836, "tokens_seen": 2402942976 }, { "epoch": 0.46, "learning_rate": 0.00027453261654497314, "loss": 0.0668, "theoretical_loss": 3.4076721410806345, "tokens_seen": 2403074048 }, { "epoch": 0.46, "learning_rate": 0.0002744924977934687, "loss": 0.0694, "theoretical_loss": 3.407657322877488, "tokens_seen": 2403205120 }, { "epoch": 0.46, "learning_rate": 0.0002744523790419642, "loss": 0.0686, "theoretical_loss": 3.407642505708791, "tokens_seen": 2403336192 }, { "epoch": 0.46, "learning_rate": 0.0002744122602904598, "loss": 0.0747, "theoretical_loss": 3.407627689574415, "tokens_seen": 2403467264 }, { "epoch": 0.46, "objective/train/advantage_avg": -4.663151776185259e-05, "objective/train/docs_used": 875100, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2525273561477661, "objective/train/original_loss": 1.2525272369384766, "objective/train/theoretical_loss": 3.4076128744742324, "objective/train/tokens_used": 774122976, "objective/train/value_avg": -0.00701141357421875, "objective/train/value_loss": 0.0003649452410172671, "objective/train/value_max": -3.594160079956055e-05, "objective/train/value_min": -0.9375, "objective/train/value_reward_corr": 0.7430437905733376, "objective/train/value_std": 0.019683837890625, "objective/train/weight_avg": 1.0001128911972046, "objective/train/weighted_lm_loss": 1.252402901649475, "objective/train/weights_max": 1.714398980140686, "objective/train/weights_min": 0.36900386214256287, "theoretical_loss": 3.4076128744742324, "tokens_seen": 2403598336 }, { "epoch": 0.46, "learning_rate": 0.0002743721415389553, "loss": 0.0664, "theoretical_loss": 3.4076128744742324, "tokens_seen": 2403598336 }, { "epoch": 0.46, "learning_rate": 0.0002743320227874508, "loss": 0.0677, "theoretical_loss": 3.4075980604081133, "tokens_seen": 2403729408 }, { "epoch": 0.46, "learning_rate": 0.0002742919040359464, "loss": 0.0671, "theoretical_loss": 3.4075832473759298, "tokens_seen": 2403860480 }, { "epoch": 0.46, "learning_rate": 0.00027425178528444195, "loss": 0.068, "theoretical_loss": 3.407568435377553, "tokens_seen": 2403991552 }, { "epoch": 0.46, "learning_rate": 0.0002742116665329375, "loss": 0.0714, "theoretical_loss": 3.4075536244128553, "tokens_seen": 2404122624 }, { "epoch": 0.46, "learning_rate": 0.00027417154778143303, "loss": 0.0706, "theoretical_loss": 3.407538814481707, "tokens_seen": 2404253696 }, { "epoch": 0.46, "learning_rate": 0.0002741314290299286, "loss": 0.0724, "theoretical_loss": 3.4075240055839804, "tokens_seen": 2404384768 }, { "epoch": 0.46, "learning_rate": 0.00027409131027842417, "loss": 0.0681, "theoretical_loss": 3.4075091977195466, "tokens_seen": 2404515840 }, { "epoch": 0.46, "learning_rate": 0.0002740511915269197, "loss": 0.069, "theoretical_loss": 3.407494390888278, "tokens_seen": 2404646912 }, { "epoch": 0.46, "learning_rate": 0.00027401107277541525, "loss": 0.0654, "theoretical_loss": 3.4074795850900452, "tokens_seen": 2404777984 }, { "epoch": 0.46, "learning_rate": 0.00027397095402391077, "loss": 0.0709, "theoretical_loss": 3.4074647803247204, "tokens_seen": 2404909056 }, { "epoch": 0.46, "learning_rate": 0.0002739308352724063, "loss": 0.0647, "theoretical_loss": 3.407449976592175, "tokens_seen": 2405040128 }, { "epoch": 0.46, "learning_rate": 0.00027389071652090185, "loss": 0.0715, "theoretical_loss": 3.407435173892281, "tokens_seen": 2405171200 }, { "epoch": 0.46, "learning_rate": 0.0002738505977693974, "loss": 0.0735, "theoretical_loss": 3.4074203722249097, "tokens_seen": 2405302272 }, { "epoch": 0.46, "learning_rate": 0.000273810479017893, "loss": 0.0652, "theoretical_loss": 3.407405571589933, "tokens_seen": 2405433344 }, { "epoch": 0.46, "learning_rate": 0.0002737703602663885, "loss": 0.0697, "theoretical_loss": 3.407390771987223, "tokens_seen": 2405564416 }, { "epoch": 0.46, "learning_rate": 0.00027373024151488407, "loss": 0.0684, "theoretical_loss": 3.4073759734166504, "tokens_seen": 2405695488 }, { "epoch": 0.46, "learning_rate": 0.00027369012276337964, "loss": 0.068, "theoretical_loss": 3.4073611758780884, "tokens_seen": 2405826560 }, { "epoch": 0.46, "learning_rate": 0.00027365000401187515, "loss": 0.0684, "theoretical_loss": 3.4073463793714076, "tokens_seen": 2405957632 }, { "epoch": 0.46, "learning_rate": 0.0002736098852603707, "loss": 0.0661, "theoretical_loss": 3.4073315838964806, "tokens_seen": 2406088704 }, { "epoch": 0.46, "learning_rate": 0.00027356976650886624, "loss": 0.0642, "theoretical_loss": 3.407316789453179, "tokens_seen": 2406219776 }, { "epoch": 0.46, "learning_rate": 0.0002735296477573618, "loss": 0.067, "theoretical_loss": 3.407301996041375, "tokens_seen": 2406350848 }, { "epoch": 0.46, "learning_rate": 0.0002734895290058573, "loss": 0.0696, "theoretical_loss": 3.40728720366094, "tokens_seen": 2406481920 }, { "epoch": 0.46, "learning_rate": 0.0002734494102543529, "loss": 0.0672, "theoretical_loss": 3.407272412311746, "tokens_seen": 2406612992 }, { "epoch": 0.46, "learning_rate": 0.00027340929150284845, "loss": 0.0639, "theoretical_loss": 3.407257621993665, "tokens_seen": 2406744064 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0007847966626286507, "objective/train/docs_used": 876268, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3708107471466064, "objective/train/original_loss": 1.3708107471466064, "objective/train/theoretical_loss": 3.4072428327065696, "objective/train/tokens_used": 777399776, "objective/train/value_avg": -0.01053619384765625, "objective/train/value_loss": 0.0003420967550482601, "objective/train/value_max": -9.840726852416992e-05, "objective/train/value_min": -0.75341796875, "objective/train/value_reward_corr": 0.6833843758198733, "objective/train/value_std": 0.018951416015625, "objective/train/weight_avg": 1.0009347200393677, "objective/train/weighted_lm_loss": 1.3711421489715576, "objective/train/weights_max": 1.2651888132095337, "objective/train/weights_min": 0.3709288537502289, "theoretical_loss": 3.4072428327065696, "tokens_seen": 2406875136 }, { "epoch": 0.46, "learning_rate": 0.00027336917275134397, "loss": 0.0681, "theoretical_loss": 3.4072428327065696, "tokens_seen": 2406875136 }, { "epoch": 0.46, "learning_rate": 0.00027332905399983954, "loss": 0.0669, "theoretical_loss": 3.4072280444503305, "tokens_seen": 2407006208 }, { "epoch": 0.46, "learning_rate": 0.0002732889352483351, "loss": 0.0688, "theoretical_loss": 3.4072132572248206, "tokens_seen": 2407137280 }, { "epoch": 0.46, "learning_rate": 0.0002732488164968306, "loss": 0.0759, "theoretical_loss": 3.407198471029912, "tokens_seen": 2407268352 }, { "epoch": 0.46, "learning_rate": 0.0002732086977453262, "loss": 0.0675, "theoretical_loss": 3.4071836858654767, "tokens_seen": 2407399424 }, { "epoch": 0.46, "learning_rate": 0.0002731685789938217, "loss": 0.072, "theoretical_loss": 3.4071689017313864, "tokens_seen": 2407530496 }, { "epoch": 0.46, "learning_rate": 0.00027312846024231727, "loss": 0.0711, "theoretical_loss": 3.4071541186275134, "tokens_seen": 2407661568 }, { "epoch": 0.46, "learning_rate": 0.0002730883414908128, "loss": 0.067, "theoretical_loss": 3.40713933655373, "tokens_seen": 2407792640 }, { "epoch": 0.46, "learning_rate": 0.00027304822273930835, "loss": 0.0667, "theoretical_loss": 3.4071245555099083, "tokens_seen": 2407923712 }, { "epoch": 0.46, "learning_rate": 0.0002730081039878039, "loss": 0.0668, "theoretical_loss": 3.40710977549592, "tokens_seen": 2408054784 }, { "epoch": 0.46, "learning_rate": 0.00027296798523629944, "loss": 0.07, "theoretical_loss": 3.4070949965116384, "tokens_seen": 2408185856 }, { "epoch": 0.46, "learning_rate": 0.000272927866484795, "loss": 0.0663, "theoretical_loss": 3.4070802185569344, "tokens_seen": 2408316928 }, { "epoch": 0.46, "learning_rate": 0.00027288774773329057, "loss": 0.0702, "theoretical_loss": 3.4070654416316812, "tokens_seen": 2408448000 }, { "epoch": 0.46, "learning_rate": 0.0002728476289817861, "loss": 0.0686, "theoretical_loss": 3.407050665735751, "tokens_seen": 2408579072 }, { "epoch": 0.46, "learning_rate": 0.00027280751023028166, "loss": 0.0663, "theoretical_loss": 3.4070358908690155, "tokens_seen": 2408710144 }, { "epoch": 0.46, "learning_rate": 0.00027276739147877717, "loss": 0.0695, "theoretical_loss": 3.407021117031347, "tokens_seen": 2408841216 }, { "epoch": 0.46, "learning_rate": 0.00027272727272727274, "loss": 0.0641, "theoretical_loss": 3.407006344222619, "tokens_seen": 2408972288 }, { "epoch": 0.46, "learning_rate": 0.00027268715397576825, "loss": 0.0692, "theoretical_loss": 3.4069915724427027, "tokens_seen": 2409103360 }, { "epoch": 0.46, "learning_rate": 0.0002726470352242638, "loss": 0.0741, "theoretical_loss": 3.4069768016914708, "tokens_seen": 2409234432 }, { "epoch": 0.46, "learning_rate": 0.0002726069164727594, "loss": 0.0683, "theoretical_loss": 3.4069620319687957, "tokens_seen": 2409365504 }, { "epoch": 0.46, "learning_rate": 0.0002725667977212549, "loss": 0.0723, "theoretical_loss": 3.40694726327455, "tokens_seen": 2409496576 }, { "epoch": 0.46, "learning_rate": 0.00027252667896975047, "loss": 0.0706, "theoretical_loss": 3.406932495608606, "tokens_seen": 2409627648 }, { "epoch": 0.46, "learning_rate": 0.00027248656021824604, "loss": 0.0685, "theoretical_loss": 3.4069177289708357, "tokens_seen": 2409758720 }, { "epoch": 0.46, "learning_rate": 0.00027244644146674155, "loss": 0.0656, "theoretical_loss": 3.4069029633611128, "tokens_seen": 2409889792 }, { "epoch": 0.46, "learning_rate": 0.0002724063227152371, "loss": 0.0685, "theoretical_loss": 3.406888198779309, "tokens_seen": 2410020864 }, { "epoch": 0.46, "objective/train/advantage_avg": 0.0005908664898015559, "objective/train/docs_used": 877499, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.477681040763855, "objective/train/original_loss": 1.4776809215545654, "objective/train/theoretical_loss": 3.4068734352252967, "objective/train/tokens_used": 780676576, "objective/train/value_avg": -0.01120758056640625, "objective/train/value_loss": 0.0003859704884234816, "objective/train/value_max": -2.8848648071289062e-05, "objective/train/value_min": -0.36376953125, "objective/train/value_reward_corr": 0.7649602235268136, "objective/train/value_std": 0.0290069580078125, "objective/train/weight_avg": 1.0007859468460083, "objective/train/weighted_lm_loss": 1.480027198791504, "objective/train/weights_max": 1.3976384401321411, "objective/train/weights_min": 0.7180020213127136, "theoretical_loss": 3.4068734352252967, "tokens_seen": 2410151936 }, { "epoch": 0.46, "learning_rate": 0.00027236620396373264, "loss": 0.0705, "theoretical_loss": 3.4068734352252967, "tokens_seen": 2410151936 }, { "epoch": 0.46, "learning_rate": 0.0002723260852122282, "loss": 0.0697, "theoretical_loss": 3.4068586726989487, "tokens_seen": 2410283008 }, { "epoch": 0.46, "learning_rate": 0.0002722859664607237, "loss": 0.0679, "theoretical_loss": 3.406843911200138, "tokens_seen": 2410414080 }, { "epoch": 0.46, "learning_rate": 0.0002722458477092193, "loss": 0.0731, "theoretical_loss": 3.4068291507287363, "tokens_seen": 2410545152 }, { "epoch": 0.46, "learning_rate": 0.00027220572895771486, "loss": 0.0715, "theoretical_loss": 3.406814391284617, "tokens_seen": 2410676224 }, { "epoch": 0.46, "learning_rate": 0.00027216561020621037, "loss": 0.0704, "theoretical_loss": 3.406799632867652, "tokens_seen": 2410807296 }, { "epoch": 0.46, "learning_rate": 0.00027212549145470594, "loss": 0.0649, "theoretical_loss": 3.4067848754777152, "tokens_seen": 2410938368 }, { "epoch": 0.46, "learning_rate": 0.0002720853727032015, "loss": 0.0717, "theoretical_loss": 3.406770119114678, "tokens_seen": 2411069440 }, { "epoch": 0.46, "learning_rate": 0.000272045253951697, "loss": 0.0667, "theoretical_loss": 3.4067553637784145, "tokens_seen": 2411200512 }, { "epoch": 0.46, "learning_rate": 0.0002720051352001926, "loss": 0.0714, "theoretical_loss": 3.406740609468796, "tokens_seen": 2411331584 }, { "epoch": 0.46, "learning_rate": 0.0002719650164486881, "loss": 0.0668, "theoretical_loss": 3.406725856185696, "tokens_seen": 2411462656 }, { "epoch": 0.46, "learning_rate": 0.00027192489769718367, "loss": 0.0666, "theoretical_loss": 3.4067111039289877, "tokens_seen": 2411593728 }, { "epoch": 0.46, "learning_rate": 0.0002718847789456792, "loss": 0.0706, "theoretical_loss": 3.4066963526985425, "tokens_seen": 2411724800 }, { "epoch": 0.46, "learning_rate": 0.00027184466019417475, "loss": 0.0645, "theoretical_loss": 3.406681602494235, "tokens_seen": 2411855872 }, { "epoch": 0.46, "learning_rate": 0.0002718045414426703, "loss": 0.0674, "theoretical_loss": 3.4066668533159365, "tokens_seen": 2411986944 }, { "epoch": 0.46, "learning_rate": 0.00027176442269116584, "loss": 0.0674, "theoretical_loss": 3.4066521051635212, "tokens_seen": 2412118016 }, { "epoch": 0.46, "learning_rate": 0.0002717243039396614, "loss": 0.0687, "theoretical_loss": 3.4066373580368614, "tokens_seen": 2412249088 }, { "epoch": 0.46, "learning_rate": 0.000271684185188157, "loss": 0.0702, "theoretical_loss": 3.4066226119358296, "tokens_seen": 2412380160 }, { "epoch": 0.46, "learning_rate": 0.0002716440664366525, "loss": 0.0699, "theoretical_loss": 3.4066078668602997, "tokens_seen": 2412511232 }, { "epoch": 0.46, "learning_rate": 0.00027160394768514806, "loss": 0.0747, "theoretical_loss": 3.406593122810144, "tokens_seen": 2412642304 }, { "epoch": 0.46, "learning_rate": 0.00027156382893364357, "loss": 0.0727, "theoretical_loss": 3.4065783797852354, "tokens_seen": 2412773376 }, { "epoch": 0.46, "learning_rate": 0.00027152371018213914, "loss": 0.0695, "theoretical_loss": 3.4065636377854474, "tokens_seen": 2412904448 }, { "epoch": 0.46, "learning_rate": 0.00027148359143063465, "loss": 0.0641, "theoretical_loss": 3.4065488968106528, "tokens_seen": 2413035520 }, { "epoch": 0.46, "learning_rate": 0.0002714434726791302, "loss": 0.0644, "theoretical_loss": 3.406534156860724, "tokens_seen": 2413166592 }, { "epoch": 0.46, "learning_rate": 0.0002714033539276258, "loss": 0.0654, "theoretical_loss": 3.406519417935536, "tokens_seen": 2413297664 }, { "epoch": 0.46, "objective/train/advantage_avg": -0.00045103859156370163, "objective/train/docs_used": 878709, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.308925986289978, "objective/train/original_loss": 1.3089261054992676, "objective/train/theoretical_loss": 3.40650468003496, "objective/train/tokens_used": 783953376, "objective/train/value_avg": -0.006046295166015625, "objective/train/value_loss": 0.00015751883620396256, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.966796875, "objective/train/value_reward_corr": 0.5685158656165719, "objective/train/value_std": 0.01245880126953125, "objective/train/weight_avg": 0.9996342658996582, "objective/train/weighted_lm_loss": 1.3085368871688843, "objective/train/weights_max": 2.6295082569122314, "objective/train/weights_min": 0.7429630756378174, "theoretical_loss": 3.40650468003496, "tokens_seen": 2413428736 }, { "epoch": 0.46, "learning_rate": 0.0002713632351761213, "loss": 0.068, "theoretical_loss": 3.40650468003496, "tokens_seen": 2413428736 }, { "epoch": 0.46, "learning_rate": 0.00027132311642461687, "loss": 0.07, "theoretical_loss": 3.4064899431588698, "tokens_seen": 2413559808 }, { "epoch": 0.46, "learning_rate": 0.00027128299767311244, "loss": 0.0653, "theoretical_loss": 3.4064752073071385, "tokens_seen": 2413690880 }, { "epoch": 0.46, "learning_rate": 0.000271242878921608, "loss": 0.0662, "theoretical_loss": 3.4064604724796395, "tokens_seen": 2413821952 }, { "epoch": 0.46, "learning_rate": 0.0002712027601701035, "loss": 0.0707, "theoretical_loss": 3.4064457386762457, "tokens_seen": 2413953024 }, { "epoch": 0.46, "learning_rate": 0.00027116264141859904, "loss": 0.0679, "theoretical_loss": 3.4064310058968306, "tokens_seen": 2414084096 }, { "epoch": 0.46, "learning_rate": 0.0002711225226670946, "loss": 0.0701, "theoretical_loss": 3.406416274141267, "tokens_seen": 2414215168 }, { "epoch": 0.46, "learning_rate": 0.0002710824039155901, "loss": 0.0667, "theoretical_loss": 3.406401543409429, "tokens_seen": 2414346240 }, { "epoch": 0.46, "learning_rate": 0.0002710422851640857, "loss": 0.069, "theoretical_loss": 3.4063868137011886, "tokens_seen": 2414477312 }, { "epoch": 0.46, "learning_rate": 0.00027100216641258126, "loss": 0.0708, "theoretical_loss": 3.4063720850164203, "tokens_seen": 2414608384 }, { "epoch": 0.46, "learning_rate": 0.00027096204766107677, "loss": 0.0723, "theoretical_loss": 3.406357357354997, "tokens_seen": 2414739456 }, { "epoch": 0.46, "learning_rate": 0.00027092192890957234, "loss": 0.0689, "theoretical_loss": 3.406342630716792, "tokens_seen": 2414870528 }, { "epoch": 0.46, "learning_rate": 0.0002708818101580679, "loss": 0.0687, "theoretical_loss": 3.406327905101679, "tokens_seen": 2415001600 }, { "epoch": 0.46, "learning_rate": 0.0002708416914065635, "loss": 0.0689, "theoretical_loss": 3.406313180509531, "tokens_seen": 2415132672 }, { "epoch": 0.46, "learning_rate": 0.000270801572655059, "loss": 0.0639, "theoretical_loss": 3.4062984569402213, "tokens_seen": 2415263744 }, { "epoch": 0.46, "learning_rate": 0.0002707614539035545, "loss": 0.071, "theoretical_loss": 3.406283734393624, "tokens_seen": 2415394816 }, { "epoch": 0.46, "learning_rate": 0.0002707213351520501, "loss": 0.0694, "theoretical_loss": 3.406269012869612, "tokens_seen": 2415525888 }, { "epoch": 0.46, "learning_rate": 0.0002706812164005456, "loss": 0.0705, "theoretical_loss": 3.4062542923680588, "tokens_seen": 2415656960 }, { "epoch": 0.46, "learning_rate": 0.00027064109764904116, "loss": 0.0707, "theoretical_loss": 3.4062395728888384, "tokens_seen": 2415788032 }, { "epoch": 0.46, "learning_rate": 0.0002706009788975367, "loss": 0.0689, "theoretical_loss": 3.406224854431824, "tokens_seen": 2415919104 }, { "epoch": 0.46, "learning_rate": 0.00027056086014603224, "loss": 0.0693, "theoretical_loss": 3.4062101369968887, "tokens_seen": 2416050176 }, { "epoch": 0.46, "learning_rate": 0.0002705207413945278, "loss": 0.066, "theoretical_loss": 3.406195420583907, "tokens_seen": 2416181248 }, { "epoch": 0.46, "learning_rate": 0.0002704806226430234, "loss": 0.0698, "theoretical_loss": 3.406180705192752, "tokens_seen": 2416312320 }, { "epoch": 0.46, "learning_rate": 0.00027044050389151894, "loss": 0.0668, "theoretical_loss": 3.4061659908232973, "tokens_seen": 2416443392 }, { "epoch": 0.46, "learning_rate": 0.00027040038514001446, "loss": 0.0724, "theoretical_loss": 3.406151277475417, "tokens_seen": 2416574464 }, { "epoch": 0.46, "objective/train/advantage_avg": -0.0007252574432641268, "objective/train/docs_used": 880009, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1596753597259521, "objective/train/original_loss": 1.1596752405166626, "objective/train/theoretical_loss": 3.4061365651489837, "objective/train/tokens_used": 787230176, "objective/train/value_avg": -0.01044464111328125, "objective/train/value_loss": 0.00031934547587297857, "objective/train/value_max": -5.1856040954589844e-05, "objective/train/value_min": -0.63916015625, "objective/train/value_reward_corr": 0.8140036041431318, "objective/train/value_std": 0.0236053466796875, "objective/train/weight_avg": 0.999426543712616, "objective/train/weighted_lm_loss": 1.1590797901153564, "objective/train/weights_max": 1.6738673448562622, "objective/train/weights_min": 0.36930587887763977, "theoretical_loss": 3.4061365651489837, "tokens_seen": 2416705536 }, { "epoch": 0.46, "learning_rate": 0.00027036026638850997, "loss": 0.0655, "theoretical_loss": 3.4061365651489837, "tokens_seen": 2416705536 }, { "epoch": 0.46, "learning_rate": 0.00027032014763700554, "loss": 0.0713, "theoretical_loss": 3.4061218538438722, "tokens_seen": 2416836608 }, { "epoch": 0.46, "learning_rate": 0.00027028002888550105, "loss": 0.0663, "theoretical_loss": 3.406107143559956, "tokens_seen": 2416967680 }, { "epoch": 0.46, "learning_rate": 0.0002702399101339966, "loss": 0.0723, "theoretical_loss": 3.4060924342971086, "tokens_seen": 2417098752 }, { "epoch": 0.47, "learning_rate": 0.0002701997913824922, "loss": 0.0672, "theoretical_loss": 3.4060777260552038, "tokens_seen": 2417229824 }, { "epoch": 0.47, "learning_rate": 0.0002701596726309877, "loss": 0.0625, "theoretical_loss": 3.406063018834115, "tokens_seen": 2417360896 }, { "epoch": 0.47, "learning_rate": 0.0002701195538794833, "loss": 0.068, "theoretical_loss": 3.4060483126337173, "tokens_seen": 2417491968 }, { "epoch": 0.47, "learning_rate": 0.00027007943512797884, "loss": 0.0712, "theoretical_loss": 3.406033607453883, "tokens_seen": 2417623040 }, { "epoch": 0.47, "learning_rate": 0.0002700393163764744, "loss": 0.0687, "theoretical_loss": 3.4060189032944868, "tokens_seen": 2417754112 }, { "epoch": 0.47, "learning_rate": 0.0002699991976249699, "loss": 0.0694, "theoretical_loss": 3.4060042001554023, "tokens_seen": 2417885184 }, { "epoch": 0.47, "learning_rate": 0.00026995907887346544, "loss": 0.0654, "theoretical_loss": 3.4059894980365035, "tokens_seen": 2418016256 }, { "epoch": 0.47, "learning_rate": 0.000269918960121961, "loss": 0.0684, "theoretical_loss": 3.4059747969376644, "tokens_seen": 2418147328 }, { "epoch": 0.47, "learning_rate": 0.0002698788413704565, "loss": 0.0653, "theoretical_loss": 3.4059600968587587, "tokens_seen": 2418278400 }, { "epoch": 0.47, "learning_rate": 0.0002698387226189521, "loss": 0.068, "theoretical_loss": 3.4059453977996603, "tokens_seen": 2418409472 }, { "epoch": 0.47, "learning_rate": 0.00026979860386744766, "loss": 0.0646, "theoretical_loss": 3.4059306997602437, "tokens_seen": 2418540544 }, { "epoch": 0.47, "learning_rate": 0.00026975848511594317, "loss": 0.0693, "theoretical_loss": 3.4059160027403825, "tokens_seen": 2418671616 }, { "epoch": 0.47, "learning_rate": 0.00026971836636443874, "loss": 0.0728, "theoretical_loss": 3.405901306739951, "tokens_seen": 2418802688 }, { "epoch": 0.47, "learning_rate": 0.0002696782476129343, "loss": 0.0679, "theoretical_loss": 3.405886611758823, "tokens_seen": 2418933760 }, { "epoch": 0.47, "learning_rate": 0.0002696381288614299, "loss": 0.0708, "theoretical_loss": 3.405871917796872, "tokens_seen": 2419064832 }, { "epoch": 0.47, "learning_rate": 0.0002695980101099254, "loss": 0.0697, "theoretical_loss": 3.405857224853974, "tokens_seen": 2419195904 }, { "epoch": 0.47, "learning_rate": 0.00026955789135842096, "loss": 0.0656, "theoretical_loss": 3.405842532930001, "tokens_seen": 2419326976 }, { "epoch": 0.47, "learning_rate": 0.0002695177726069165, "loss": 0.0674, "theoretical_loss": 3.405827842024828, "tokens_seen": 2419458048 }, { "epoch": 0.47, "learning_rate": 0.000269477653855412, "loss": 0.0693, "theoretical_loss": 3.4058131521383292, "tokens_seen": 2419589120 }, { "epoch": 0.47, "learning_rate": 0.00026943753510390756, "loss": 0.068, "theoretical_loss": 3.4057984632703793, "tokens_seen": 2419720192 }, { "epoch": 0.47, "learning_rate": 0.0002693974163524031, "loss": 0.0664, "theoretical_loss": 3.4057837754208515, "tokens_seen": 2419851264 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0007652671192772686, "objective/train/docs_used": 881081, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3519432544708252, "objective/train/original_loss": 1.3519431352615356, "objective/train/theoretical_loss": 3.4057690885896204, "objective/train/tokens_used": 790506976, "objective/train/value_avg": -0.005924224853515625, "objective/train/value_loss": 0.00015909731155261397, "objective/train/value_max": -7.31348991394043e-05, "objective/train/value_min": -0.2095947265625, "objective/train/value_reward_corr": 0.5959482239078341, "objective/train/value_std": 0.01018524169921875, "objective/train/weight_avg": 1.0008348226547241, "objective/train/weighted_lm_loss": 1.3525999784469604, "objective/train/weights_max": 1.172405481338501, "objective/train/weights_min": 0.3693559169769287, "theoretical_loss": 3.4057690885896204, "tokens_seen": 2419982336 }, { "epoch": 0.47, "learning_rate": 0.00026935729760089864, "loss": 0.0674, "theoretical_loss": 3.4057690885896204, "tokens_seen": 2419982336 }, { "epoch": 0.47, "learning_rate": 0.0002693171788493942, "loss": 0.066, "theoretical_loss": 3.4057544027765605, "tokens_seen": 2420113408 }, { "epoch": 0.47, "learning_rate": 0.0002692770600978898, "loss": 0.0673, "theoretical_loss": 3.4057397179815463, "tokens_seen": 2420244480 }, { "epoch": 0.47, "learning_rate": 0.00026923694134638534, "loss": 0.0663, "theoretical_loss": 3.4057250342044516, "tokens_seen": 2420375552 }, { "epoch": 0.47, "learning_rate": 0.00026919682259488086, "loss": 0.0653, "theoretical_loss": 3.4057103514451503, "tokens_seen": 2420506624 }, { "epoch": 0.47, "learning_rate": 0.0002691567038433764, "loss": 0.0662, "theoretical_loss": 3.405695669703518, "tokens_seen": 2420637696 }, { "epoch": 0.47, "learning_rate": 0.00026911658509187194, "loss": 0.0658, "theoretical_loss": 3.4056809889794284, "tokens_seen": 2420768768 }, { "epoch": 0.47, "learning_rate": 0.00026907646634036746, "loss": 0.0677, "theoretical_loss": 3.405666309272755, "tokens_seen": 2420899840 }, { "epoch": 0.47, "learning_rate": 0.000269036347588863, "loss": 0.0649, "theoretical_loss": 3.4056516305833737, "tokens_seen": 2421030912 }, { "epoch": 0.47, "learning_rate": 0.0002689962288373586, "loss": 0.0666, "theoretical_loss": 3.4056369529111583, "tokens_seen": 2421161984 }, { "epoch": 0.47, "learning_rate": 0.0002689561100858541, "loss": 0.0662, "theoretical_loss": 3.4056222762559836, "tokens_seen": 2421293056 }, { "epoch": 0.47, "learning_rate": 0.0002689159913343497, "loss": 0.0688, "theoretical_loss": 3.4056076006177234, "tokens_seen": 2421424128 }, { "epoch": 0.47, "learning_rate": 0.00026887587258284524, "loss": 0.0663, "theoretical_loss": 3.4055929259962525, "tokens_seen": 2421555200 }, { "epoch": 0.47, "learning_rate": 0.0002688357538313408, "loss": 0.0689, "theoretical_loss": 3.4055782523914453, "tokens_seen": 2421686272 }, { "epoch": 0.47, "learning_rate": 0.0002687956350798363, "loss": 0.0694, "theoretical_loss": 3.4055635798031765, "tokens_seen": 2421817344 }, { "epoch": 0.47, "learning_rate": 0.0002687555163283319, "loss": 0.0665, "theoretical_loss": 3.405548908231321, "tokens_seen": 2421948416 }, { "epoch": 0.47, "learning_rate": 0.0002687153975768274, "loss": 0.0666, "theoretical_loss": 3.4055342376757527, "tokens_seen": 2422079488 }, { "epoch": 0.47, "learning_rate": 0.0002686752788253229, "loss": 0.0653, "theoretical_loss": 3.405519568136347, "tokens_seen": 2422210560 }, { "epoch": 0.47, "learning_rate": 0.0002686351600738185, "loss": 0.0674, "theoretical_loss": 3.4055048996129775, "tokens_seen": 2422341632 }, { "epoch": 0.47, "learning_rate": 0.00026859504132231406, "loss": 0.0671, "theoretical_loss": 3.40549023210552, "tokens_seen": 2422472704 }, { "epoch": 0.47, "learning_rate": 0.00026855492257080963, "loss": 0.069, "theoretical_loss": 3.4054755656138487, "tokens_seen": 2422603776 }, { "epoch": 0.47, "learning_rate": 0.00026851480381930514, "loss": 0.0666, "theoretical_loss": 3.4054609001378378, "tokens_seen": 2422734848 }, { "epoch": 0.47, "learning_rate": 0.0002684746850678007, "loss": 0.0696, "theoretical_loss": 3.4054462356773625, "tokens_seen": 2422865920 }, { "epoch": 0.47, "learning_rate": 0.0002684345663162963, "loss": 0.0755, "theoretical_loss": 3.4054315722322976, "tokens_seen": 2422996992 }, { "epoch": 0.47, "learning_rate": 0.0002683944475647918, "loss": 0.0658, "theoretical_loss": 3.405416909802518, "tokens_seen": 2423128064 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0004935134202241898, "objective/train/docs_used": 882232, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2994698286056519, "objective/train/original_loss": 1.2994698286056519, "objective/train/theoretical_loss": 3.4054022483878983, "objective/train/tokens_used": 793783776, "objective/train/value_avg": -0.0054168701171875, "objective/train/value_loss": 0.00014271789405029267, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.30810546875, "objective/train/value_reward_corr": 0.7198844407197817, "objective/train/value_std": 0.0126190185546875, "objective/train/weight_avg": 1.0005581378936768, "objective/train/weighted_lm_loss": 1.2998682260513306, "objective/train/weights_max": 1.255659818649292, "objective/train/weights_min": 0.3994268476963043, "theoretical_loss": 3.4054022483878983, "tokens_seen": 2423259136 }, { "epoch": 0.47, "learning_rate": 0.00026835432881328736, "loss": 0.0665, "theoretical_loss": 3.4054022483878983, "tokens_seen": 2423259136 }, { "epoch": 0.47, "learning_rate": 0.0002683142100617829, "loss": 0.0642, "theoretical_loss": 3.405387587988313, "tokens_seen": 2423390208 }, { "epoch": 0.47, "learning_rate": 0.0002682740913102784, "loss": 0.0685, "theoretical_loss": 3.4053729286036374, "tokens_seen": 2423521280 }, { "epoch": 0.47, "learning_rate": 0.00026823397255877396, "loss": 0.0663, "theoretical_loss": 3.405358270233746, "tokens_seen": 2423652352 }, { "epoch": 0.47, "learning_rate": 0.0002681938538072695, "loss": 0.0644, "theoretical_loss": 3.405343612878514, "tokens_seen": 2423783424 }, { "epoch": 0.47, "learning_rate": 0.0002681537350557651, "loss": 0.0679, "theoretical_loss": 3.405328956537816, "tokens_seen": 2423914496 }, { "epoch": 0.47, "learning_rate": 0.0002681136163042606, "loss": 0.0716, "theoretical_loss": 3.405314301211527, "tokens_seen": 2424045568 }, { "epoch": 0.47, "learning_rate": 0.0002680734975527562, "loss": 0.0671, "theoretical_loss": 3.4052996468995222, "tokens_seen": 2424176640 }, { "epoch": 0.47, "learning_rate": 0.00026803337880125175, "loss": 0.0681, "theoretical_loss": 3.4052849936016765, "tokens_seen": 2424307712 }, { "epoch": 0.47, "learning_rate": 0.00026799326004974726, "loss": 0.066, "theoretical_loss": 3.4052703413178644, "tokens_seen": 2424438784 }, { "epoch": 0.47, "learning_rate": 0.00026795314129824283, "loss": 0.0669, "theoretical_loss": 3.4052556900479614, "tokens_seen": 2424569856 }, { "epoch": 0.47, "learning_rate": 0.00026791302254673834, "loss": 0.0679, "theoretical_loss": 3.405241039791843, "tokens_seen": 2424700928 }, { "epoch": 0.47, "learning_rate": 0.00026787290379523386, "loss": 0.068, "theoretical_loss": 3.405226390549383, "tokens_seen": 2424832000 }, { "epoch": 0.47, "learning_rate": 0.0002678327850437294, "loss": 0.0682, "theoretical_loss": 3.405211742320457, "tokens_seen": 2424963072 }, { "epoch": 0.47, "learning_rate": 0.000267792666292225, "loss": 0.0669, "theoretical_loss": 3.4051970951049406, "tokens_seen": 2425094144 }, { "epoch": 0.47, "learning_rate": 0.00026775254754072056, "loss": 0.0642, "theoretical_loss": 3.4051824489027087, "tokens_seen": 2425225216 }, { "epoch": 0.47, "learning_rate": 0.0002677124287892161, "loss": 0.0669, "theoretical_loss": 3.4051678037136357, "tokens_seen": 2425356288 }, { "epoch": 0.47, "learning_rate": 0.00026767231003771164, "loss": 0.0657, "theoretical_loss": 3.405153159537598, "tokens_seen": 2425487360 }, { "epoch": 0.47, "learning_rate": 0.0002676321912862072, "loss": 0.0665, "theoretical_loss": 3.40513851637447, "tokens_seen": 2425618432 }, { "epoch": 0.47, "learning_rate": 0.0002675920725347027, "loss": 0.0706, "theoretical_loss": 3.405123874224127, "tokens_seen": 2425749504 }, { "epoch": 0.47, "learning_rate": 0.0002675519537831983, "loss": 0.0697, "theoretical_loss": 3.4051092330864443, "tokens_seen": 2425880576 }, { "epoch": 0.47, "learning_rate": 0.0002675118350316938, "loss": 0.0709, "theoretical_loss": 3.405094592961297, "tokens_seen": 2426011648 }, { "epoch": 0.47, "learning_rate": 0.0002674717162801893, "loss": 0.0687, "theoretical_loss": 3.405079953848561, "tokens_seen": 2426142720 }, { "epoch": 0.47, "learning_rate": 0.0002674315975286849, "loss": 0.0733, "theoretical_loss": 3.4050653157481108, "tokens_seen": 2426273792 }, { "epoch": 0.47, "learning_rate": 0.00026739147877718046, "loss": 0.0698, "theoretical_loss": 3.4050506786598222, "tokens_seen": 2426404864 }, { "epoch": 0.47, "objective/train/advantage_avg": -0.001013284781947732, "objective/train/docs_used": 883303, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3044350147247314, "objective/train/original_loss": 1.304434895515442, "objective/train/theoretical_loss": 3.4050360425835704, "objective/train/tokens_used": 797060576, "objective/train/value_avg": -0.0062103271484375, "objective/train/value_loss": 0.00019765348406508565, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.378662109375, "objective/train/value_reward_corr": 0.8034383182922589, "objective/train/value_std": 0.01419830322265625, "objective/train/weight_avg": 0.9990766644477844, "objective/train/weighted_lm_loss": 1.3036575317382812, "objective/train/weights_max": 1.1283615827560425, "objective/train/weights_min": 0.36817532777786255, "theoretical_loss": 3.4050360425835704, "tokens_seen": 2426535936 }, { "epoch": 0.47, "learning_rate": 0.00026735136002567603, "loss": 0.0669, "theoretical_loss": 3.4050360425835704, "tokens_seen": 2426535936 }, { "epoch": 0.47, "learning_rate": 0.00026731124127417154, "loss": 0.0696, "theoretical_loss": 3.405021407519231, "tokens_seen": 2426667008 }, { "epoch": 0.47, "learning_rate": 0.0002672711225226671, "loss": 0.0719, "theoretical_loss": 3.4050067734666785, "tokens_seen": 2426798080 }, { "epoch": 0.47, "learning_rate": 0.0002672310037711627, "loss": 0.0687, "theoretical_loss": 3.4049921404257897, "tokens_seen": 2426929152 }, { "epoch": 0.47, "learning_rate": 0.0002671908850196582, "loss": 0.0673, "theoretical_loss": 3.404977508396439, "tokens_seen": 2427060224 }, { "epoch": 0.47, "learning_rate": 0.00026715076626815376, "loss": 0.0679, "theoretical_loss": 3.4049628773785017, "tokens_seen": 2427191296 }, { "epoch": 0.47, "learning_rate": 0.0002671106475166493, "loss": 0.0675, "theoretical_loss": 3.404948247371854, "tokens_seen": 2427322368 }, { "epoch": 0.47, "learning_rate": 0.0002670705287651448, "loss": 0.0678, "theoretical_loss": 3.4049336183763717, "tokens_seen": 2427453440 }, { "epoch": 0.47, "learning_rate": 0.00026703041001364036, "loss": 0.066, "theoretical_loss": 3.404918990391929, "tokens_seen": 2427584512 }, { "epoch": 0.47, "learning_rate": 0.00026699029126213593, "loss": 0.0686, "theoretical_loss": 3.4049043634184026, "tokens_seen": 2427715584 }, { "epoch": 0.47, "learning_rate": 0.0002669501725106315, "loss": 0.0716, "theoretical_loss": 3.4048897374556675, "tokens_seen": 2427846656 }, { "epoch": 0.47, "learning_rate": 0.000266910053759127, "loss": 0.0655, "theoretical_loss": 3.4048751125035994, "tokens_seen": 2427977728 }, { "epoch": 0.47, "learning_rate": 0.0002668699350076226, "loss": 0.0692, "theoretical_loss": 3.404860488562074, "tokens_seen": 2428108800 }, { "epoch": 0.47, "learning_rate": 0.00026682981625611815, "loss": 0.0636, "theoretical_loss": 3.404845865630967, "tokens_seen": 2428239872 }, { "epoch": 0.47, "learning_rate": 0.00026678969750461366, "loss": 0.0645, "theoretical_loss": 3.404831243710154, "tokens_seen": 2428370944 }, { "epoch": 0.47, "learning_rate": 0.00026674957875310923, "loss": 0.0642, "theoretical_loss": 3.40481662279951, "tokens_seen": 2428502016 }, { "epoch": 0.47, "learning_rate": 0.00026670946000160474, "loss": 0.0637, "theoretical_loss": 3.404802002898912, "tokens_seen": 2428633088 }, { "epoch": 0.47, "learning_rate": 0.00026666934125010026, "loss": 0.0713, "theoretical_loss": 3.4047873840082348, "tokens_seen": 2428764160 }, { "epoch": 0.47, "learning_rate": 0.0002666292224985958, "loss": 0.0672, "theoretical_loss": 3.404772766127354, "tokens_seen": 2428895232 }, { "epoch": 0.47, "learning_rate": 0.0002665891037470914, "loss": 0.0628, "theoretical_loss": 3.404758149256146, "tokens_seen": 2429026304 }, { "epoch": 0.47, "learning_rate": 0.00026654898499558696, "loss": 0.0676, "theoretical_loss": 3.404743533394486, "tokens_seen": 2429157376 }, { "epoch": 0.47, "learning_rate": 0.0002665088662440825, "loss": 0.0674, "theoretical_loss": 3.4047289185422507, "tokens_seen": 2429288448 }, { "epoch": 0.47, "learning_rate": 0.00026646874749257805, "loss": 0.0661, "theoretical_loss": 3.404714304699315, "tokens_seen": 2429419520 }, { "epoch": 0.47, "learning_rate": 0.0002664286287410736, "loss": 0.0671, "theoretical_loss": 3.404699691865555, "tokens_seen": 2429550592 }, { "epoch": 0.47, "learning_rate": 0.00026638850998956913, "loss": 0.0663, "theoretical_loss": 3.4046850800408466, "tokens_seen": 2429681664 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.0006875027902424335, "objective/train/docs_used": 884501, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.373802661895752, "objective/train/original_loss": 1.3738027811050415, "objective/train/theoretical_loss": 3.4046704692250658, "objective/train/tokens_used": 800337376, "objective/train/value_avg": -0.00478363037109375, "objective/train/value_loss": 0.00014554620429407805, "objective/train/value_max": -6.300210952758789e-05, "objective/train/value_min": -0.2310791015625, "objective/train/value_reward_corr": 0.5020030640327655, "objective/train/value_std": 0.00736236572265625, "objective/train/weight_avg": 1.00075364112854, "objective/train/weighted_lm_loss": 1.3750274181365967, "objective/train/weights_max": 1.0757941007614136, "objective/train/weights_min": 0.3692037761211395, "theoretical_loss": 3.4046704692250658, "tokens_seen": 2429812736 }, { "epoch": 0.47, "learning_rate": 0.0002663483912380647, "loss": 0.0659, "theoretical_loss": 3.4046704692250658, "tokens_seen": 2429812736 }, { "epoch": 0.47, "learning_rate": 0.0002663082724865602, "loss": 0.0663, "theoretical_loss": 3.4046558594180887, "tokens_seen": 2429943808 }, { "epoch": 0.47, "learning_rate": 0.0002662681537350557, "loss": 0.0644, "theoretical_loss": 3.404641250619791, "tokens_seen": 2430074880 }, { "epoch": 0.47, "learning_rate": 0.0002662280349835513, "loss": 0.0649, "theoretical_loss": 3.4046266428300482, "tokens_seen": 2430205952 }, { "epoch": 0.47, "learning_rate": 0.00026618791623204686, "loss": 0.0662, "theoretical_loss": 3.404612036048737, "tokens_seen": 2430337024 }, { "epoch": 0.47, "learning_rate": 0.00026614779748054243, "loss": 0.0684, "theoretical_loss": 3.404597430275733, "tokens_seen": 2430468096 }, { "epoch": 0.47, "learning_rate": 0.00026610767872903794, "loss": 0.0703, "theoretical_loss": 3.404582825510913, "tokens_seen": 2430599168 }, { "epoch": 0.47, "learning_rate": 0.0002660675599775335, "loss": 0.0664, "theoretical_loss": 3.4045682217541517, "tokens_seen": 2430730240 }, { "epoch": 0.47, "learning_rate": 0.0002660274412260291, "loss": 0.0667, "theoretical_loss": 3.404553619005326, "tokens_seen": 2430861312 }, { "epoch": 0.47, "learning_rate": 0.0002659873224745246, "loss": 0.0668, "theoretical_loss": 3.4045390172643124, "tokens_seen": 2430992384 }, { "epoch": 0.47, "learning_rate": 0.00026594720372302016, "loss": 0.0672, "theoretical_loss": 3.404524416530986, "tokens_seen": 2431123456 }, { "epoch": 0.47, "learning_rate": 0.0002659070849715157, "loss": 0.0683, "theoretical_loss": 3.4045098168052235, "tokens_seen": 2431254528 }, { "epoch": 0.47, "learning_rate": 0.00026586696622001125, "loss": 0.0703, "theoretical_loss": 3.4044952180869013, "tokens_seen": 2431385600 }, { "epoch": 0.47, "learning_rate": 0.00026582684746850676, "loss": 0.0668, "theoretical_loss": 3.404480620375895, "tokens_seen": 2431516672 }, { "epoch": 0.47, "learning_rate": 0.00026578672871700233, "loss": 0.0662, "theoretical_loss": 3.4044660236720805, "tokens_seen": 2431647744 }, { "epoch": 0.47, "learning_rate": 0.0002657466099654979, "loss": 0.0704, "theoretical_loss": 3.4044514279753355, "tokens_seen": 2431778816 }, { "epoch": 0.47, "learning_rate": 0.0002657064912139934, "loss": 0.065, "theoretical_loss": 3.404436833285535, "tokens_seen": 2431909888 }, { "epoch": 0.47, "learning_rate": 0.000265666372462489, "loss": 0.0661, "theoretical_loss": 3.4044222396025554, "tokens_seen": 2432040960 }, { "epoch": 0.47, "learning_rate": 0.00026562625371098455, "loss": 0.0669, "theoretical_loss": 3.4044076469262734, "tokens_seen": 2432172032 }, { "epoch": 0.47, "learning_rate": 0.00026558613495948006, "loss": 0.0703, "theoretical_loss": 3.404393055256565, "tokens_seen": 2432303104 }, { "epoch": 0.47, "learning_rate": 0.00026554601620797563, "loss": 0.0647, "theoretical_loss": 3.4043784645933064, "tokens_seen": 2432434176 }, { "epoch": 0.47, "learning_rate": 0.00026550589745647114, "loss": 0.0665, "theoretical_loss": 3.4043638749363745, "tokens_seen": 2432565248 }, { "epoch": 0.47, "learning_rate": 0.0002654657787049667, "loss": 0.0689, "theoretical_loss": 3.4043492862856457, "tokens_seen": 2432696320 }, { "epoch": 0.47, "learning_rate": 0.00026542565995346223, "loss": 0.0667, "theoretical_loss": 3.4043346986409952, "tokens_seen": 2432827392 }, { "epoch": 0.47, "learning_rate": 0.0002653855412019578, "loss": 0.0675, "theoretical_loss": 3.404320112002301, "tokens_seen": 2432958464 }, { "epoch": 0.47, "objective/train/advantage_avg": 0.000539600383490324, "objective/train/docs_used": 885704, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3602089881896973, "objective/train/original_loss": 1.3602091073989868, "objective/train/theoretical_loss": 3.4043055263694386, "objective/train/tokens_used": 803614176, "objective/train/value_avg": -0.005519866943359375, "objective/train/value_loss": 7.893727160990238e-05, "objective/train/value_max": -3.069639205932617e-05, "objective/train/value_min": -0.406982421875, "objective/train/value_reward_corr": 0.7609828571633932, "objective/train/value_std": 0.011138916015625, "objective/train/weight_avg": 1.00057852268219, "objective/train/weighted_lm_loss": 1.3605904579162598, "objective/train/weights_max": 1.2112469673156738, "objective/train/weights_min": 0.702682375907898, "theoretical_loss": 3.4043055263694386, "tokens_seen": 2433089536 }, { "epoch": 0.47, "learning_rate": 0.00026534542245045336, "loss": 0.0696, "theoretical_loss": 3.4043055263694386, "tokens_seen": 2433089536 }, { "epoch": 0.47, "learning_rate": 0.0002653053036989489, "loss": 0.0669, "theoretical_loss": 3.4042909417422846, "tokens_seen": 2433220608 }, { "epoch": 0.47, "learning_rate": 0.00026526518494744445, "loss": 0.0648, "theoretical_loss": 3.4042763581207156, "tokens_seen": 2433351680 }, { "epoch": 0.47, "learning_rate": 0.00026522506619594, "loss": 0.0685, "theoretical_loss": 3.404261775504608, "tokens_seen": 2433482752 }, { "epoch": 0.47, "learning_rate": 0.00026518494744443553, "loss": 0.0694, "theoretical_loss": 3.404247193893838, "tokens_seen": 2433613824 }, { "epoch": 0.48, "learning_rate": 0.0002651448286929311, "loss": 0.0651, "theoretical_loss": 3.404232613288283, "tokens_seen": 2433744896 }, { "epoch": 0.48, "learning_rate": 0.0002651047099414266, "loss": 0.0691, "theoretical_loss": 3.4042180336878194, "tokens_seen": 2433875968 }, { "epoch": 0.48, "learning_rate": 0.0002650645911899222, "loss": 0.069, "theoretical_loss": 3.404203455092323, "tokens_seen": 2434007040 }, { "epoch": 0.48, "learning_rate": 0.0002650244724384177, "loss": 0.0705, "theoretical_loss": 3.404188877501671, "tokens_seen": 2434138112 }, { "epoch": 0.48, "learning_rate": 0.00026498435368691326, "loss": 0.0672, "theoretical_loss": 3.40417430091574, "tokens_seen": 2434269184 }, { "epoch": 0.48, "learning_rate": 0.00026494423493540883, "loss": 0.0673, "theoretical_loss": 3.4041597253344067, "tokens_seen": 2434400256 }, { "epoch": 0.48, "learning_rate": 0.00026490411618390435, "loss": 0.069, "theoretical_loss": 3.404145150757548, "tokens_seen": 2434531328 }, { "epoch": 0.48, "learning_rate": 0.0002648639974323999, "loss": 0.0695, "theoretical_loss": 3.40413057718504, "tokens_seen": 2434662400 }, { "epoch": 0.48, "learning_rate": 0.0002648238786808955, "loss": 0.0675, "theoretical_loss": 3.4041160046167596, "tokens_seen": 2434793472 }, { "epoch": 0.48, "learning_rate": 0.000264783759929391, "loss": 0.0685, "theoretical_loss": 3.404101433052584, "tokens_seen": 2434924544 }, { "epoch": 0.48, "learning_rate": 0.00026474364117788656, "loss": 0.0668, "theoretical_loss": 3.4040868624923895, "tokens_seen": 2435055616 }, { "epoch": 0.48, "learning_rate": 0.0002647035224263821, "loss": 0.0669, "theoretical_loss": 3.404072292936053, "tokens_seen": 2435186688 }, { "epoch": 0.48, "learning_rate": 0.00026466340367487765, "loss": 0.0711, "theoretical_loss": 3.4040577243834513, "tokens_seen": 2435317760 }, { "epoch": 0.48, "learning_rate": 0.00026462328492337316, "loss": 0.0641, "theoretical_loss": 3.404043156834461, "tokens_seen": 2435448832 }, { "epoch": 0.48, "learning_rate": 0.00026458316617186873, "loss": 0.0655, "theoretical_loss": 3.4040285902889598, "tokens_seen": 2435579904 }, { "epoch": 0.48, "learning_rate": 0.0002645430474203643, "loss": 0.0652, "theoretical_loss": 3.404014024746824, "tokens_seen": 2435710976 }, { "epoch": 0.48, "learning_rate": 0.0002645029286688598, "loss": 0.0671, "theoretical_loss": 3.40399946020793, "tokens_seen": 2435842048 }, { "epoch": 0.48, "learning_rate": 0.0002644628099173554, "loss": 0.067, "theoretical_loss": 3.4039848966721555, "tokens_seen": 2435973120 }, { "epoch": 0.48, "learning_rate": 0.00026442269116585095, "loss": 0.0686, "theoretical_loss": 3.403970334139377, "tokens_seen": 2436104192 }, { "epoch": 0.48, "learning_rate": 0.00026438257241434646, "loss": 0.0667, "theoretical_loss": 3.4039557726094722, "tokens_seen": 2436235264 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.00039040885167196393, "objective/train/docs_used": 886917, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4178615808486938, "objective/train/original_loss": 1.4178617000579834, "objective/train/theoretical_loss": 3.403941212082317, "objective/train/tokens_used": 806890976, "objective/train/value_avg": -0.0110626220703125, "objective/train/value_loss": 0.0006643609958700836, "objective/train/value_max": -6.401538848876953e-05, "objective/train/value_min": -0.83447265625, "objective/train/value_reward_corr": 0.6628359427241322, "objective/train/value_std": 0.02264404296875, "objective/train/weight_avg": 1.0006301403045654, "objective/train/weighted_lm_loss": 1.4175746440887451, "objective/train/weights_max": 1.3635050058364868, "objective/train/weights_min": 0.0535246878862381, "theoretical_loss": 3.403941212082317, "tokens_seen": 2436366336 }, { "epoch": 0.48, "learning_rate": 0.00026434245366284203, "loss": 0.0666, "theoretical_loss": 3.403941212082317, "tokens_seen": 2436366336 }, { "epoch": 0.48, "learning_rate": 0.00026430233491133755, "loss": 0.0701, "theoretical_loss": 3.4039266525577894, "tokens_seen": 2436497408 }, { "epoch": 0.48, "learning_rate": 0.0002642622161598331, "loss": 0.0665, "theoretical_loss": 3.4039120940357654, "tokens_seen": 2436628480 }, { "epoch": 0.48, "learning_rate": 0.00026422209740832863, "loss": 0.0675, "theoretical_loss": 3.403897536516123, "tokens_seen": 2436759552 }, { "epoch": 0.48, "learning_rate": 0.0002641819786568242, "loss": 0.0642, "theoretical_loss": 3.4038829799987385, "tokens_seen": 2436890624 }, { "epoch": 0.48, "learning_rate": 0.00026414185990531977, "loss": 0.0671, "theoretical_loss": 3.40386842448349, "tokens_seen": 2437021696 }, { "epoch": 0.48, "learning_rate": 0.0002641017411538153, "loss": 0.0697, "theoretical_loss": 3.4038538699702534, "tokens_seen": 2437152768 }, { "epoch": 0.48, "learning_rate": 0.00026406162240231085, "loss": 0.0676, "theoretical_loss": 3.4038393164589067, "tokens_seen": 2437283840 }, { "epoch": 0.48, "learning_rate": 0.0002640215036508064, "loss": 0.0661, "theoretical_loss": 3.403824763949326, "tokens_seen": 2437414912 }, { "epoch": 0.48, "learning_rate": 0.000263981384899302, "loss": 0.0628, "theoretical_loss": 3.4038102124413907, "tokens_seen": 2437545984 }, { "epoch": 0.48, "learning_rate": 0.0002639412661477975, "loss": 0.0659, "theoretical_loss": 3.403795661934976, "tokens_seen": 2437677056 }, { "epoch": 0.48, "learning_rate": 0.000263901147396293, "loss": 0.0678, "theoretical_loss": 3.403781112429959, "tokens_seen": 2437808128 }, { "epoch": 0.48, "learning_rate": 0.0002638610286447886, "loss": 0.0656, "theoretical_loss": 3.4037665639262182, "tokens_seen": 2437939200 }, { "epoch": 0.48, "learning_rate": 0.0002638209098932841, "loss": 0.0703, "theoretical_loss": 3.4037520164236303, "tokens_seen": 2438070272 }, { "epoch": 0.48, "learning_rate": 0.00026378079114177966, "loss": 0.0644, "theoretical_loss": 3.403737469922073, "tokens_seen": 2438201344 }, { "epoch": 0.48, "learning_rate": 0.00026374067239027523, "loss": 0.0705, "theoretical_loss": 3.4037229244214227, "tokens_seen": 2438332416 }, { "epoch": 0.48, "learning_rate": 0.00026370055363877075, "loss": 0.0647, "theoretical_loss": 3.4037083799215573, "tokens_seen": 2438463488 }, { "epoch": 0.48, "learning_rate": 0.0002636604348872663, "loss": 0.0687, "theoretical_loss": 3.403693836422354, "tokens_seen": 2438594560 }, { "epoch": 0.48, "learning_rate": 0.0002636203161357619, "loss": 0.0667, "theoretical_loss": 3.40367929392369, "tokens_seen": 2438725632 }, { "epoch": 0.48, "learning_rate": 0.00026358019738425745, "loss": 0.0685, "theoretical_loss": 3.4036647524254438, "tokens_seen": 2438856704 }, { "epoch": 0.48, "learning_rate": 0.00026354007863275297, "loss": 0.0665, "theoretical_loss": 3.4036502119274914, "tokens_seen": 2438987776 }, { "epoch": 0.48, "learning_rate": 0.0002634999598812485, "loss": 0.0627, "theoretical_loss": 3.403635672429711, "tokens_seen": 2439118848 }, { "epoch": 0.48, "learning_rate": 0.00026345984112974405, "loss": 0.0666, "theoretical_loss": 3.4036211339319795, "tokens_seen": 2439249920 }, { "epoch": 0.48, "learning_rate": 0.00026341972237823956, "loss": 0.072, "theoretical_loss": 3.4036065964341753, "tokens_seen": 2439380992 }, { "epoch": 0.48, "learning_rate": 0.00026337960362673513, "loss": 0.0672, "theoretical_loss": 3.4035920599361753, "tokens_seen": 2439512064 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0010462348582223058, "objective/train/docs_used": 888050, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3570727109909058, "objective/train/original_loss": 1.3570725917816162, "objective/train/theoretical_loss": 3.403577524437857, "objective/train/tokens_used": 810167776, "objective/train/value_avg": -0.00774383544921875, "objective/train/value_loss": 0.00020661184680648148, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.3984375, "objective/train/value_reward_corr": 0.5888352271879904, "objective/train/value_std": 0.0119781494140625, "objective/train/weight_avg": 1.0011398792266846, "objective/train/weighted_lm_loss": 1.3582038879394531, "objective/train/weights_max": 1.2320702075958252, "objective/train/weights_min": 0.36942780017852783, "theoretical_loss": 3.403577524437857, "tokens_seen": 2439643136 }, { "epoch": 0.48, "learning_rate": 0.0002633394848752307, "loss": 0.0671, "theoretical_loss": 3.403577524437857, "tokens_seen": 2439643136 }, { "epoch": 0.48, "learning_rate": 0.0002632993661237262, "loss": 0.0663, "theoretical_loss": 3.403562989939098, "tokens_seen": 2439774208 }, { "epoch": 0.48, "learning_rate": 0.0002632592473722218, "loss": 0.0647, "theoretical_loss": 3.403548456439776, "tokens_seen": 2439905280 }, { "epoch": 0.48, "learning_rate": 0.00026321912862071735, "loss": 0.0697, "theoretical_loss": 3.4035339239397686, "tokens_seen": 2440036352 }, { "epoch": 0.48, "learning_rate": 0.0002631790098692129, "loss": 0.068, "theoretical_loss": 3.403519392438953, "tokens_seen": 2440167424 }, { "epoch": 0.48, "learning_rate": 0.00026313889111770843, "loss": 0.0662, "theoretical_loss": 3.4035048619372072, "tokens_seen": 2440298496 }, { "epoch": 0.48, "learning_rate": 0.00026309877236620395, "loss": 0.069, "theoretical_loss": 3.403490332434409, "tokens_seen": 2440429568 }, { "epoch": 0.48, "learning_rate": 0.0002630586536146995, "loss": 0.0729, "theoretical_loss": 3.403475803930436, "tokens_seen": 2440560640 }, { "epoch": 0.48, "learning_rate": 0.00026301853486319503, "loss": 0.066, "theoretical_loss": 3.4034612764251655, "tokens_seen": 2440691712 }, { "epoch": 0.48, "learning_rate": 0.0002629784161116906, "loss": 0.0713, "theoretical_loss": 3.4034467499184755, "tokens_seen": 2440822784 }, { "epoch": 0.48, "learning_rate": 0.00026293829736018617, "loss": 0.0644, "theoretical_loss": 3.4034322244102437, "tokens_seen": 2440953856 }, { "epoch": 0.48, "learning_rate": 0.0002628981786086817, "loss": 0.0649, "theoretical_loss": 3.4034176999003485, "tokens_seen": 2441084928 }, { "epoch": 0.48, "learning_rate": 0.00026285805985717725, "loss": 0.0652, "theoretical_loss": 3.4034031763886667, "tokens_seen": 2441216000 }, { "epoch": 0.48, "learning_rate": 0.0002628179411056728, "loss": 0.0644, "theoretical_loss": 3.4033886538750764, "tokens_seen": 2441347072 }, { "epoch": 0.48, "learning_rate": 0.0002627778223541684, "loss": 0.067, "theoretical_loss": 3.4033741323594557, "tokens_seen": 2441478144 }, { "epoch": 0.48, "learning_rate": 0.0002627377036026639, "loss": 0.0672, "theoretical_loss": 3.4033596118416822, "tokens_seen": 2441609216 }, { "epoch": 0.48, "learning_rate": 0.0002626975848511594, "loss": 0.066, "theoretical_loss": 3.4033450923216337, "tokens_seen": 2441740288 }, { "epoch": 0.48, "learning_rate": 0.000262657466099655, "loss": 0.0643, "theoretical_loss": 3.4033305737991886, "tokens_seen": 2441871360 }, { "epoch": 0.48, "learning_rate": 0.0002626173473481505, "loss": 0.0682, "theoretical_loss": 3.403316056274224, "tokens_seen": 2442002432 }, { "epoch": 0.48, "learning_rate": 0.00026257722859664606, "loss": 0.0696, "theoretical_loss": 3.403301539746619, "tokens_seen": 2442133504 }, { "epoch": 0.48, "learning_rate": 0.00026253710984514163, "loss": 0.0664, "theoretical_loss": 3.40328702421625, "tokens_seen": 2442264576 }, { "epoch": 0.48, "learning_rate": 0.00026249699109363715, "loss": 0.0722, "theoretical_loss": 3.4032725096829965, "tokens_seen": 2442395648 }, { "epoch": 0.48, "learning_rate": 0.0002624568723421327, "loss": 0.0658, "theoretical_loss": 3.4032579961467357, "tokens_seen": 2442526720 }, { "epoch": 0.48, "learning_rate": 0.0002624167535906283, "loss": 0.0656, "theoretical_loss": 3.4032434836073455, "tokens_seen": 2442657792 }, { "epoch": 0.48, "learning_rate": 0.00026237663483912385, "loss": 0.0716, "theoretical_loss": 3.4032289720647046, "tokens_seen": 2442788864 }, { "epoch": 0.48, "objective/train/advantage_avg": -0.00011127161997137591, "objective/train/docs_used": 889343, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.170166015625, "objective/train/original_loss": 1.170166015625, "objective/train/theoretical_loss": 3.40321446151869, "objective/train/tokens_used": 813444576, "objective/train/value_avg": -0.006397247314453125, "objective/train/value_loss": 0.00031674321508035064, "objective/train/value_max": -2.7298927307128906e-05, "objective/train/value_min": -0.7275390625, "objective/train/value_reward_corr": 0.6377202663466834, "objective/train/value_std": 0.01537322998046875, "objective/train/weight_avg": 1.000025749206543, "objective/train/weighted_lm_loss": 1.169310450553894, "objective/train/weights_max": 1.5240734815597534, "objective/train/weights_min": 0.38241738080978394, "theoretical_loss": 3.40321446151869, "tokens_seen": 2442919936 }, { "epoch": 0.48, "learning_rate": 0.00026233651608761937, "loss": 0.059, "theoretical_loss": 3.40321446151869, "tokens_seen": 2442919936 }, { "epoch": 0.48, "learning_rate": 0.0002622963973361149, "loss": 0.0676, "theoretical_loss": 3.403199951969181, "tokens_seen": 2443051008 }, { "epoch": 0.48, "learning_rate": 0.00026225627858461045, "loss": 0.0652, "theoretical_loss": 3.4031854434160556, "tokens_seen": 2443182080 }, { "epoch": 0.48, "learning_rate": 0.00026221615983310596, "loss": 0.0691, "theoretical_loss": 3.4031709358591904, "tokens_seen": 2443313152 }, { "epoch": 0.48, "learning_rate": 0.00026217604108160153, "loss": 0.0694, "theoretical_loss": 3.4031564292984653, "tokens_seen": 2443444224 }, { "epoch": 0.48, "learning_rate": 0.0002621359223300971, "loss": 0.0668, "theoretical_loss": 3.4031419237337577, "tokens_seen": 2443575296 }, { "epoch": 0.48, "learning_rate": 0.0002620958035785926, "loss": 0.0658, "theoretical_loss": 3.403127419164946, "tokens_seen": 2443706368 }, { "epoch": 0.48, "learning_rate": 0.0002620556848270882, "loss": 0.0685, "theoretical_loss": 3.403112915591908, "tokens_seen": 2443837440 }, { "epoch": 0.48, "learning_rate": 0.00026201556607558375, "loss": 0.0691, "theoretical_loss": 3.4030984130145225, "tokens_seen": 2443968512 }, { "epoch": 0.48, "learning_rate": 0.0002619754473240793, "loss": 0.0632, "theoretical_loss": 3.4030839114326676, "tokens_seen": 2444099584 }, { "epoch": 0.48, "learning_rate": 0.00026193532857257483, "loss": 0.0669, "theoretical_loss": 3.4030694108462214, "tokens_seen": 2444230656 }, { "epoch": 0.48, "learning_rate": 0.0002618952098210704, "loss": 0.0677, "theoretical_loss": 3.4030549112550625, "tokens_seen": 2444361728 }, { "epoch": 0.48, "learning_rate": 0.0002618550910695659, "loss": 0.0656, "theoretical_loss": 3.4030404126590685, "tokens_seen": 2444492800 }, { "epoch": 0.48, "learning_rate": 0.00026181497231806143, "loss": 0.0719, "theoretical_loss": 3.4030259150581186, "tokens_seen": 2444623872 }, { "epoch": 0.48, "learning_rate": 0.000261774853566557, "loss": 0.0676, "theoretical_loss": 3.403011418452091, "tokens_seen": 2444754944 }, { "epoch": 0.48, "learning_rate": 0.00026173473481505257, "loss": 0.0652, "theoretical_loss": 3.402996922840863, "tokens_seen": 2444886016 }, { "epoch": 0.48, "learning_rate": 0.0002616946160635481, "loss": 0.0662, "theoretical_loss": 3.402982428224315, "tokens_seen": 2445017088 }, { "epoch": 0.48, "learning_rate": 0.00026165449731204365, "loss": 0.0676, "theoretical_loss": 3.402967934602324, "tokens_seen": 2445148160 }, { "epoch": 0.48, "learning_rate": 0.0002616143785605392, "loss": 0.0666, "theoretical_loss": 3.4029534419747685, "tokens_seen": 2445279232 }, { "epoch": 0.48, "learning_rate": 0.0002615742598090348, "loss": 0.0679, "theoretical_loss": 3.4029389503415275, "tokens_seen": 2445410304 }, { "epoch": 0.48, "learning_rate": 0.0002615341410575303, "loss": 0.0691, "theoretical_loss": 3.4029244597024793, "tokens_seen": 2445541376 }, { "epoch": 0.48, "learning_rate": 0.00026149402230602587, "loss": 0.0691, "theoretical_loss": 3.402909970057502, "tokens_seen": 2445672448 }, { "epoch": 0.48, "learning_rate": 0.0002614539035545214, "loss": 0.07, "theoretical_loss": 3.402895481406475, "tokens_seen": 2445803520 }, { "epoch": 0.48, "learning_rate": 0.0002614137848030169, "loss": 0.0678, "theoretical_loss": 3.4028809937492763, "tokens_seen": 2445934592 }, { "epoch": 0.48, "learning_rate": 0.00026137366605151247, "loss": 0.071, "theoretical_loss": 3.4028665070857844, "tokens_seen": 2446065664 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0016338009154424071, "objective/train/docs_used": 890537, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3270853757858276, "objective/train/original_loss": 1.3270853757858276, "objective/train/theoretical_loss": 3.402852021415878, "objective/train/tokens_used": 816721376, "objective/train/value_avg": -0.00731658935546875, "objective/train/value_loss": 0.00015277751663234085, "objective/train/value_max": -8.940696716308594e-06, "objective/train/value_min": -0.269287109375, "objective/train/value_reward_corr": 0.6980230200439854, "objective/train/value_std": 0.0130615234375, "objective/train/weight_avg": 1.001705288887024, "objective/train/weighted_lm_loss": 1.3294957876205444, "objective/train/weights_max": 1.1142876148223877, "objective/train/weights_min": 0.36880120635032654, "theoretical_loss": 3.402852021415878, "tokens_seen": 2446196736 }, { "epoch": 0.48, "learning_rate": 0.00026133354730000803, "loss": 0.0685, "theoretical_loss": 3.402852021415878, "tokens_seen": 2446196736 }, { "epoch": 0.48, "learning_rate": 0.0002612934285485036, "loss": 0.0677, "theoretical_loss": 3.402837536739436, "tokens_seen": 2446327808 }, { "epoch": 0.48, "learning_rate": 0.0002612533097969991, "loss": 0.0669, "theoretical_loss": 3.4028230530563364, "tokens_seen": 2446458880 }, { "epoch": 0.48, "learning_rate": 0.0002612131910454947, "loss": 0.0683, "theoretical_loss": 3.4028085703664583, "tokens_seen": 2446589952 }, { "epoch": 0.48, "learning_rate": 0.00026117307229399025, "loss": 0.0675, "theoretical_loss": 3.4027940886696806, "tokens_seen": 2446721024 }, { "epoch": 0.48, "learning_rate": 0.00026113295354248577, "loss": 0.0685, "theoretical_loss": 3.402779607965882, "tokens_seen": 2446852096 }, { "epoch": 0.48, "learning_rate": 0.00026109283479098134, "loss": 0.0687, "theoretical_loss": 3.4027651282549405, "tokens_seen": 2446983168 }, { "epoch": 0.48, "learning_rate": 0.00026105271603947685, "loss": 0.0663, "theoretical_loss": 3.4027506495367357, "tokens_seen": 2447114240 }, { "epoch": 0.48, "learning_rate": 0.00026101259728797236, "loss": 0.0706, "theoretical_loss": 3.4027361718111457, "tokens_seen": 2447245312 }, { "epoch": 0.48, "learning_rate": 0.00026097247853646793, "loss": 0.0647, "theoretical_loss": 3.4027216950780494, "tokens_seen": 2447376384 }, { "epoch": 0.48, "learning_rate": 0.0002609323597849635, "loss": 0.0653, "theoretical_loss": 3.4027072193373264, "tokens_seen": 2447507456 }, { "epoch": 0.48, "learning_rate": 0.00026089224103345907, "loss": 0.0669, "theoretical_loss": 3.4026927445888546, "tokens_seen": 2447638528 }, { "epoch": 0.48, "learning_rate": 0.0002608521222819546, "loss": 0.0718, "theoretical_loss": 3.402678270832513, "tokens_seen": 2447769600 }, { "epoch": 0.48, "learning_rate": 0.00026081200353045015, "loss": 0.0641, "theoretical_loss": 3.4026637980681813, "tokens_seen": 2447900672 }, { "epoch": 0.48, "learning_rate": 0.0002607718847789457, "loss": 0.0682, "theoretical_loss": 3.4026493262957374, "tokens_seen": 2448031744 }, { "epoch": 0.48, "learning_rate": 0.00026073176602744123, "loss": 0.0673, "theoretical_loss": 3.4026348555150605, "tokens_seen": 2448162816 }, { "epoch": 0.48, "learning_rate": 0.0002606916472759368, "loss": 0.0705, "theoretical_loss": 3.40262038572603, "tokens_seen": 2448293888 }, { "epoch": 0.48, "learning_rate": 0.0002606515285244323, "loss": 0.0677, "theoretical_loss": 3.402605916928524, "tokens_seen": 2448424960 }, { "epoch": 0.48, "learning_rate": 0.00026061140977292783, "loss": 0.0656, "theoretical_loss": 3.4025914491224225, "tokens_seen": 2448556032 }, { "epoch": 0.48, "learning_rate": 0.0002605712910214234, "loss": 0.0654, "theoretical_loss": 3.4025769823076035, "tokens_seen": 2448687104 }, { "epoch": 0.48, "learning_rate": 0.00026053117226991897, "loss": 0.0646, "theoretical_loss": 3.402562516483947, "tokens_seen": 2448818176 }, { "epoch": 0.48, "learning_rate": 0.00026049105351841454, "loss": 0.0649, "theoretical_loss": 3.402548051651331, "tokens_seen": 2448949248 }, { "epoch": 0.48, "learning_rate": 0.00026045093476691005, "loss": 0.0691, "theoretical_loss": 3.4025335878096357, "tokens_seen": 2449080320 }, { "epoch": 0.48, "learning_rate": 0.0002604108160154056, "loss": 0.067, "theoretical_loss": 3.402519124958739, "tokens_seen": 2449211392 }, { "epoch": 0.48, "learning_rate": 0.0002603706972639012, "loss": 0.0648, "theoretical_loss": 3.402504663098521, "tokens_seen": 2449342464 }, { "epoch": 0.48, "objective/train/advantage_avg": 0.0006804154836572707, "objective/train/docs_used": 891543, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1899349689483643, "objective/train/original_loss": 1.1899349689483643, "objective/train/theoretical_loss": 3.4024902022288606, "objective/train/tokens_used": 819998176, "objective/train/value_avg": -0.005184173583984375, "objective/train/value_loss": 0.00011531027121236548, "objective/train/value_max": -4.9114227294921875e-05, "objective/train/value_min": -0.34228515625, "objective/train/value_reward_corr": 0.689884121653458, "objective/train/value_std": 0.009521484375, "objective/train/weight_avg": 1.0007338523864746, "objective/train/weighted_lm_loss": 1.1902631521224976, "objective/train/weights_max": 1.193924903869629, "objective/train/weights_min": 0.41670939326286316, "theoretical_loss": 3.4024902022288606, "tokens_seen": 2449473536 }, { "epoch": 0.48, "learning_rate": 0.0002603305785123967, "loss": 0.0624, "theoretical_loss": 3.4024902022288606, "tokens_seen": 2449473536 }, { "epoch": 0.48, "learning_rate": 0.00026029045976089227, "loss": 0.0659, "theoretical_loss": 3.4024757423496363, "tokens_seen": 2449604608 }, { "epoch": 0.48, "learning_rate": 0.0002602503410093878, "loss": 0.0673, "theoretical_loss": 3.4024612834607284, "tokens_seen": 2449735680 }, { "epoch": 0.48, "learning_rate": 0.0002602102222578833, "loss": 0.065, "theoretical_loss": 3.402446825562015, "tokens_seen": 2449866752 }, { "epoch": 0.48, "learning_rate": 0.00026017010350637887, "loss": 0.0655, "theoretical_loss": 3.402432368653376, "tokens_seen": 2449997824 }, { "epoch": 0.48, "learning_rate": 0.00026012998475487444, "loss": 0.0672, "theoretical_loss": 3.4024179127346903, "tokens_seen": 2450128896 }, { "epoch": 0.49, "learning_rate": 0.00026008986600337, "loss": 0.0653, "theoretical_loss": 3.4024034578058373, "tokens_seen": 2450259968 }, { "epoch": 0.49, "learning_rate": 0.0002600497472518655, "loss": 0.0656, "theoretical_loss": 3.402389003866696, "tokens_seen": 2450391040 }, { "epoch": 0.49, "learning_rate": 0.0002600096285003611, "loss": 0.0647, "theoretical_loss": 3.4023745509171466, "tokens_seen": 2450522112 }, { "epoch": 0.49, "learning_rate": 0.00025996950974885665, "loss": 0.0631, "theoretical_loss": 3.4023600989570673, "tokens_seen": 2450653184 }, { "epoch": 0.49, "learning_rate": 0.00025992939099735217, "loss": 0.0628, "theoretical_loss": 3.4023456479863383, "tokens_seen": 2450784256 }, { "epoch": 0.49, "learning_rate": 0.00025988927224584774, "loss": 0.0629, "theoretical_loss": 3.4023311980048385, "tokens_seen": 2450915328 }, { "epoch": 0.49, "learning_rate": 0.00025984915349434325, "loss": 0.0638, "theoretical_loss": 3.402316749012447, "tokens_seen": 2451046400 }, { "epoch": 0.49, "learning_rate": 0.00025980903474283877, "loss": 0.064, "theoretical_loss": 3.402302301009044, "tokens_seen": 2451177472 }, { "epoch": 0.49, "learning_rate": 0.00025976891599133433, "loss": 0.0605, "theoretical_loss": 3.4022878539945087, "tokens_seen": 2451308544 }, { "epoch": 0.49, "learning_rate": 0.0002597287972398299, "loss": 0.0623, "theoretical_loss": 3.40227340796872, "tokens_seen": 2451439616 }, { "epoch": 0.49, "learning_rate": 0.00025968867848832547, "loss": 0.0631, "theoretical_loss": 3.402258962931558, "tokens_seen": 2451570688 }, { "epoch": 0.49, "learning_rate": 0.000259648559736821, "loss": 0.0651, "theoretical_loss": 3.4022445188829016, "tokens_seen": 2451701760 }, { "epoch": 0.49, "learning_rate": 0.00025960844098531655, "loss": 0.0682, "theoretical_loss": 3.4022300758226307, "tokens_seen": 2451832832 }, { "epoch": 0.49, "learning_rate": 0.0002595683222338121, "loss": 0.0631, "theoretical_loss": 3.402215633750625, "tokens_seen": 2451963904 }, { "epoch": 0.49, "learning_rate": 0.00025952820348230764, "loss": 0.0651, "theoretical_loss": 3.4022011926667637, "tokens_seen": 2452094976 }, { "epoch": 0.49, "learning_rate": 0.0002594880847308032, "loss": 0.0654, "theoretical_loss": 3.4021867525709264, "tokens_seen": 2452226048 }, { "epoch": 0.49, "learning_rate": 0.0002594479659792987, "loss": 0.0657, "theoretical_loss": 3.402172313462993, "tokens_seen": 2452357120 }, { "epoch": 0.49, "learning_rate": 0.00025940784722779423, "loss": 0.0674, "theoretical_loss": 3.4021578753428425, "tokens_seen": 2452488192 }, { "epoch": 0.49, "learning_rate": 0.0002593677284762898, "loss": 0.0657, "theoretical_loss": 3.4021434382103557, "tokens_seen": 2452619264 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.000923838175367564, "objective/train/docs_used": 892743, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2273404598236084, "objective/train/original_loss": 1.2273404598236084, "objective/train/theoretical_loss": 3.402129002065411, "objective/train/tokens_used": 823274976, "objective/train/value_avg": -0.00894927978515625, "objective/train/value_loss": 0.000479041482321918, "objective/train/value_max": -5.739927291870117e-05, "objective/train/value_min": -0.7236328125, "objective/train/value_reward_corr": 0.7079314419620726, "objective/train/value_std": 0.0206298828125, "objective/train/weight_avg": 1.0011379718780518, "objective/train/weighted_lm_loss": 1.2286409139633179, "objective/train/weights_max": 1.6117087602615356, "objective/train/weights_min": 0.38619378209114075, "theoretical_loss": 3.402129002065411, "tokens_seen": 2452750336 }, { "epoch": 0.49, "learning_rate": 0.00025932760972478537, "loss": 0.064, "theoretical_loss": 3.402129002065411, "tokens_seen": 2452750336 }, { "epoch": 0.49, "learning_rate": 0.00025928749097328094, "loss": 0.0663, "theoretical_loss": 3.4021145669078887, "tokens_seen": 2452881408 }, { "epoch": 0.49, "learning_rate": 0.00025924737222177645, "loss": 0.063, "theoretical_loss": 3.4021001327376688, "tokens_seen": 2453012480 }, { "epoch": 0.49, "learning_rate": 0.000259207253470272, "loss": 0.0643, "theoretical_loss": 3.40208569955463, "tokens_seen": 2453143552 }, { "epoch": 0.49, "learning_rate": 0.0002591671347187676, "loss": 0.0679, "theoretical_loss": 3.402071267358653, "tokens_seen": 2453274624 }, { "epoch": 0.49, "learning_rate": 0.0002591270159672631, "loss": 0.0658, "theoretical_loss": 3.402056836149617, "tokens_seen": 2453405696 }, { "epoch": 0.49, "learning_rate": 0.00025908689721575867, "loss": 0.0635, "theoretical_loss": 3.402042405927402, "tokens_seen": 2453536768 }, { "epoch": 0.49, "learning_rate": 0.0002590467784642542, "loss": 0.0654, "theoretical_loss": 3.4020279766918886, "tokens_seen": 2453667840 }, { "epoch": 0.49, "learning_rate": 0.0002590066597127497, "loss": 0.0669, "theoretical_loss": 3.4020135484429552, "tokens_seen": 2453798912 }, { "epoch": 0.49, "learning_rate": 0.00025896654096124527, "loss": 0.0628, "theoretical_loss": 3.401999121180482, "tokens_seen": 2453929984 }, { "epoch": 0.49, "learning_rate": 0.00025892642220974084, "loss": 0.0639, "theoretical_loss": 3.4019846949043497, "tokens_seen": 2454061056 }, { "epoch": 0.49, "learning_rate": 0.0002588863034582364, "loss": 0.0668, "theoretical_loss": 3.401970269614438, "tokens_seen": 2454192128 }, { "epoch": 0.49, "learning_rate": 0.0002588461847067319, "loss": 0.0607, "theoretical_loss": 3.401955845310626, "tokens_seen": 2454323200 }, { "epoch": 0.49, "learning_rate": 0.0002588060659552275, "loss": 0.0621, "theoretical_loss": 3.4019414219927944, "tokens_seen": 2454454272 }, { "epoch": 0.49, "learning_rate": 0.00025876594720372306, "loss": 0.0667, "theoretical_loss": 3.4019269996608226, "tokens_seen": 2454585344 }, { "epoch": 0.49, "learning_rate": 0.00025872582845221857, "loss": 0.0633, "theoretical_loss": 3.4019125783145907, "tokens_seen": 2454716416 }, { "epoch": 0.49, "learning_rate": 0.00025868570970071414, "loss": 0.0657, "theoretical_loss": 3.4018981579539793, "tokens_seen": 2454847488 }, { "epoch": 0.49, "learning_rate": 0.00025864559094920965, "loss": 0.0664, "theoretical_loss": 3.4018837385788676, "tokens_seen": 2454978560 }, { "epoch": 0.49, "learning_rate": 0.00025860547219770517, "loss": 0.0682, "theoretical_loss": 3.401869320189136, "tokens_seen": 2455109632 }, { "epoch": 0.49, "learning_rate": 0.00025856535344620074, "loss": 0.0652, "theoretical_loss": 3.401854902784665, "tokens_seen": 2455240704 }, { "epoch": 0.49, "learning_rate": 0.0002585252346946963, "loss": 0.0672, "theoretical_loss": 3.401840486365334, "tokens_seen": 2455371776 }, { "epoch": 0.49, "learning_rate": 0.00025848511594319187, "loss": 0.0661, "theoretical_loss": 3.4018260709310235, "tokens_seen": 2455502848 }, { "epoch": 0.49, "learning_rate": 0.0002584449971916874, "loss": 0.0618, "theoretical_loss": 3.4018116564816134, "tokens_seen": 2455633920 }, { "epoch": 0.49, "learning_rate": 0.00025840487844018295, "loss": 0.0656, "theoretical_loss": 3.4017972430169836, "tokens_seen": 2455764992 }, { "epoch": 0.49, "learning_rate": 0.0002583647596886785, "loss": 0.0647, "theoretical_loss": 3.401782830537015, "tokens_seen": 2455896064 }, { "epoch": 0.49, "objective/train/advantage_avg": -0.0007783505716361105, "objective/train/docs_used": 893908, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4138989448547363, "objective/train/original_loss": 1.4138988256454468, "objective/train/theoretical_loss": 3.4017684190415873, "objective/train/tokens_used": 826551776, "objective/train/value_avg": -0.006427764892578125, "objective/train/value_loss": 0.00014978792751207948, "objective/train/value_max": -4.470348358154297e-05, "objective/train/value_min": -0.29052734375, "objective/train/value_reward_corr": 0.709263001783069, "objective/train/value_std": 0.01158905029296875, "objective/train/weight_avg": 0.9992918372154236, "objective/train/weighted_lm_loss": 1.413183569908142, "objective/train/weights_max": 1.1322495937347412, "objective/train/weights_min": 0.37658044695854187, "theoretical_loss": 3.4017684190415873, "tokens_seen": 2456027136 }, { "epoch": 0.49, "learning_rate": 0.00025832464093717404, "loss": 0.0697, "theoretical_loss": 3.4017684190415873, "tokens_seen": 2456027136 }, { "epoch": 0.49, "learning_rate": 0.0002582845221856696, "loss": 0.0664, "theoretical_loss": 3.4017540085305806, "tokens_seen": 2456158208 }, { "epoch": 0.49, "learning_rate": 0.0002582444034341651, "loss": 0.0673, "theoretical_loss": 3.4017395990038755, "tokens_seen": 2456289280 }, { "epoch": 0.49, "learning_rate": 0.0002582042846826607, "loss": 0.0644, "theoretical_loss": 3.401725190461352, "tokens_seen": 2456420352 }, { "epoch": 0.49, "learning_rate": 0.0002581641659311562, "loss": 0.0655, "theoretical_loss": 3.4017107829028905, "tokens_seen": 2456551424 }, { "epoch": 0.49, "learning_rate": 0.00025812404717965177, "loss": 0.0626, "theoretical_loss": 3.4016963763283714, "tokens_seen": 2456682496 }, { "epoch": 0.49, "learning_rate": 0.00025808392842814734, "loss": 0.0619, "theoretical_loss": 3.401681970737674, "tokens_seen": 2456813568 }, { "epoch": 0.49, "learning_rate": 0.00025804380967664285, "loss": 0.0641, "theoretical_loss": 3.4016675661306803, "tokens_seen": 2456944640 }, { "epoch": 0.49, "learning_rate": 0.0002580036909251384, "loss": 0.0625, "theoretical_loss": 3.4016531625072695, "tokens_seen": 2457075712 }, { "epoch": 0.49, "learning_rate": 0.000257963572173634, "loss": 0.0678, "theoretical_loss": 3.4016387598673226, "tokens_seen": 2457206784 }, { "epoch": 0.49, "learning_rate": 0.0002579234534221295, "loss": 0.0634, "theoretical_loss": 3.4016243582107193, "tokens_seen": 2457337856 }, { "epoch": 0.49, "learning_rate": 0.00025788333467062507, "loss": 0.0658, "theoretical_loss": 3.4016099575373406, "tokens_seen": 2457468928 }, { "epoch": 0.49, "learning_rate": 0.0002578432159191206, "loss": 0.0645, "theoretical_loss": 3.4015955578470667, "tokens_seen": 2457600000 }, { "epoch": 0.49, "learning_rate": 0.00025780309716761616, "loss": 0.0627, "theoretical_loss": 3.401581159139778, "tokens_seen": 2457731072 }, { "epoch": 0.49, "learning_rate": 0.00025776297841611167, "loss": 0.0646, "theoretical_loss": 3.4015667614153555, "tokens_seen": 2457862144 }, { "epoch": 0.49, "learning_rate": 0.00025772285966460724, "loss": 0.0649, "theoretical_loss": 3.401552364673679, "tokens_seen": 2457993216 }, { "epoch": 0.49, "learning_rate": 0.0002576827409131028, "loss": 0.0658, "theoretical_loss": 3.4015379689146292, "tokens_seen": 2458124288 }, { "epoch": 0.49, "learning_rate": 0.0002576426221615983, "loss": 0.0661, "theoretical_loss": 3.401523574138087, "tokens_seen": 2458255360 }, { "epoch": 0.49, "learning_rate": 0.0002576025034100939, "loss": 0.0619, "theoretical_loss": 3.4015091803439326, "tokens_seen": 2458386432 }, { "epoch": 0.49, "learning_rate": 0.00025756238465858946, "loss": 0.0626, "theoretical_loss": 3.4014947875320463, "tokens_seen": 2458517504 }, { "epoch": 0.49, "learning_rate": 0.00025752226590708497, "loss": 0.0648, "theoretical_loss": 3.4014803957023094, "tokens_seen": 2458648576 }, { "epoch": 0.49, "learning_rate": 0.00025748214715558054, "loss": 0.0636, "theoretical_loss": 3.401466004854602, "tokens_seen": 2458779648 }, { "epoch": 0.49, "learning_rate": 0.00025744202840407605, "loss": 0.0669, "theoretical_loss": 3.4014516149888054, "tokens_seen": 2458910720 }, { "epoch": 0.49, "learning_rate": 0.0002574019096525716, "loss": 0.0619, "theoretical_loss": 3.4014372261048, "tokens_seen": 2459041792 }, { "epoch": 0.49, "learning_rate": 0.00025736179090106714, "loss": 0.0715, "theoretical_loss": 3.401422838202466, "tokens_seen": 2459172864 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.000968090258538723, "objective/train/docs_used": 895121, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2501220703125, "objective/train/original_loss": 1.2501220703125, "objective/train/theoretical_loss": 3.401408451281684, "objective/train/tokens_used": 829828576, "objective/train/value_avg": -0.005199432373046875, "objective/train/value_loss": 0.00017746363300830126, "objective/train/value_max": -4.57763671875e-05, "objective/train/value_min": -0.4208984375, "objective/train/value_reward_corr": 0.6649323912519232, "objective/train/value_std": 0.0112457275390625, "objective/train/weight_avg": 1.0010453462600708, "objective/train/weighted_lm_loss": 1.2516560554504395, "objective/train/weights_max": 1.1714041233062744, "objective/train/weights_min": 0.3685304820537567, "theoretical_loss": 3.401408451281684, "tokens_seen": 2459303936 }, { "epoch": 0.49, "learning_rate": 0.0002573216721495627, "loss": 0.0632, "theoretical_loss": 3.401408451281684, "tokens_seen": 2459303936 }, { "epoch": 0.49, "learning_rate": 0.0002572815533980583, "loss": 0.063, "theoretical_loss": 3.401394065342336, "tokens_seen": 2459435008 }, { "epoch": 0.49, "learning_rate": 0.0002572414346465538, "loss": 0.0643, "theoretical_loss": 3.4013796803843013, "tokens_seen": 2459566080 }, { "epoch": 0.49, "learning_rate": 0.00025720131589504936, "loss": 0.0651, "theoretical_loss": 3.4013652964074614, "tokens_seen": 2459697152 }, { "epoch": 0.49, "learning_rate": 0.0002571611971435449, "loss": 0.0646, "theoretical_loss": 3.4013509134116973, "tokens_seen": 2459828224 }, { "epoch": 0.49, "learning_rate": 0.00025712107839204044, "loss": 0.0628, "theoretical_loss": 3.4013365313968893, "tokens_seen": 2459959296 }, { "epoch": 0.49, "learning_rate": 0.000257080959640536, "loss": 0.0646, "theoretical_loss": 3.4013221503629185, "tokens_seen": 2460090368 }, { "epoch": 0.49, "learning_rate": 0.0002570408408890315, "loss": 0.0654, "theoretical_loss": 3.4013077703096655, "tokens_seen": 2460221440 }, { "epoch": 0.49, "learning_rate": 0.0002570007221375271, "loss": 0.0639, "theoretical_loss": 3.4012933912370116, "tokens_seen": 2460352512 }, { "epoch": 0.49, "learning_rate": 0.0002569606033860226, "loss": 0.0667, "theoretical_loss": 3.4012790131448374, "tokens_seen": 2460483584 }, { "epoch": 0.49, "learning_rate": 0.00025692048463451817, "loss": 0.061, "theoretical_loss": 3.401264636033024, "tokens_seen": 2460614656 }, { "epoch": 0.49, "learning_rate": 0.00025688036588301374, "loss": 0.0622, "theoretical_loss": 3.4012502599014516, "tokens_seen": 2460745728 }, { "epoch": 0.49, "learning_rate": 0.00025684024713150925, "loss": 0.0652, "theoretical_loss": 3.401235884750002, "tokens_seen": 2460876800 }, { "epoch": 0.49, "learning_rate": 0.0002568001283800048, "loss": 0.0627, "theoretical_loss": 3.4012215105785564, "tokens_seen": 2461007872 }, { "epoch": 0.49, "learning_rate": 0.0002567600096285004, "loss": 0.0632, "theoretical_loss": 3.401207137386995, "tokens_seen": 2461138944 }, { "epoch": 0.49, "learning_rate": 0.0002567198908769959, "loss": 0.0671, "theoretical_loss": 3.401192765175199, "tokens_seen": 2461270016 }, { "epoch": 0.49, "learning_rate": 0.0002566797721254915, "loss": 0.062, "theoretical_loss": 3.40117839394305, "tokens_seen": 2461401088 }, { "epoch": 0.49, "learning_rate": 0.000256639653373987, "loss": 0.0657, "theoretical_loss": 3.4011640236904284, "tokens_seen": 2461532160 }, { "epoch": 0.49, "learning_rate": 0.00025659953462248256, "loss": 0.0642, "theoretical_loss": 3.4011496544172157, "tokens_seen": 2461663232 }, { "epoch": 0.49, "learning_rate": 0.00025655941587097807, "loss": 0.0636, "theoretical_loss": 3.401135286123293, "tokens_seen": 2461794304 }, { "epoch": 0.49, "learning_rate": 0.00025651929711947364, "loss": 0.0616, "theoretical_loss": 3.401120918808541, "tokens_seen": 2461925376 }, { "epoch": 0.49, "learning_rate": 0.0002564791783679692, "loss": 0.0649, "theoretical_loss": 3.401106552472841, "tokens_seen": 2462056448 }, { "epoch": 0.49, "learning_rate": 0.0002564390596164647, "loss": 0.0677, "theoretical_loss": 3.401092187116075, "tokens_seen": 2462187520 }, { "epoch": 0.49, "learning_rate": 0.0002563989408649603, "loss": 0.0659, "theoretical_loss": 3.4010778227381233, "tokens_seen": 2462318592 }, { "epoch": 0.49, "learning_rate": 0.00025635882211345586, "loss": 0.0651, "theoretical_loss": 3.401063459338867, "tokens_seen": 2462449664 }, { "epoch": 0.49, "objective/train/advantage_avg": 0.0009081036550924182, "objective/train/docs_used": 896263, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.240075707435608, "objective/train/original_loss": 1.2400753498077393, "objective/train/theoretical_loss": 3.4010490969181877, "objective/train/tokens_used": 833105376, "objective/train/value_avg": -0.00750732421875, "objective/train/value_loss": 0.00018225386156700552, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.548828125, "objective/train/value_reward_corr": 0.7761348028883214, "objective/train/value_std": 0.0192718505859375, "objective/train/weight_avg": 1.0009914636611938, "objective/train/weighted_lm_loss": 1.240823745727539, "objective/train/weights_max": 1.603045105934143, "objective/train/weights_min": 0.3999635875225067, "theoretical_loss": 3.4010490969181877, "tokens_seen": 2462580736 }, { "epoch": 0.49, "learning_rate": 0.0002563187033619514, "loss": 0.0634, "theoretical_loss": 3.4010490969181877, "tokens_seen": 2462580736 }, { "epoch": 0.49, "learning_rate": 0.00025627858461044694, "loss": 0.0651, "theoretical_loss": 3.4010347354759665, "tokens_seen": 2462711808 }, { "epoch": 0.49, "learning_rate": 0.00025623846585894245, "loss": 0.0601, "theoretical_loss": 3.401020375012085, "tokens_seen": 2462842880 }, { "epoch": 0.49, "learning_rate": 0.000256198347107438, "loss": 0.0646, "theoretical_loss": 3.401006015526424, "tokens_seen": 2462973952 }, { "epoch": 0.49, "learning_rate": 0.00025615822835593354, "loss": 0.0639, "theoretical_loss": 3.400991657018865, "tokens_seen": 2463105024 }, { "epoch": 0.49, "learning_rate": 0.0002561181096044291, "loss": 0.0657, "theoretical_loss": 3.4009772994892895, "tokens_seen": 2463236096 }, { "epoch": 0.49, "learning_rate": 0.0002560779908529247, "loss": 0.0638, "theoretical_loss": 3.400962942937579, "tokens_seen": 2463367168 }, { "epoch": 0.49, "learning_rate": 0.0002560378721014202, "loss": 0.067, "theoretical_loss": 3.4009485873636143, "tokens_seen": 2463498240 }, { "epoch": 0.49, "learning_rate": 0.00025599775334991576, "loss": 0.0616, "theoretical_loss": 3.400934232767277, "tokens_seen": 2463629312 }, { "epoch": 0.49, "learning_rate": 0.0002559576345984113, "loss": 0.066, "theoretical_loss": 3.4009198791484487, "tokens_seen": 2463760384 }, { "epoch": 0.49, "learning_rate": 0.0002559175158469069, "loss": 0.0632, "theoretical_loss": 3.400905526507011, "tokens_seen": 2463891456 }, { "epoch": 0.49, "learning_rate": 0.0002558773970954024, "loss": 0.0653, "theoretical_loss": 3.4008911748428448, "tokens_seen": 2464022528 }, { "epoch": 0.49, "learning_rate": 0.0002558372783438979, "loss": 0.0622, "theoretical_loss": 3.4008768241558323, "tokens_seen": 2464153600 }, { "epoch": 0.49, "learning_rate": 0.0002557971595923935, "loss": 0.0662, "theoretical_loss": 3.400862474445854, "tokens_seen": 2464284672 }, { "epoch": 0.49, "learning_rate": 0.000255757040840889, "loss": 0.0689, "theoretical_loss": 3.4008481257127925, "tokens_seen": 2464415744 }, { "epoch": 0.49, "learning_rate": 0.00025571692208938457, "loss": 0.0646, "theoretical_loss": 3.400833777956529, "tokens_seen": 2464546816 }, { "epoch": 0.49, "learning_rate": 0.00025567680333788014, "loss": 0.0651, "theoretical_loss": 3.400819431176944, "tokens_seen": 2464677888 }, { "epoch": 0.49, "learning_rate": 0.00025563668458637566, "loss": 0.0636, "theoretical_loss": 3.4008050853739213, "tokens_seen": 2464808960 }, { "epoch": 0.49, "learning_rate": 0.0002555965658348712, "loss": 0.0638, "theoretical_loss": 3.4007907405473405, "tokens_seen": 2464940032 }, { "epoch": 0.49, "learning_rate": 0.0002555564470833668, "loss": 0.0669, "theoretical_loss": 3.4007763966970836, "tokens_seen": 2465071104 }, { "epoch": 0.49, "learning_rate": 0.00025551632833186236, "loss": 0.0622, "theoretical_loss": 3.400762053823033, "tokens_seen": 2465202176 }, { "epoch": 0.49, "learning_rate": 0.0002554762095803579, "loss": 0.0654, "theoretical_loss": 3.40074771192507, "tokens_seen": 2465333248 }, { "epoch": 0.49, "learning_rate": 0.0002554360908288534, "loss": 0.0653, "theoretical_loss": 3.400733371003076, "tokens_seen": 2465464320 }, { "epoch": 0.49, "learning_rate": 0.00025539597207734896, "loss": 0.0642, "theoretical_loss": 3.4007190310569326, "tokens_seen": 2465595392 }, { "epoch": 0.49, "learning_rate": 0.00025535585332584447, "loss": 0.0647, "theoretical_loss": 3.4007046920865225, "tokens_seen": 2465726464 }, { "epoch": 0.49, "objective/train/advantage_avg": -0.00040717533556744456, "objective/train/docs_used": 897133, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2204694747924805, "objective/train/original_loss": 1.2204694747924805, "objective/train/theoretical_loss": 3.400690354091726, "objective/train/tokens_used": 836382176, "objective/train/value_avg": -0.01148223876953125, "objective/train/value_loss": 0.00038927473360672593, "objective/train/value_max": -4.7206878662109375e-05, "objective/train/value_min": -0.79345703125, "objective/train/value_reward_corr": 0.8845394588129606, "objective/train/value_std": 0.035491943359375, "objective/train/weight_avg": 0.9997527599334717, "objective/train/weighted_lm_loss": 1.219051480293274, "objective/train/weights_max": 1.2006316184997559, "objective/train/weights_min": 0.14526447653770447, "theoretical_loss": 3.400690354091726, "tokens_seen": 2465857536 }, { "epoch": 0.49, "learning_rate": 0.00025531573457434004, "loss": 0.0655, "theoretical_loss": 3.400690354091726, "tokens_seen": 2465857536 }, { "epoch": 0.49, "learning_rate": 0.0002552756158228356, "loss": 0.0649, "theoretical_loss": 3.4006760170724264, "tokens_seen": 2465988608 }, { "epoch": 0.49, "learning_rate": 0.0002552354970713311, "loss": 0.0659, "theoretical_loss": 3.400661681028504, "tokens_seen": 2466119680 }, { "epoch": 0.49, "learning_rate": 0.0002551953783198267, "loss": 0.0647, "theoretical_loss": 3.4006473459598423, "tokens_seen": 2466250752 }, { "epoch": 0.49, "learning_rate": 0.00025515525956832226, "loss": 0.0627, "theoretical_loss": 3.4006330118663213, "tokens_seen": 2466381824 }, { "epoch": 0.49, "learning_rate": 0.00025511514081681783, "loss": 0.0638, "theoretical_loss": 3.4006186787478243, "tokens_seen": 2466512896 }, { "epoch": 0.49, "learning_rate": 0.00025507502206531334, "loss": 0.0672, "theoretical_loss": 3.4006043466042324, "tokens_seen": 2466643968 }, { "epoch": 0.5, "learning_rate": 0.00025503490331380886, "loss": 0.0653, "theoretical_loss": 3.4005900154354274, "tokens_seen": 2466775040 }, { "epoch": 0.5, "learning_rate": 0.0002549947845623044, "loss": 0.0647, "theoretical_loss": 3.400575685241292, "tokens_seen": 2466906112 }, { "epoch": 0.5, "learning_rate": 0.00025495466581079994, "loss": 0.0655, "theoretical_loss": 3.400561356021707, "tokens_seen": 2467037184 }, { "epoch": 0.5, "learning_rate": 0.0002549145470592955, "loss": 0.065, "theoretical_loss": 3.4005470277765557, "tokens_seen": 2467168256 }, { "epoch": 0.5, "learning_rate": 0.0002548744283077911, "loss": 0.066, "theoretical_loss": 3.400532700505719, "tokens_seen": 2467299328 }, { "epoch": 0.5, "learning_rate": 0.0002548343095562866, "loss": 0.0619, "theoretical_loss": 3.4005183742090788, "tokens_seen": 2467430400 }, { "epoch": 0.5, "learning_rate": 0.00025479419080478216, "loss": 0.0648, "theoretical_loss": 3.400504048886518, "tokens_seen": 2467561472 }, { "epoch": 0.5, "learning_rate": 0.0002547540720532777, "loss": 0.0673, "theoretical_loss": 3.400489724537918, "tokens_seen": 2467692544 }, { "epoch": 0.5, "learning_rate": 0.0002547139533017733, "loss": 0.0677, "theoretical_loss": 3.4004754011631615, "tokens_seen": 2467823616 }, { "epoch": 0.5, "learning_rate": 0.0002546738345502688, "loss": 0.0667, "theoretical_loss": 3.4004610787621297, "tokens_seen": 2467954688 }, { "epoch": 0.5, "learning_rate": 0.0002546337157987643, "loss": 0.0615, "theoretical_loss": 3.4004467573347053, "tokens_seen": 2468085760 }, { "epoch": 0.5, "learning_rate": 0.0002545935970472599, "loss": 0.0649, "theoretical_loss": 3.40043243688077, "tokens_seen": 2468216832 }, { "epoch": 0.5, "learning_rate": 0.0002545534782957554, "loss": 0.0647, "theoretical_loss": 3.400418117400206, "tokens_seen": 2468347904 }, { "epoch": 0.5, "learning_rate": 0.000254513359544251, "loss": 0.0641, "theoretical_loss": 3.400403798892896, "tokens_seen": 2468478976 }, { "epoch": 0.5, "learning_rate": 0.00025447324079274654, "loss": 0.0717, "theoretical_loss": 3.4003894813587223, "tokens_seen": 2468610048 }, { "epoch": 0.5, "learning_rate": 0.00025443312204124206, "loss": 0.0668, "theoretical_loss": 3.4003751647975657, "tokens_seen": 2468741120 }, { "epoch": 0.5, "learning_rate": 0.0002543930032897376, "loss": 0.0664, "theoretical_loss": 3.4003608492093096, "tokens_seen": 2468872192 }, { "epoch": 0.5, "learning_rate": 0.0002543528845382332, "loss": 0.0639, "theoretical_loss": 3.400346534593836, "tokens_seen": 2469003264 }, { "epoch": 0.5, "objective/train/advantage_avg": -0.00035405447124503553, "objective/train/docs_used": 898257, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4309805631637573, "objective/train/original_loss": 1.4309803247451782, "objective/train/theoretical_loss": 3.400332220951027, "objective/train/tokens_used": 839658976, "objective/train/value_avg": -0.005573272705078125, "objective/train/value_loss": 0.00027514214161783457, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.4833984375, "objective/train/value_reward_corr": 0.6595190122660851, "objective/train/value_std": 0.012664794921875, "objective/train/weight_avg": 0.9997660517692566, "objective/train/weighted_lm_loss": 1.429917335510254, "objective/train/weights_max": 1.335748553276062, "objective/train/weights_min": 0.3701203465461731, "theoretical_loss": 3.400332220951027, "tokens_seen": 2469134336 }, { "epoch": 0.5, "learning_rate": 0.00025431276578672876, "loss": 0.0677, "theoretical_loss": 3.400332220951027, "tokens_seen": 2469134336 }, { "epoch": 0.5, "learning_rate": 0.0002542726470352243, "loss": 0.0668, "theoretical_loss": 3.4003179082807655, "tokens_seen": 2469265408 }, { "epoch": 0.5, "learning_rate": 0.0002542325282837198, "loss": 0.0652, "theoretical_loss": 3.400303596582933, "tokens_seen": 2469396480 }, { "epoch": 0.5, "learning_rate": 0.00025419240953221536, "loss": 0.0654, "theoretical_loss": 3.400289285857412, "tokens_seen": 2469527552 }, { "epoch": 0.5, "learning_rate": 0.00025415229078071087, "loss": 0.066, "theoretical_loss": 3.4002749761040847, "tokens_seen": 2469658624 }, { "epoch": 0.5, "learning_rate": 0.00025411217202920644, "loss": 0.0667, "theoretical_loss": 3.400260667322834, "tokens_seen": 2469789696 }, { "epoch": 0.5, "learning_rate": 0.000254072053277702, "loss": 0.0685, "theoretical_loss": 3.400246359513542, "tokens_seen": 2469920768 }, { "epoch": 0.5, "learning_rate": 0.0002540319345261975, "loss": 0.0693, "theoretical_loss": 3.4002320526760914, "tokens_seen": 2470051840 }, { "epoch": 0.5, "learning_rate": 0.0002539918157746931, "loss": 0.0691, "theoretical_loss": 3.4002177468103643, "tokens_seen": 2470182912 }, { "epoch": 0.5, "learning_rate": 0.00025395169702318866, "loss": 0.0712, "theoretical_loss": 3.4002034419162426, "tokens_seen": 2470313984 }, { "epoch": 0.5, "learning_rate": 0.00025391157827168423, "loss": 0.0678, "theoretical_loss": 3.40018913799361, "tokens_seen": 2470445056 }, { "epoch": 0.5, "learning_rate": 0.00025387145952017974, "loss": 0.0672, "theoretical_loss": 3.400174835042348, "tokens_seen": 2470576128 }, { "epoch": 0.5, "learning_rate": 0.0002538313407686753, "loss": 0.0657, "theoretical_loss": 3.4001605330623397, "tokens_seen": 2470707200 }, { "epoch": 0.5, "learning_rate": 0.0002537912220171708, "loss": 0.0684, "theoretical_loss": 3.400146232053467, "tokens_seen": 2470838272 }, { "epoch": 0.5, "learning_rate": 0.00025375110326566634, "loss": 0.0629, "theoretical_loss": 3.400131932015613, "tokens_seen": 2470969344 }, { "epoch": 0.5, "learning_rate": 0.0002537109845141619, "loss": 0.0622, "theoretical_loss": 3.40011763294866, "tokens_seen": 2471100416 }, { "epoch": 0.5, "learning_rate": 0.0002536708657626575, "loss": 0.0685, "theoretical_loss": 3.4001033348524907, "tokens_seen": 2471231488 }, { "epoch": 0.5, "learning_rate": 0.00025363074701115304, "loss": 0.0657, "theoretical_loss": 3.4000890377269877, "tokens_seen": 2471362560 }, { "epoch": 0.5, "learning_rate": 0.00025359062825964856, "loss": 0.0713, "theoretical_loss": 3.4000747415720336, "tokens_seen": 2471493632 }, { "epoch": 0.5, "learning_rate": 0.00025355050950814413, "loss": 0.0625, "theoretical_loss": 3.400060446387511, "tokens_seen": 2471624704 }, { "epoch": 0.5, "learning_rate": 0.0002535103907566397, "loss": 0.0627, "theoretical_loss": 3.4000461521733025, "tokens_seen": 2471755776 }, { "epoch": 0.5, "learning_rate": 0.0002534702720051352, "loss": 0.0697, "theoretical_loss": 3.400031858929291, "tokens_seen": 2471886848 }, { "epoch": 0.5, "learning_rate": 0.0002534301532536308, "loss": 0.0653, "theoretical_loss": 3.400017566655359, "tokens_seen": 2472017920 }, { "epoch": 0.5, "learning_rate": 0.0002533900345021263, "loss": 0.0672, "theoretical_loss": 3.4000032753513896, "tokens_seen": 2472148992 }, { "epoch": 0.5, "learning_rate": 0.0002533499157506218, "loss": 0.0711, "theoretical_loss": 3.3999889850172655, "tokens_seen": 2472280064 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.000727068108972162, "objective/train/docs_used": 899541, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.28717839717865, "objective/train/original_loss": 1.2871782779693604, "objective/train/theoretical_loss": 3.399974695652869, "objective/train/tokens_used": 842935776, "objective/train/value_avg": -0.0072479248046875, "objective/train/value_loss": 0.00011867039575008675, "objective/train/value_max": -5.02467155456543e-05, "objective/train/value_min": -0.282470703125, "objective/train/value_reward_corr": 0.7030409280026744, "objective/train/value_std": 0.01203155517578125, "objective/train/weight_avg": 1.0007827281951904, "objective/train/weighted_lm_loss": 1.2873568534851074, "objective/train/weights_max": 1.1115705966949463, "objective/train/weights_min": 0.39273062348365784, "theoretical_loss": 3.399974695652869, "tokens_seen": 2472411136 }, { "epoch": 0.5, "learning_rate": 0.0002533097969991174, "loss": 0.0645, "theoretical_loss": 3.399974695652869, "tokens_seen": 2472411136 }, { "epoch": 0.5, "learning_rate": 0.00025326967824761294, "loss": 0.0632, "theoretical_loss": 3.399960407258083, "tokens_seen": 2472542208 }, { "epoch": 0.5, "learning_rate": 0.0002532295594961085, "loss": 0.0688, "theoretical_loss": 3.3999461198327907, "tokens_seen": 2472673280 }, { "epoch": 0.5, "learning_rate": 0.000253189440744604, "loss": 0.0678, "theoretical_loss": 3.3999318333768747, "tokens_seen": 2472804352 }, { "epoch": 0.5, "learning_rate": 0.0002531493219930996, "loss": 0.0653, "theoretical_loss": 3.3999175478902175, "tokens_seen": 2472935424 }, { "epoch": 0.5, "learning_rate": 0.00025310920324159516, "loss": 0.0687, "theoretical_loss": 3.3999032633727024, "tokens_seen": 2473066496 }, { "epoch": 0.5, "learning_rate": 0.0002530690844900907, "loss": 0.066, "theoretical_loss": 3.3998889798242127, "tokens_seen": 2473197568 }, { "epoch": 0.5, "learning_rate": 0.00025302896573858625, "loss": 0.0693, "theoretical_loss": 3.3998746972446305, "tokens_seen": 2473328640 }, { "epoch": 0.5, "learning_rate": 0.00025298884698708176, "loss": 0.0653, "theoretical_loss": 3.3998604156338392, "tokens_seen": 2473459712 }, { "epoch": 0.5, "learning_rate": 0.0002529487282355773, "loss": 0.064, "theoretical_loss": 3.3998461349917215, "tokens_seen": 2473590784 }, { "epoch": 0.5, "learning_rate": 0.00025290860948407284, "loss": 0.0641, "theoretical_loss": 3.3998318553181606, "tokens_seen": 2473721856 }, { "epoch": 0.5, "learning_rate": 0.0002528684907325684, "loss": 0.0677, "theoretical_loss": 3.3998175766130396, "tokens_seen": 2473852928 }, { "epoch": 0.5, "learning_rate": 0.000252828371981064, "loss": 0.0666, "theoretical_loss": 3.399803298876241, "tokens_seen": 2473984000 }, { "epoch": 0.5, "learning_rate": 0.0002527882532295595, "loss": 0.0647, "theoretical_loss": 3.399789022107648, "tokens_seen": 2474115072 }, { "epoch": 0.5, "learning_rate": 0.00025274813447805506, "loss": 0.0682, "theoretical_loss": 3.399774746307144, "tokens_seen": 2474246144 }, { "epoch": 0.5, "learning_rate": 0.00025270801572655063, "loss": 0.0633, "theoretical_loss": 3.399760471474612, "tokens_seen": 2474377216 }, { "epoch": 0.5, "learning_rate": 0.00025266789697504614, "loss": 0.0646, "theoretical_loss": 3.3997461976099346, "tokens_seen": 2474508288 }, { "epoch": 0.5, "learning_rate": 0.0002526277782235417, "loss": 0.0667, "theoretical_loss": 3.399731924712996, "tokens_seen": 2474639360 }, { "epoch": 0.5, "learning_rate": 0.0002525876594720372, "loss": 0.0649, "theoretical_loss": 3.399717652783678, "tokens_seen": 2474770432 }, { "epoch": 0.5, "learning_rate": 0.00025254754072053274, "loss": 0.069, "theoretical_loss": 3.3997033818218645, "tokens_seen": 2474901504 }, { "epoch": 0.5, "learning_rate": 0.0002525074219690283, "loss": 0.0652, "theoretical_loss": 3.3996891118274384, "tokens_seen": 2475032576 }, { "epoch": 0.5, "learning_rate": 0.0002524673032175239, "loss": 0.0664, "theoretical_loss": 3.3996748428002834, "tokens_seen": 2475163648 }, { "epoch": 0.5, "learning_rate": 0.00025242718446601945, "loss": 0.0656, "theoretical_loss": 3.3996605747402824, "tokens_seen": 2475294720 }, { "epoch": 0.5, "learning_rate": 0.00025238706571451496, "loss": 0.0654, "theoretical_loss": 3.3996463076473185, "tokens_seen": 2475425792 }, { "epoch": 0.5, "learning_rate": 0.00025234694696301053, "loss": 0.067, "theoretical_loss": 3.399632041521275, "tokens_seen": 2475556864 }, { "epoch": 0.5, "objective/train/advantage_avg": -0.0009070009109564126, "objective/train/docs_used": 900678, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3210715055465698, "objective/train/original_loss": 1.3210716247558594, "objective/train/theoretical_loss": 3.399617776362035, "objective/train/tokens_used": 846212576, "objective/train/value_avg": -0.00714874267578125, "objective/train/value_loss": 0.00015322091348934919, "objective/train/value_max": -8.416175842285156e-05, "objective/train/value_min": -0.24169921875, "objective/train/value_reward_corr": 0.7445247078601958, "objective/train/value_std": 0.01262664794921875, "objective/train/weight_avg": 0.9991682171821594, "objective/train/weighted_lm_loss": 1.3192490339279175, "objective/train/weights_max": 1.1860806941986084, "objective/train/weights_min": 0.6235631108283997, "theoretical_loss": 3.399617776362035, "tokens_seen": 2475687936 }, { "epoch": 0.5, "learning_rate": 0.0002523068282115061, "loss": 0.0642, "theoretical_loss": 3.399617776362035, "tokens_seen": 2475687936 }, { "epoch": 0.5, "learning_rate": 0.0002522667094600016, "loss": 0.0621, "theoretical_loss": 3.3996035121694828, "tokens_seen": 2475819008 }, { "epoch": 0.5, "learning_rate": 0.0002522265907084972, "loss": 0.0642, "theoretical_loss": 3.3995892489435007, "tokens_seen": 2475950080 }, { "epoch": 0.5, "learning_rate": 0.0002521864719569927, "loss": 0.0683, "theoretical_loss": 3.399574986683972, "tokens_seen": 2476081152 }, { "epoch": 0.5, "learning_rate": 0.0002521463532054882, "loss": 0.0675, "theoretical_loss": 3.3995607253907805, "tokens_seen": 2476212224 }, { "epoch": 0.5, "learning_rate": 0.0002521062344539838, "loss": 0.0663, "theoretical_loss": 3.3995464650638096, "tokens_seen": 2476343296 }, { "epoch": 0.5, "learning_rate": 0.00025206611570247934, "loss": 0.067, "theoretical_loss": 3.3995322057029425, "tokens_seen": 2476474368 }, { "epoch": 0.5, "learning_rate": 0.0002520259969509749, "loss": 0.0643, "theoretical_loss": 3.3995179473080626, "tokens_seen": 2476605440 }, { "epoch": 0.5, "learning_rate": 0.0002519858781994704, "loss": 0.0617, "theoretical_loss": 3.3995036898790536, "tokens_seen": 2476736512 }, { "epoch": 0.5, "learning_rate": 0.000251945759447966, "loss": 0.0664, "theoretical_loss": 3.399489433415799, "tokens_seen": 2476867584 }, { "epoch": 0.5, "learning_rate": 0.00025190564069646156, "loss": 0.0622, "theoretical_loss": 3.3994751779181813, "tokens_seen": 2476998656 }, { "epoch": 0.5, "learning_rate": 0.0002518655219449571, "loss": 0.0652, "theoretical_loss": 3.3994609233860853, "tokens_seen": 2477129728 }, { "epoch": 0.5, "learning_rate": 0.00025182540319345265, "loss": 0.0658, "theoretical_loss": 3.399446669819394, "tokens_seen": 2477260800 }, { "epoch": 0.5, "learning_rate": 0.00025178528444194816, "loss": 0.0713, "theoretical_loss": 3.399432417217991, "tokens_seen": 2477391872 }, { "epoch": 0.5, "learning_rate": 0.0002517451656904437, "loss": 0.0655, "theoretical_loss": 3.3994181655817592, "tokens_seen": 2477522944 }, { "epoch": 0.5, "learning_rate": 0.00025170504693893924, "loss": 0.0651, "theoretical_loss": 3.3994039149105832, "tokens_seen": 2477654016 }, { "epoch": 0.5, "learning_rate": 0.0002516649281874348, "loss": 0.0659, "theoretical_loss": 3.3993896652043465, "tokens_seen": 2477785088 }, { "epoch": 0.5, "learning_rate": 0.0002516248094359304, "loss": 0.0695, "theoretical_loss": 3.3993754164629317, "tokens_seen": 2477916160 }, { "epoch": 0.5, "learning_rate": 0.0002515846906844259, "loss": 0.0656, "theoretical_loss": 3.399361168686224, "tokens_seen": 2478047232 }, { "epoch": 0.5, "learning_rate": 0.00025154457193292146, "loss": 0.0685, "theoretical_loss": 3.3993469218741055, "tokens_seen": 2478178304 }, { "epoch": 0.5, "learning_rate": 0.00025150445318141703, "loss": 0.0639, "theoretical_loss": 3.399332676026461, "tokens_seen": 2478309376 }, { "epoch": 0.5, "learning_rate": 0.00025146433442991255, "loss": 0.0686, "theoretical_loss": 3.3993184311431737, "tokens_seen": 2478440448 }, { "epoch": 0.5, "learning_rate": 0.0002514242156784081, "loss": 0.0685, "theoretical_loss": 3.399304187224127, "tokens_seen": 2478571520 }, { "epoch": 0.5, "learning_rate": 0.00025138409692690363, "loss": 0.0676, "theoretical_loss": 3.3992899442692055, "tokens_seen": 2478702592 }, { "epoch": 0.5, "learning_rate": 0.00025134397817539914, "loss": 0.0704, "theoretical_loss": 3.3992757022782927, "tokens_seen": 2478833664 }, { "epoch": 0.5, "objective/train/advantage_avg": 0.0006969331880100071, "objective/train/docs_used": 901918, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3551603555679321, "objective/train/original_loss": 1.3551604747772217, "objective/train/theoretical_loss": 3.399261461251272, "objective/train/tokens_used": 849489376, "objective/train/value_avg": -0.004474639892578125, "objective/train/value_loss": 9.594507719157264e-05, "objective/train/value_max": -6.657838821411133e-05, "objective/train/value_min": -0.2178955078125, "objective/train/value_reward_corr": 0.6329485570368907, "objective/train/value_std": 0.00841522216796875, "objective/train/weight_avg": 1.000740647315979, "objective/train/weighted_lm_loss": 1.3560088872909546, "objective/train/weights_max": 1.1370280981063843, "objective/train/weights_min": 0.370623379945755, "theoretical_loss": 3.399261461251272, "tokens_seen": 2478964736 }, { "epoch": 0.5, "learning_rate": 0.0002513038594238947, "loss": 0.0672, "theoretical_loss": 3.399261461251272, "tokens_seen": 2478964736 }, { "epoch": 0.5, "learning_rate": 0.0002512637406723903, "loss": 0.0663, "theoretical_loss": 3.3992472211880274, "tokens_seen": 2479095808 }, { "epoch": 0.5, "learning_rate": 0.00025122362192088585, "loss": 0.0685, "theoretical_loss": 3.3992329820884426, "tokens_seen": 2479226880 }, { "epoch": 0.5, "learning_rate": 0.00025118350316938136, "loss": 0.066, "theoretical_loss": 3.3992187439524018, "tokens_seen": 2479357952 }, { "epoch": 0.5, "learning_rate": 0.00025114338441787693, "loss": 0.064, "theoretical_loss": 3.3992045067797885, "tokens_seen": 2479489024 }, { "epoch": 0.5, "learning_rate": 0.0002511032656663725, "loss": 0.0657, "theoretical_loss": 3.399190270570487, "tokens_seen": 2479620096 }, { "epoch": 0.5, "learning_rate": 0.000251063146914868, "loss": 0.0692, "theoretical_loss": 3.3991760353243805, "tokens_seen": 2479751168 }, { "epoch": 0.5, "learning_rate": 0.0002510230281633636, "loss": 0.0662, "theoretical_loss": 3.3991618010413536, "tokens_seen": 2479882240 }, { "epoch": 0.5, "learning_rate": 0.0002509829094118591, "loss": 0.0682, "theoretical_loss": 3.39914756772129, "tokens_seen": 2480013312 }, { "epoch": 0.5, "learning_rate": 0.00025094279066035466, "loss": 0.0663, "theoretical_loss": 3.3991333353640742, "tokens_seen": 2480144384 }, { "epoch": 0.5, "learning_rate": 0.0002509026719088502, "loss": 0.0647, "theoretical_loss": 3.399119103969589, "tokens_seen": 2480275456 }, { "epoch": 0.5, "learning_rate": 0.00025086255315734575, "loss": 0.0636, "theoretical_loss": 3.3991048735377194, "tokens_seen": 2480406528 }, { "epoch": 0.5, "learning_rate": 0.0002508224344058413, "loss": 0.0655, "theoretical_loss": 3.399090644068349, "tokens_seen": 2480537600 }, { "epoch": 0.5, "learning_rate": 0.00025078231565433683, "loss": 0.0642, "theoretical_loss": 3.399076415561362, "tokens_seen": 2480668672 }, { "epoch": 0.5, "learning_rate": 0.0002507421969028324, "loss": 0.0654, "theoretical_loss": 3.3990621880166425, "tokens_seen": 2480799744 }, { "epoch": 0.5, "learning_rate": 0.00025070207815132796, "loss": 0.0682, "theoretical_loss": 3.399047961434074, "tokens_seen": 2480930816 }, { "epoch": 0.5, "learning_rate": 0.0002506619593998235, "loss": 0.0686, "theoretical_loss": 3.399033735813542, "tokens_seen": 2481061888 }, { "epoch": 0.5, "learning_rate": 0.00025062184064831905, "loss": 0.0657, "theoretical_loss": 3.399019511154929, "tokens_seen": 2481192960 }, { "epoch": 0.5, "learning_rate": 0.00025058172189681456, "loss": 0.0684, "theoretical_loss": 3.3990052874581203, "tokens_seen": 2481324032 }, { "epoch": 0.5, "learning_rate": 0.00025054160314531013, "loss": 0.0691, "theoretical_loss": 3.398991064722999, "tokens_seen": 2481455104 }, { "epoch": 0.5, "learning_rate": 0.00025050148439380564, "loss": 0.0644, "theoretical_loss": 3.3989768429494505, "tokens_seen": 2481586176 }, { "epoch": 0.5, "learning_rate": 0.0002504613656423012, "loss": 0.0625, "theoretical_loss": 3.3989626221373586, "tokens_seen": 2481717248 }, { "epoch": 0.5, "learning_rate": 0.0002504212468907968, "loss": 0.0671, "theoretical_loss": 3.3989484022866066, "tokens_seen": 2481848320 }, { "epoch": 0.5, "learning_rate": 0.0002503811281392923, "loss": 0.0624, "theoretical_loss": 3.3989341833970803, "tokens_seen": 2481979392 }, { "epoch": 0.5, "learning_rate": 0.00025034100938778786, "loss": 0.0695, "theoretical_loss": 3.398919965468663, "tokens_seen": 2482110464 }, { "epoch": 0.5, "objective/train/advantage_avg": 1.4965448826842476e-05, "objective/train/docs_used": 903113, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3270014524459839, "objective/train/original_loss": 1.3270013332366943, "objective/train/theoretical_loss": 3.398905748501239, "objective/train/tokens_used": 852766176, "objective/train/value_avg": -0.007297515869140625, "objective/train/value_loss": 0.000495147134643048, "objective/train/value_max": -4.1961669921875e-05, "objective/train/value_min": -0.94580078125, "objective/train/value_reward_corr": 0.7263284535283716, "objective/train/value_std": 0.0186767578125, "objective/train/weight_avg": 1.0002200603485107, "objective/train/weighted_lm_loss": 1.326468586921692, "objective/train/weights_max": 1.1980594396591187, "objective/train/weights_min": 0.37203988432884216, "theoretical_loss": 3.398905748501239, "tokens_seen": 2482241536 }, { "epoch": 0.5, "learning_rate": 0.00025030089063628343, "loss": 0.0626, "theoretical_loss": 3.398905748501239, "tokens_seen": 2482241536 }, { "epoch": 0.5, "learning_rate": 0.00025026077188477895, "loss": 0.0648, "theoretical_loss": 3.3988915324946927, "tokens_seen": 2482372608 }, { "epoch": 0.5, "learning_rate": 0.0002502206531332745, "loss": 0.0664, "theoretical_loss": 3.3988773174489086, "tokens_seen": 2482503680 }, { "epoch": 0.5, "learning_rate": 0.00025018053438177003, "loss": 0.0658, "theoretical_loss": 3.3988631033637713, "tokens_seen": 2482634752 }, { "epoch": 0.5, "learning_rate": 0.0002501404156302656, "loss": 0.066, "theoretical_loss": 3.3988488902391643, "tokens_seen": 2482765824 }, { "epoch": 0.5, "learning_rate": 0.0002501002968787611, "loss": 0.0658, "theoretical_loss": 3.3988346780749725, "tokens_seen": 2482896896 }, { "epoch": 0.5, "learning_rate": 0.0002500601781272567, "loss": 0.0685, "theoretical_loss": 3.3988204668710806, "tokens_seen": 2483027968 }, { "epoch": 0.5, "learning_rate": 0.00025002005937575225, "loss": 0.0629, "theoretical_loss": 3.3988062566273727, "tokens_seen": 2483159040 }, { "epoch": 0.51, "learning_rate": 0.00024997994062424776, "loss": 0.0662, "theoretical_loss": 3.398792047343733, "tokens_seen": 2483290112 }, { "epoch": 0.51, "learning_rate": 0.00024993982187274333, "loss": 0.0653, "theoretical_loss": 3.3987778390200467, "tokens_seen": 2483421184 }, { "epoch": 0.51, "learning_rate": 0.0002498997031212389, "loss": 0.0665, "theoretical_loss": 3.3987636316561978, "tokens_seen": 2483552256 }, { "epoch": 0.51, "learning_rate": 0.0002498595843697344, "loss": 0.0685, "theoretical_loss": 3.398749425252071, "tokens_seen": 2483683328 }, { "epoch": 0.51, "learning_rate": 0.00024981946561823, "loss": 0.0633, "theoretical_loss": 3.3987352198075502, "tokens_seen": 2483814400 }, { "epoch": 0.51, "learning_rate": 0.0002497793468667255, "loss": 0.0669, "theoretical_loss": 3.3987210153225207, "tokens_seen": 2483945472 }, { "epoch": 0.51, "learning_rate": 0.00024973922811522106, "loss": 0.0652, "theoretical_loss": 3.3987068117968664, "tokens_seen": 2484076544 }, { "epoch": 0.51, "learning_rate": 0.00024969910936371663, "loss": 0.0672, "theoretical_loss": 3.398692609230473, "tokens_seen": 2484207616 }, { "epoch": 0.51, "learning_rate": 0.00024965899061221215, "loss": 0.0662, "theoretical_loss": 3.3986784076232235, "tokens_seen": 2484338688 }, { "epoch": 0.51, "learning_rate": 0.0002496188718607077, "loss": 0.0668, "theoretical_loss": 3.398664206975004, "tokens_seen": 2484469760 }, { "epoch": 0.51, "learning_rate": 0.00024957875310920323, "loss": 0.0637, "theoretical_loss": 3.3986500072856987, "tokens_seen": 2484600832 }, { "epoch": 0.51, "learning_rate": 0.0002495386343576988, "loss": 0.068, "theoretical_loss": 3.3986358085551918, "tokens_seen": 2484731904 }, { "epoch": 0.51, "learning_rate": 0.00024949851560619437, "loss": 0.0653, "theoretical_loss": 3.398621610783368, "tokens_seen": 2484862976 }, { "epoch": 0.51, "learning_rate": 0.0002494583968546899, "loss": 0.0667, "theoretical_loss": 3.398607413970113, "tokens_seen": 2484994048 }, { "epoch": 0.51, "learning_rate": 0.00024941827810318545, "loss": 0.066, "theoretical_loss": 3.3985932181153107, "tokens_seen": 2485125120 }, { "epoch": 0.51, "learning_rate": 0.00024937815935168096, "loss": 0.0694, "theoretical_loss": 3.3985790232188458, "tokens_seen": 2485256192 }, { "epoch": 0.51, "learning_rate": 0.00024933804060017653, "loss": 0.0653, "theoretical_loss": 3.398564829280603, "tokens_seen": 2485387264 }, { "epoch": 0.51, "objective/train/advantage_avg": 6.283269613049924e-05, "objective/train/docs_used": 904288, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3574708700180054, "objective/train/original_loss": 1.357470989227295, "objective/train/theoretical_loss": 3.3985506363004676, "objective/train/tokens_used": 856042976, "objective/train/value_avg": -0.0057830810546875, "objective/train/value_loss": 0.0001713089586701244, "objective/train/value_max": -3.975629806518555e-05, "objective/train/value_min": -0.292236328125, "objective/train/value_reward_corr": 0.627100483707199, "objective/train/value_std": 0.01044464111328125, "objective/train/weight_avg": 1.0001403093338013, "objective/train/weighted_lm_loss": 1.3579777479171753, "objective/train/weights_max": 1.268601417541504, "objective/train/weights_min": 0.3721363842487335, "theoretical_loss": 3.3985506363004676, "tokens_seen": 2485518336 }, { "epoch": 0.51, "learning_rate": 0.0002492979218486721, "loss": 0.0677, "theoretical_loss": 3.3985506363004676, "tokens_seen": 2485518336 }, { "epoch": 0.51, "learning_rate": 0.0002492578030971676, "loss": 0.0669, "theoretical_loss": 3.3985364442783244, "tokens_seen": 2485649408 }, { "epoch": 0.51, "learning_rate": 0.0002492176843456632, "loss": 0.0696, "theoretical_loss": 3.398522253214058, "tokens_seen": 2485780480 }, { "epoch": 0.51, "learning_rate": 0.0002491775655941587, "loss": 0.0682, "theoretical_loss": 3.398508063107553, "tokens_seen": 2485911552 }, { "epoch": 0.51, "learning_rate": 0.00024913744684265426, "loss": 0.0656, "theoretical_loss": 3.3984938739586945, "tokens_seen": 2486042624 }, { "epoch": 0.51, "learning_rate": 0.00024909732809114983, "loss": 0.0679, "theoretical_loss": 3.3984796857673674, "tokens_seen": 2486173696 }, { "epoch": 0.51, "learning_rate": 0.00024905720933964535, "loss": 0.0645, "theoretical_loss": 3.3984654985334566, "tokens_seen": 2486304768 }, { "epoch": 0.51, "learning_rate": 0.0002490170905881409, "loss": 0.0637, "theoretical_loss": 3.3984513122568467, "tokens_seen": 2486435840 }, { "epoch": 0.51, "learning_rate": 0.00024897697183663643, "loss": 0.0654, "theoretical_loss": 3.3984371269374236, "tokens_seen": 2486566912 }, { "epoch": 0.51, "learning_rate": 0.000248936853085132, "loss": 0.0703, "theoretical_loss": 3.3984229425750714, "tokens_seen": 2486697984 }, { "epoch": 0.51, "learning_rate": 0.00024889673433362757, "loss": 0.0664, "theoretical_loss": 3.398408759169675, "tokens_seen": 2486829056 }, { "epoch": 0.51, "learning_rate": 0.0002488566155821231, "loss": 0.0684, "theoretical_loss": 3.3983945767211203, "tokens_seen": 2486960128 }, { "epoch": 0.51, "learning_rate": 0.00024881649683061865, "loss": 0.0677, "theoretical_loss": 3.398380395229291, "tokens_seen": 2487091200 }, { "epoch": 0.51, "learning_rate": 0.00024877637807911416, "loss": 0.0703, "theoretical_loss": 3.3983662146940734, "tokens_seen": 2487222272 }, { "epoch": 0.51, "learning_rate": 0.00024873625932760973, "loss": 0.0696, "theoretical_loss": 3.398352035115352, "tokens_seen": 2487353344 }, { "epoch": 0.51, "learning_rate": 0.0002486961405761053, "loss": 0.0676, "theoretical_loss": 3.398337856493012, "tokens_seen": 2487484416 }, { "epoch": 0.51, "learning_rate": 0.0002486560218246008, "loss": 0.0688, "theoretical_loss": 3.398323678826938, "tokens_seen": 2487615488 }, { "epoch": 0.51, "learning_rate": 0.0002486159030730964, "loss": 0.065, "theoretical_loss": 3.3983095021170158, "tokens_seen": 2487746560 }, { "epoch": 0.51, "learning_rate": 0.0002485757843215919, "loss": 0.0668, "theoretical_loss": 3.3982953263631304, "tokens_seen": 2487877632 }, { "epoch": 0.51, "learning_rate": 0.00024853566557008747, "loss": 0.0652, "theoretical_loss": 3.398281151565167, "tokens_seen": 2488008704 }, { "epoch": 0.51, "learning_rate": 0.00024849554681858303, "loss": 0.0624, "theoretical_loss": 3.3982669777230106, "tokens_seen": 2488139776 }, { "epoch": 0.51, "learning_rate": 0.00024845542806707855, "loss": 0.0662, "theoretical_loss": 3.398252804836546, "tokens_seen": 2488270848 }, { "epoch": 0.51, "learning_rate": 0.0002484153093155741, "loss": 0.0687, "theoretical_loss": 3.3982386329056595, "tokens_seen": 2488401920 }, { "epoch": 0.51, "learning_rate": 0.00024837519056406963, "loss": 0.064, "theoretical_loss": 3.3982244619302358, "tokens_seen": 2488532992 }, { "epoch": 0.51, "learning_rate": 0.0002483350718125652, "loss": 0.0653, "theoretical_loss": 3.3982102919101598, "tokens_seen": 2488664064 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.0007464580121450126, "objective/train/docs_used": 905609, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2528904676437378, "objective/train/original_loss": 1.2528905868530273, "objective/train/theoretical_loss": 3.398196122845317, "objective/train/tokens_used": 859319776, "objective/train/value_avg": -0.00678253173828125, "objective/train/value_loss": 0.0003358404792379588, "objective/train/value_max": -3.147125244140625e-05, "objective/train/value_min": -0.93896484375, "objective/train/value_reward_corr": 0.6901472386845386, "objective/train/value_std": 0.019744873046875, "objective/train/weight_avg": 1.0008972883224487, "objective/train/weighted_lm_loss": 1.2538511753082275, "objective/train/weights_max": 1.680822491645813, "objective/train/weights_min": 0.3690904378890991, "theoretical_loss": 3.398196122845317, "tokens_seen": 2488795136 }, { "epoch": 0.51, "learning_rate": 0.00024829495306106077, "loss": 0.0608, "theoretical_loss": 3.398196122845317, "tokens_seen": 2488795136 }, { "epoch": 0.51, "learning_rate": 0.0002482548343095563, "loss": 0.0659, "theoretical_loss": 3.398181954735593, "tokens_seen": 2488926208 }, { "epoch": 0.51, "learning_rate": 0.00024821471555805185, "loss": 0.0677, "theoretical_loss": 3.3981677875808725, "tokens_seen": 2489057280 }, { "epoch": 0.51, "learning_rate": 0.00024817459680654736, "loss": 0.0627, "theoretical_loss": 3.398153621381042, "tokens_seen": 2489188352 }, { "epoch": 0.51, "learning_rate": 0.00024813447805504293, "loss": 0.0678, "theoretical_loss": 3.3981394561359854, "tokens_seen": 2489319424 }, { "epoch": 0.51, "learning_rate": 0.0002480943593035385, "loss": 0.0636, "theoretical_loss": 3.398125291845589, "tokens_seen": 2489450496 }, { "epoch": 0.51, "learning_rate": 0.000248054240552034, "loss": 0.0665, "theoretical_loss": 3.398111128509738, "tokens_seen": 2489581568 }, { "epoch": 0.51, "learning_rate": 0.0002480141218005296, "loss": 0.0672, "theoretical_loss": 3.398096966128318, "tokens_seen": 2489712640 }, { "epoch": 0.51, "learning_rate": 0.0002479740030490251, "loss": 0.0674, "theoretical_loss": 3.3980828047012146, "tokens_seen": 2489843712 }, { "epoch": 0.51, "learning_rate": 0.00024793388429752067, "loss": 0.0691, "theoretical_loss": 3.398068644228312, "tokens_seen": 2489974784 }, { "epoch": 0.51, "learning_rate": 0.00024789376554601623, "loss": 0.0695, "theoretical_loss": 3.3980544847094976, "tokens_seen": 2490105856 }, { "epoch": 0.51, "learning_rate": 0.00024785364679451175, "loss": 0.0645, "theoretical_loss": 3.3980403261446552, "tokens_seen": 2490236928 }, { "epoch": 0.51, "learning_rate": 0.0002478135280430073, "loss": 0.0695, "theoretical_loss": 3.3980261685336712, "tokens_seen": 2490368000 }, { "epoch": 0.51, "learning_rate": 0.00024777340929150283, "loss": 0.0668, "theoretical_loss": 3.398012011876431, "tokens_seen": 2490499072 }, { "epoch": 0.51, "learning_rate": 0.0002477332905399984, "loss": 0.0653, "theoretical_loss": 3.39799785617282, "tokens_seen": 2490630144 }, { "epoch": 0.51, "learning_rate": 0.00024769317178849397, "loss": 0.0692, "theoretical_loss": 3.3979837014227243, "tokens_seen": 2490761216 }, { "epoch": 0.51, "learning_rate": 0.0002476530530369895, "loss": 0.0686, "theoretical_loss": 3.3979695476260288, "tokens_seen": 2490892288 }, { "epoch": 0.51, "learning_rate": 0.00024761293428548505, "loss": 0.0661, "theoretical_loss": 3.3979553947826195, "tokens_seen": 2491023360 }, { "epoch": 0.51, "learning_rate": 0.00024757281553398056, "loss": 0.0656, "theoretical_loss": 3.397941242892382, "tokens_seen": 2491154432 }, { "epoch": 0.51, "learning_rate": 0.00024753269678247613, "loss": 0.0639, "theoretical_loss": 3.397927091955202, "tokens_seen": 2491285504 }, { "epoch": 0.51, "learning_rate": 0.0002474925780309717, "loss": 0.0665, "theoretical_loss": 3.397912941970965, "tokens_seen": 2491416576 }, { "epoch": 0.51, "learning_rate": 0.0002474524592794672, "loss": 0.0688, "theoretical_loss": 3.3978987929395568, "tokens_seen": 2491547648 }, { "epoch": 0.51, "learning_rate": 0.0002474123405279628, "loss": 0.0677, "theoretical_loss": 3.3978846448608633, "tokens_seen": 2491678720 }, { "epoch": 0.51, "learning_rate": 0.0002473722217764583, "loss": 0.0637, "theoretical_loss": 3.39787049773477, "tokens_seen": 2491809792 }, { "epoch": 0.51, "learning_rate": 0.00024733210302495387, "loss": 0.0667, "theoretical_loss": 3.3978563515611624, "tokens_seen": 2491940864 }, { "epoch": 0.51, "objective/train/advantage_avg": -5.5015963880578056e-05, "objective/train/docs_used": 906987, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4864906072616577, "objective/train/original_loss": 1.4864904880523682, "objective/train/theoretical_loss": 3.3978422063399267, "objective/train/tokens_used": 862596576, "objective/train/value_avg": -0.00604248046875, "objective/train/value_loss": 0.00017924272106029093, "objective/train/value_max": -4.470348358154297e-05, "objective/train/value_min": -0.646484375, "objective/train/value_reward_corr": 0.6643545236163619, "objective/train/value_std": 0.01174163818359375, "objective/train/weight_avg": 1.0000249147415161, "objective/train/weighted_lm_loss": 1.4860825538635254, "objective/train/weights_max": 1.2107371091842651, "objective/train/weights_min": 0.3682950735092163, "theoretical_loss": 3.3978422063399267, "tokens_seen": 2492071936 }, { "epoch": 0.51, "learning_rate": 0.00024729198427344943, "loss": 0.0673, "theoretical_loss": 3.3978422063399267, "tokens_seen": 2492071936 }, { "epoch": 0.51, "learning_rate": 0.00024725186552194495, "loss": 0.0664, "theoretical_loss": 3.397828062070949, "tokens_seen": 2492203008 }, { "epoch": 0.51, "learning_rate": 0.0002472117467704405, "loss": 0.0655, "theoretical_loss": 3.3978139187541143, "tokens_seen": 2492334080 }, { "epoch": 0.51, "learning_rate": 0.00024717162801893603, "loss": 0.0646, "theoretical_loss": 3.3977997763893093, "tokens_seen": 2492465152 }, { "epoch": 0.51, "learning_rate": 0.0002471315092674316, "loss": 0.0665, "theoretical_loss": 3.397785634976419, "tokens_seen": 2492596224 }, { "epoch": 0.51, "learning_rate": 0.00024709139051592717, "loss": 0.0686, "theoretical_loss": 3.3977714945153297, "tokens_seen": 2492727296 }, { "epoch": 0.51, "learning_rate": 0.0002470512717644227, "loss": 0.0675, "theoretical_loss": 3.3977573550059272, "tokens_seen": 2492858368 }, { "epoch": 0.51, "learning_rate": 0.00024701115301291825, "loss": 0.0675, "theoretical_loss": 3.397743216448098, "tokens_seen": 2492989440 }, { "epoch": 0.51, "learning_rate": 0.00024697103426141377, "loss": 0.065, "theoretical_loss": 3.397729078841727, "tokens_seen": 2493120512 }, { "epoch": 0.51, "learning_rate": 0.00024693091550990933, "loss": 0.0619, "theoretical_loss": 3.3977149421867012, "tokens_seen": 2493251584 }, { "epoch": 0.51, "learning_rate": 0.0002468907967584049, "loss": 0.067, "theoretical_loss": 3.3977008064829057, "tokens_seen": 2493382656 }, { "epoch": 0.51, "learning_rate": 0.0002468506780069004, "loss": 0.0682, "theoretical_loss": 3.397686671730227, "tokens_seen": 2493513728 }, { "epoch": 0.51, "learning_rate": 0.000246810559255396, "loss": 0.0634, "theoretical_loss": 3.3976725379285506, "tokens_seen": 2493644800 }, { "epoch": 0.51, "learning_rate": 0.0002467704405038915, "loss": 0.0656, "theoretical_loss": 3.3976584050777636, "tokens_seen": 2493775872 }, { "epoch": 0.51, "learning_rate": 0.00024673032175238707, "loss": 0.0674, "theoretical_loss": 3.3976442731777508, "tokens_seen": 2493906944 }, { "epoch": 0.51, "learning_rate": 0.00024669020300088264, "loss": 0.0676, "theoretical_loss": 3.397630142228399, "tokens_seen": 2494038016 }, { "epoch": 0.51, "learning_rate": 0.00024665008424937815, "loss": 0.0683, "theoretical_loss": 3.3976160122295944, "tokens_seen": 2494169088 }, { "epoch": 0.51, "learning_rate": 0.0002466099654978737, "loss": 0.0686, "theoretical_loss": 3.3976018831812222, "tokens_seen": 2494300160 }, { "epoch": 0.51, "learning_rate": 0.00024656984674636923, "loss": 0.0683, "theoretical_loss": 3.39758775508317, "tokens_seen": 2494431232 }, { "epoch": 0.51, "learning_rate": 0.0002465297279948648, "loss": 0.0673, "theoretical_loss": 3.3975736279353224, "tokens_seen": 2494562304 }, { "epoch": 0.51, "learning_rate": 0.00024648960924336037, "loss": 0.0658, "theoretical_loss": 3.3975595017375664, "tokens_seen": 2494693376 }, { "epoch": 0.51, "learning_rate": 0.0002464494904918559, "loss": 0.0705, "theoretical_loss": 3.3975453764897883, "tokens_seen": 2494824448 }, { "epoch": 0.51, "learning_rate": 0.00024640937174035145, "loss": 0.0669, "theoretical_loss": 3.397531252191874, "tokens_seen": 2494955520 }, { "epoch": 0.51, "learning_rate": 0.00024636925298884697, "loss": 0.0678, "theoretical_loss": 3.3975171288437096, "tokens_seen": 2495086592 }, { "epoch": 0.51, "learning_rate": 0.00024632913423734253, "loss": 0.0644, "theoretical_loss": 3.397503006445182, "tokens_seen": 2495217664 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.0007024522637948394, "objective/train/docs_used": 908214, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3406531810760498, "objective/train/original_loss": 1.3406531810760498, "objective/train/theoretical_loss": 3.397488884996177, "objective/train/tokens_used": 865873376, "objective/train/value_avg": -0.009979248046875, "objective/train/value_loss": 0.00014636684500146657, "objective/train/value_max": -6.252527236938477e-05, "objective/train/value_min": -0.1905517578125, "objective/train/value_reward_corr": 0.7605853923004736, "objective/train/value_std": 0.0139617919921875, "objective/train/weight_avg": 1.000769853591919, "objective/train/weighted_lm_loss": 1.3412847518920898, "objective/train/weights_max": 1.1049416065216064, "objective/train/weights_min": 0.4303572475910187, "theoretical_loss": 3.397488884996177, "tokens_seen": 2495348736 }, { "epoch": 0.51, "learning_rate": 0.0002462890154858381, "loss": 0.0636, "theoretical_loss": 3.397488884996177, "tokens_seen": 2495348736 }, { "epoch": 0.51, "learning_rate": 0.0002462488967343336, "loss": 0.065, "theoretical_loss": 3.3974747644965806, "tokens_seen": 2495479808 }, { "epoch": 0.51, "learning_rate": 0.0002462087779828292, "loss": 0.0626, "theoretical_loss": 3.3974606449462796, "tokens_seen": 2495610880 }, { "epoch": 0.51, "learning_rate": 0.00024616865923132475, "loss": 0.0636, "theoretical_loss": 3.39744652634516, "tokens_seen": 2495741952 }, { "epoch": 0.51, "learning_rate": 0.00024612854047982027, "loss": 0.0632, "theoretical_loss": 3.3974324086931085, "tokens_seen": 2495873024 }, { "epoch": 0.51, "learning_rate": 0.00024608842172831584, "loss": 0.0638, "theoretical_loss": 3.397418291990011, "tokens_seen": 2496004096 }, { "epoch": 0.51, "learning_rate": 0.00024604830297681135, "loss": 0.0725, "theoretical_loss": 3.3974041762357547, "tokens_seen": 2496135168 }, { "epoch": 0.51, "learning_rate": 0.0002460081842253069, "loss": 0.0634, "theoretical_loss": 3.397390061430225, "tokens_seen": 2496266240 }, { "epoch": 0.51, "learning_rate": 0.0002459680654738025, "loss": 0.0648, "theoretical_loss": 3.397375947573309, "tokens_seen": 2496397312 }, { "epoch": 0.51, "learning_rate": 0.000245927946722298, "loss": 0.0665, "theoretical_loss": 3.397361834664893, "tokens_seen": 2496528384 }, { "epoch": 0.51, "learning_rate": 0.00024588782797079357, "loss": 0.0667, "theoretical_loss": 3.397347722704863, "tokens_seen": 2496659456 }, { "epoch": 0.51, "learning_rate": 0.0002458477092192891, "loss": 0.0682, "theoretical_loss": 3.397333611693106, "tokens_seen": 2496790528 }, { "epoch": 0.51, "learning_rate": 0.00024580759046778465, "loss": 0.0662, "theoretical_loss": 3.397319501629509, "tokens_seen": 2496921600 }, { "epoch": 0.51, "learning_rate": 0.0002457674717162802, "loss": 0.0681, "theoretical_loss": 3.397305392513957, "tokens_seen": 2497052672 }, { "epoch": 0.51, "learning_rate": 0.00024572735296477573, "loss": 0.0644, "theoretical_loss": 3.397291284346338, "tokens_seen": 2497183744 }, { "epoch": 0.51, "learning_rate": 0.0002456872342132713, "loss": 0.0694, "theoretical_loss": 3.397277177126538, "tokens_seen": 2497314816 }, { "epoch": 0.51, "learning_rate": 0.0002456471154617668, "loss": 0.0678, "theoretical_loss": 3.397263070854444, "tokens_seen": 2497445888 }, { "epoch": 0.51, "learning_rate": 0.0002456069967102624, "loss": 0.0652, "theoretical_loss": 3.3972489655299416, "tokens_seen": 2497576960 }, { "epoch": 0.51, "learning_rate": 0.00024556687795875795, "loss": 0.0663, "theoretical_loss": 3.397234861152918, "tokens_seen": 2497708032 }, { "epoch": 0.51, "learning_rate": 0.00024552675920725347, "loss": 0.0672, "theoretical_loss": 3.39722075772326, "tokens_seen": 2497839104 }, { "epoch": 0.51, "learning_rate": 0.00024548664045574904, "loss": 0.0663, "theoretical_loss": 3.397206655240854, "tokens_seen": 2497970176 }, { "epoch": 0.51, "learning_rate": 0.00024544652170424455, "loss": 0.0661, "theoretical_loss": 3.397192553705587, "tokens_seen": 2498101248 }, { "epoch": 0.51, "learning_rate": 0.0002454064029527401, "loss": 0.0655, "theoretical_loss": 3.3971784531173457, "tokens_seen": 2498232320 }, { "epoch": 0.51, "learning_rate": 0.0002453662842012357, "loss": 0.0687, "theoretical_loss": 3.3971643534760165, "tokens_seen": 2498363392 }, { "epoch": 0.51, "learning_rate": 0.0002453261654497312, "loss": 0.0652, "theoretical_loss": 3.397150254781486, "tokens_seen": 2498494464 }, { "epoch": 0.51, "objective/train/advantage_avg": 0.0002061380073428154, "objective/train/docs_used": 909311, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4420231580734253, "objective/train/original_loss": 1.4420230388641357, "objective/train/theoretical_loss": 3.3971361570336414, "objective/train/tokens_used": 869150176, "objective/train/value_avg": -0.00740814208984375, "objective/train/value_loss": 0.00014977964747231454, "objective/train/value_max": -3.594160079956055e-05, "objective/train/value_min": -0.2264404296875, "objective/train/value_reward_corr": 0.7531353748543264, "objective/train/value_std": 0.0156097412109375, "objective/train/weight_avg": 1.0002764463424683, "objective/train/weighted_lm_loss": 1.4417861700057983, "objective/train/weights_max": 1.126390814781189, "objective/train/weights_min": 0.3721363842487335, "theoretical_loss": 3.3971361570336414, "tokens_seen": 2498625536 }, { "epoch": 0.51, "learning_rate": 0.00024528604669822677, "loss": 0.0645, "theoretical_loss": 3.3971361570336414, "tokens_seen": 2498625536 }, { "epoch": 0.51, "learning_rate": 0.0002452459279467223, "loss": 0.0663, "theoretical_loss": 3.3971220602323693, "tokens_seen": 2498756608 }, { "epoch": 0.51, "learning_rate": 0.00024520580919521785, "loss": 0.0643, "theoretical_loss": 3.3971079643775566, "tokens_seen": 2498887680 }, { "epoch": 0.51, "learning_rate": 0.0002451656904437134, "loss": 0.0652, "theoretical_loss": 3.3970938694690895, "tokens_seen": 2499018752 }, { "epoch": 0.51, "learning_rate": 0.00024512557169220894, "loss": 0.0675, "theoretical_loss": 3.397079775506856, "tokens_seen": 2499149824 }, { "epoch": 0.51, "learning_rate": 0.0002450854529407045, "loss": 0.0653, "theoretical_loss": 3.3970656824907417, "tokens_seen": 2499280896 }, { "epoch": 0.51, "learning_rate": 0.0002450453341892, "loss": 0.0678, "theoretical_loss": 3.3970515904206344, "tokens_seen": 2499411968 }, { "epoch": 0.51, "learning_rate": 0.0002450052154376956, "loss": 0.0634, "theoretical_loss": 3.3970374992964207, "tokens_seen": 2499543040 }, { "epoch": 0.51, "learning_rate": 0.00024496509668619115, "loss": 0.0628, "theoretical_loss": 3.3970234091179874, "tokens_seen": 2499674112 }, { "epoch": 0.52, "learning_rate": 0.00024492497793468667, "loss": 0.0679, "theoretical_loss": 3.3970093198852216, "tokens_seen": 2499805184 }, { "epoch": 0.52, "learning_rate": 0.00024488485918318224, "loss": 0.0645, "theoretical_loss": 3.39699523159801, "tokens_seen": 2499936256 }, { "epoch": 0.52, "learning_rate": 0.00024484474043167775, "loss": 0.0684, "theoretical_loss": 3.3969811442562396, "tokens_seen": 2500067328 }, { "epoch": 0.52, "learning_rate": 0.0002448046216801733, "loss": 0.0674, "theoretical_loss": 3.396967057859798, "tokens_seen": 2500198400 }, { "epoch": 0.52, "learning_rate": 0.0002447645029286689, "loss": 0.0655, "theoretical_loss": 3.396952972408571, "tokens_seen": 2500329472 }, { "epoch": 0.52, "learning_rate": 0.0002447243841771644, "loss": 0.0682, "theoretical_loss": 3.396938887902447, "tokens_seen": 2500460544 }, { "epoch": 0.52, "learning_rate": 0.00024468426542565997, "loss": 0.0635, "theoretical_loss": 3.3969248043413125, "tokens_seen": 2500591616 }, { "epoch": 0.52, "learning_rate": 0.0002446441466741555, "loss": 0.071, "theoretical_loss": 3.396910721725054, "tokens_seen": 2500722688 }, { "epoch": 0.52, "learning_rate": 0.00024460402792265105, "loss": 0.0597, "theoretical_loss": 3.3968966400535594, "tokens_seen": 2500853760 }, { "epoch": 0.52, "learning_rate": 0.0002445639091711466, "loss": 0.0668, "theoretical_loss": 3.3968825593267153, "tokens_seen": 2500984832 }, { "epoch": 0.52, "learning_rate": 0.00024452379041964214, "loss": 0.0646, "theoretical_loss": 3.396868479544409, "tokens_seen": 2501115904 }, { "epoch": 0.52, "learning_rate": 0.0002444836716681377, "loss": 0.0652, "theoretical_loss": 3.3968544007065278, "tokens_seen": 2501246976 }, { "epoch": 0.52, "learning_rate": 0.0002444435529166332, "loss": 0.0662, "theoretical_loss": 3.3968403228129587, "tokens_seen": 2501378048 }, { "epoch": 0.52, "learning_rate": 0.0002444034341651288, "loss": 0.0639, "theoretical_loss": 3.396826245863589, "tokens_seen": 2501509120 }, { "epoch": 0.52, "learning_rate": 0.00024436331541362436, "loss": 0.0676, "theoretical_loss": 3.396812169858306, "tokens_seen": 2501640192 }, { "epoch": 0.52, "learning_rate": 0.00024432319666211987, "loss": 0.0663, "theoretical_loss": 3.396798094796996, "tokens_seen": 2501771264 }, { "epoch": 0.52, "objective/train/advantage_avg": -0.0003976252628490329, "objective/train/docs_used": 910572, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.0517197847366333, "objective/train/original_loss": 1.0517196655273438, "objective/train/theoretical_loss": 3.396784020679547, "objective/train/tokens_used": 872426976, "objective/train/value_avg": -0.005474090576171875, "objective/train/value_loss": 0.00010607629519654438, "objective/train/value_max": -2.8848648071289062e-05, "objective/train/value_min": -0.2666015625, "objective/train/value_reward_corr": 0.6565807194212963, "objective/train/value_std": 0.00859832763671875, "objective/train/weight_avg": 0.9996508359909058, "objective/train/weighted_lm_loss": 1.0510649681091309, "objective/train/weights_max": 1.0898046493530273, "objective/train/weights_min": 0.36988183856010437, "theoretical_loss": 3.396784020679547, "tokens_seen": 2501902336 }, { "epoch": 0.52, "learning_rate": 0.00024428307791061544, "loss": 0.0608, "theoretical_loss": 3.396784020679547, "tokens_seen": 2501902336 }, { "epoch": 0.52, "learning_rate": 0.00024424295915911095, "loss": 0.0638, "theoretical_loss": 3.396769947505847, "tokens_seen": 2502033408 }, { "epoch": 0.52, "learning_rate": 0.0002442028404076065, "loss": 0.0641, "theoretical_loss": 3.3967558752757823, "tokens_seen": 2502164480 }, { "epoch": 0.52, "learning_rate": 0.0002441627216561021, "loss": 0.069, "theoretical_loss": 3.39674180398924, "tokens_seen": 2502295552 }, { "epoch": 0.52, "learning_rate": 0.00024412260290459763, "loss": 0.0654, "theoretical_loss": 3.396727733646108, "tokens_seen": 2502426624 }, { "epoch": 0.52, "learning_rate": 0.00024408248415309317, "loss": 0.0625, "theoretical_loss": 3.3967136642462736, "tokens_seen": 2502557696 }, { "epoch": 0.52, "learning_rate": 0.00024404236540158869, "loss": 0.0703, "theoretical_loss": 3.396699595789624, "tokens_seen": 2502688768 }, { "epoch": 0.52, "learning_rate": 0.00024400224665008425, "loss": 0.0695, "theoretical_loss": 3.3966855282760466, "tokens_seen": 2502819840 }, { "epoch": 0.52, "learning_rate": 0.0002439621278985798, "loss": 0.0671, "theoretical_loss": 3.396671461705429, "tokens_seen": 2502950912 }, { "epoch": 0.52, "learning_rate": 0.00024392200914707536, "loss": 0.0654, "theoretical_loss": 3.3966573960776585, "tokens_seen": 2503081984 }, { "epoch": 0.52, "learning_rate": 0.0002438818903955709, "loss": 0.0649, "theoretical_loss": 3.396643331392622, "tokens_seen": 2503213056 }, { "epoch": 0.52, "learning_rate": 0.00024384177164406642, "loss": 0.0632, "theoretical_loss": 3.3966292676502077, "tokens_seen": 2503344128 }, { "epoch": 0.52, "learning_rate": 0.000243801652892562, "loss": 0.0622, "theoretical_loss": 3.3966152048503027, "tokens_seen": 2503475200 }, { "epoch": 0.52, "learning_rate": 0.00024376153414105753, "loss": 0.0685, "theoretical_loss": 3.396601142992795, "tokens_seen": 2503606272 }, { "epoch": 0.52, "learning_rate": 0.0002437214153895531, "loss": 0.0649, "theoretical_loss": 3.396587082077571, "tokens_seen": 2503737344 }, { "epoch": 0.52, "learning_rate": 0.00024368129663804864, "loss": 0.0639, "theoretical_loss": 3.3965730221045196, "tokens_seen": 2503868416 }, { "epoch": 0.52, "learning_rate": 0.00024364117788654415, "loss": 0.0659, "theoretical_loss": 3.3965589630735273, "tokens_seen": 2503999488 }, { "epoch": 0.52, "learning_rate": 0.00024360105913503972, "loss": 0.0645, "theoretical_loss": 3.3965449049844825, "tokens_seen": 2504130560 }, { "epoch": 0.52, "learning_rate": 0.00024356094038353526, "loss": 0.0657, "theoretical_loss": 3.3965308478372718, "tokens_seen": 2504261632 }, { "epoch": 0.52, "learning_rate": 0.00024352082163203083, "loss": 0.0648, "theoretical_loss": 3.3965167916317833, "tokens_seen": 2504392704 }, { "epoch": 0.52, "learning_rate": 0.00024348070288052637, "loss": 0.0635, "theoretical_loss": 3.3965027363679052, "tokens_seen": 2504523776 }, { "epoch": 0.52, "learning_rate": 0.00024344058412902189, "loss": 0.0687, "theoretical_loss": 3.3964886820455247, "tokens_seen": 2504654848 }, { "epoch": 0.52, "learning_rate": 0.00024340046537751745, "loss": 0.0671, "theoretical_loss": 3.3964746286645293, "tokens_seen": 2504785920 }, { "epoch": 0.52, "learning_rate": 0.000243360346626013, "loss": 0.0673, "theoretical_loss": 3.396460576224807, "tokens_seen": 2504916992 }, { "epoch": 0.52, "learning_rate": 0.00024332022787450856, "loss": 0.0659, "theoretical_loss": 3.3964465247262448, "tokens_seen": 2505048064 }, { "epoch": 0.52, "objective/train/advantage_avg": -0.0003388494369573891, "objective/train/docs_used": 911801, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2291241884231567, "objective/train/original_loss": 1.2291240692138672, "objective/train/theoretical_loss": 3.396432474168731, "objective/train/tokens_used": 875703776, "objective/train/value_avg": -0.005954742431640625, "objective/train/value_loss": 9.195626626024023e-05, "objective/train/value_max": -3.147125244140625e-05, "objective/train/value_min": -0.2197265625, "objective/train/value_reward_corr": 0.741124404344261, "objective/train/value_std": 0.0103607177734375, "objective/train/weight_avg": 0.9997062683105469, "objective/train/weighted_lm_loss": 1.2288764715194702, "objective/train/weights_max": 1.1281108856201172, "objective/train/weights_min": 0.6478195786476135, "theoretical_loss": 3.396432474168731, "tokens_seen": 2505179136 }, { "epoch": 0.52, "learning_rate": 0.0002432801091230041, "loss": 0.0649, "theoretical_loss": 3.396432474168731, "tokens_seen": 2505179136 }, { "epoch": 0.52, "learning_rate": 0.00024323999037149962, "loss": 0.066, "theoretical_loss": 3.3964184245521536, "tokens_seen": 2505310208 }, { "epoch": 0.52, "learning_rate": 0.0002431998716199952, "loss": 0.0649, "theoretical_loss": 3.3964043758763998, "tokens_seen": 2505441280 }, { "epoch": 0.52, "learning_rate": 0.00024315975286849073, "loss": 0.0619, "theoretical_loss": 3.396390328141358, "tokens_seen": 2505572352 }, { "epoch": 0.52, "learning_rate": 0.0002431196341169863, "loss": 0.0642, "theoretical_loss": 3.396376281346915, "tokens_seen": 2505703424 }, { "epoch": 0.52, "learning_rate": 0.00024307951536548184, "loss": 0.0705, "theoretical_loss": 3.3963622354929597, "tokens_seen": 2505834496 }, { "epoch": 0.52, "learning_rate": 0.00024303939661397735, "loss": 0.0649, "theoretical_loss": 3.3963481905793795, "tokens_seen": 2505965568 }, { "epoch": 0.52, "learning_rate": 0.00024299927786247292, "loss": 0.0695, "theoretical_loss": 3.396334146606062, "tokens_seen": 2506096640 }, { "epoch": 0.52, "learning_rate": 0.00024295915911096846, "loss": 0.064, "theoretical_loss": 3.396320103572896, "tokens_seen": 2506227712 }, { "epoch": 0.52, "learning_rate": 0.00024291904035946403, "loss": 0.0666, "theoretical_loss": 3.396306061479768, "tokens_seen": 2506358784 }, { "epoch": 0.52, "learning_rate": 0.00024287892160795957, "loss": 0.0646, "theoretical_loss": 3.396292020326567, "tokens_seen": 2506489856 }, { "epoch": 0.52, "learning_rate": 0.00024283880285645509, "loss": 0.0688, "theoretical_loss": 3.3962779801131804, "tokens_seen": 2506620928 }, { "epoch": 0.52, "learning_rate": 0.00024279868410495065, "loss": 0.0633, "theoretical_loss": 3.396263940839497, "tokens_seen": 2506752000 }, { "epoch": 0.52, "learning_rate": 0.0002427585653534462, "loss": 0.0628, "theoretical_loss": 3.3962499025054034, "tokens_seen": 2506883072 }, { "epoch": 0.52, "learning_rate": 0.00024271844660194176, "loss": 0.0651, "theoretical_loss": 3.3962358651107887, "tokens_seen": 2507014144 }, { "epoch": 0.52, "learning_rate": 0.0002426783278504373, "loss": 0.065, "theoretical_loss": 3.3962218286555403, "tokens_seen": 2507145216 }, { "epoch": 0.52, "learning_rate": 0.00024263820909893282, "loss": 0.0652, "theoretical_loss": 3.3962077931395465, "tokens_seen": 2507276288 }, { "epoch": 0.52, "learning_rate": 0.0002425980903474284, "loss": 0.0621, "theoretical_loss": 3.3961937585626956, "tokens_seen": 2507407360 }, { "epoch": 0.52, "learning_rate": 0.00024255797159592393, "loss": 0.0636, "theoretical_loss": 3.3961797249248753, "tokens_seen": 2507538432 }, { "epoch": 0.52, "learning_rate": 0.0002425178528444195, "loss": 0.0672, "theoretical_loss": 3.3961656922259738, "tokens_seen": 2507669504 }, { "epoch": 0.52, "learning_rate": 0.00024247773409291504, "loss": 0.0667, "theoretical_loss": 3.396151660465879, "tokens_seen": 2507800576 }, { "epoch": 0.52, "learning_rate": 0.00024243761534141055, "loss": 0.0633, "theoretical_loss": 3.3961376296444796, "tokens_seen": 2507931648 }, { "epoch": 0.52, "learning_rate": 0.00024239749658990612, "loss": 0.066, "theoretical_loss": 3.3961235997616632, "tokens_seen": 2508062720 }, { "epoch": 0.52, "learning_rate": 0.00024235737783840166, "loss": 0.0675, "theoretical_loss": 3.396109570817318, "tokens_seen": 2508193792 }, { "epoch": 0.52, "learning_rate": 0.00024231725908689723, "loss": 0.0641, "theoretical_loss": 3.3960955428113326, "tokens_seen": 2508324864 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.000505926669575274, "objective/train/docs_used": 912960, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.396094799041748, "objective/train/original_loss": 1.3960949182510376, "objective/train/theoretical_loss": 3.3960815157435946, "objective/train/tokens_used": 878980576, "objective/train/value_avg": -0.0038356781005859375, "objective/train/value_loss": 0.00014205790648702532, "objective/train/value_max": -3.3736228942871094e-05, "objective/train/value_min": -0.37548828125, "objective/train/value_reward_corr": 0.5848885595589, "objective/train/value_std": 0.007598876953125, "objective/train/weight_avg": 1.0005682706832886, "objective/train/weighted_lm_loss": 1.3967970609664917, "objective/train/weights_max": 1.1742674112319946, "objective/train/weights_min": 0.38283777236938477, "theoretical_loss": 3.3960815157435946, "tokens_seen": 2508455936 }, { "epoch": 0.52, "learning_rate": 0.00024227714033539277, "loss": 0.0665, "theoretical_loss": 3.3960815157435946, "tokens_seen": 2508455936 }, { "epoch": 0.52, "learning_rate": 0.00024223702158388831, "loss": 0.0645, "theoretical_loss": 3.396067489613993, "tokens_seen": 2508587008 }, { "epoch": 0.52, "learning_rate": 0.00024219690283238386, "loss": 0.0656, "theoretical_loss": 3.3960534644224154, "tokens_seen": 2508718080 }, { "epoch": 0.52, "learning_rate": 0.0002421567840808794, "loss": 0.0662, "theoretical_loss": 3.3960394401687504, "tokens_seen": 2508849152 }, { "epoch": 0.52, "learning_rate": 0.00024211666532937497, "loss": 0.0647, "theoretical_loss": 3.3960254168528863, "tokens_seen": 2508980224 }, { "epoch": 0.52, "learning_rate": 0.0002420765465778705, "loss": 0.0664, "theoretical_loss": 3.396011394474711, "tokens_seen": 2509111296 }, { "epoch": 0.52, "learning_rate": 0.00024203642782636607, "loss": 0.0693, "theoretical_loss": 3.395997373034113, "tokens_seen": 2509242368 }, { "epoch": 0.52, "learning_rate": 0.0002419963090748616, "loss": 0.0659, "theoretical_loss": 3.3959833525309806, "tokens_seen": 2509373440 }, { "epoch": 0.52, "learning_rate": 0.00024195619032335713, "loss": 0.0671, "theoretical_loss": 3.395969332965203, "tokens_seen": 2509504512 }, { "epoch": 0.52, "learning_rate": 0.0002419160715718527, "loss": 0.063, "theoretical_loss": 3.395955314336667, "tokens_seen": 2509635584 }, { "epoch": 0.52, "learning_rate": 0.00024187595282034824, "loss": 0.0703, "theoretical_loss": 3.3959412966452627, "tokens_seen": 2509766656 }, { "epoch": 0.52, "learning_rate": 0.0002418358340688438, "loss": 0.0656, "theoretical_loss": 3.395927279890877, "tokens_seen": 2509897728 }, { "epoch": 0.52, "learning_rate": 0.00024179571531733932, "loss": 0.0697, "theoretical_loss": 3.3959132640733998, "tokens_seen": 2510028800 }, { "epoch": 0.52, "learning_rate": 0.00024175559656583486, "loss": 0.0678, "theoretical_loss": 3.395899249192718, "tokens_seen": 2510159872 }, { "epoch": 0.52, "learning_rate": 0.00024171547781433043, "loss": 0.0671, "theoretical_loss": 3.395885235248721, "tokens_seen": 2510290944 }, { "epoch": 0.52, "learning_rate": 0.00024167535906282597, "loss": 0.0668, "theoretical_loss": 3.3958712222412974, "tokens_seen": 2510422016 }, { "epoch": 0.52, "learning_rate": 0.00024163524031132154, "loss": 0.0646, "theoretical_loss": 3.3958572101703353, "tokens_seen": 2510553088 }, { "epoch": 0.52, "learning_rate": 0.00024159512155981706, "loss": 0.0626, "theoretical_loss": 3.3958431990357236, "tokens_seen": 2510684160 }, { "epoch": 0.52, "learning_rate": 0.0002415550028083126, "loss": 0.0677, "theoretical_loss": 3.3958291888373506, "tokens_seen": 2510815232 }, { "epoch": 0.52, "learning_rate": 0.00024151488405680817, "loss": 0.0657, "theoretical_loss": 3.395815179575105, "tokens_seen": 2510946304 }, { "epoch": 0.52, "learning_rate": 0.0002414747653053037, "loss": 0.0608, "theoretical_loss": 3.3958011712488747, "tokens_seen": 2511077376 }, { "epoch": 0.52, "learning_rate": 0.00024143464655379928, "loss": 0.065, "theoretical_loss": 3.3957871638585493, "tokens_seen": 2511208448 }, { "epoch": 0.52, "learning_rate": 0.0002413945278022948, "loss": 0.0665, "theoretical_loss": 3.395773157404017, "tokens_seen": 2511339520 }, { "epoch": 0.52, "learning_rate": 0.00024135440905079033, "loss": 0.0679, "theoretical_loss": 3.3957591518851666, "tokens_seen": 2511470592 }, { "epoch": 0.52, "learning_rate": 0.0002413142902992859, "loss": 0.0623, "theoretical_loss": 3.395745147301887, "tokens_seen": 2511601664 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.0007067410624586046, "objective/train/docs_used": 914176, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3131245374679565, "objective/train/original_loss": 1.313124656677246, "objective/train/theoretical_loss": 3.395731143654066, "objective/train/tokens_used": 882257376, "objective/train/value_avg": -0.00690460205078125, "objective/train/value_loss": 0.00015159182657953352, "objective/train/value_max": -4.869699478149414e-05, "objective/train/value_min": -0.30859375, "objective/train/value_reward_corr": 0.7145152471644527, "objective/train/value_std": 0.014312744140625, "objective/train/weight_avg": 1.0007787942886353, "objective/train/weighted_lm_loss": 1.314253330230713, "objective/train/weights_max": 1.1927199363708496, "objective/train/weights_min": 0.3808683454990387, "theoretical_loss": 3.395731143654066, "tokens_seen": 2511732736 }, { "epoch": 0.52, "learning_rate": 0.00024127417154778144, "loss": 0.0646, "theoretical_loss": 3.395731143654066, "tokens_seen": 2511732736 }, { "epoch": 0.52, "learning_rate": 0.000241234052796277, "loss": 0.0645, "theoretical_loss": 3.395717140941593, "tokens_seen": 2511863808 }, { "epoch": 0.52, "learning_rate": 0.00024119393404477252, "loss": 0.0656, "theoretical_loss": 3.3957031391643566, "tokens_seen": 2511994880 }, { "epoch": 0.52, "learning_rate": 0.00024115381529326806, "loss": 0.0676, "theoretical_loss": 3.395689138322245, "tokens_seen": 2512125952 }, { "epoch": 0.52, "learning_rate": 0.00024111369654176363, "loss": 0.0656, "theoretical_loss": 3.395675138415148, "tokens_seen": 2512257024 }, { "epoch": 0.52, "learning_rate": 0.00024107357779025917, "loss": 0.0651, "theoretical_loss": 3.3956611394429537, "tokens_seen": 2512388096 }, { "epoch": 0.52, "learning_rate": 0.00024103345903875474, "loss": 0.0658, "theoretical_loss": 3.395647141405551, "tokens_seen": 2512519168 }, { "epoch": 0.52, "learning_rate": 0.00024099334028725026, "loss": 0.0631, "theoretical_loss": 3.395633144302829, "tokens_seen": 2512650240 }, { "epoch": 0.52, "learning_rate": 0.0002409532215357458, "loss": 0.0628, "theoretical_loss": 3.395619148134676, "tokens_seen": 2512781312 }, { "epoch": 0.52, "learning_rate": 0.00024091310278424137, "loss": 0.0656, "theoretical_loss": 3.3956051529009814, "tokens_seen": 2512912384 }, { "epoch": 0.52, "learning_rate": 0.0002408729840327369, "loss": 0.0691, "theoretical_loss": 3.3955911586016336, "tokens_seen": 2513043456 }, { "epoch": 0.52, "learning_rate": 0.00024083286528123248, "loss": 0.0655, "theoretical_loss": 3.3955771652365216, "tokens_seen": 2513174528 }, { "epoch": 0.52, "learning_rate": 0.000240792746529728, "loss": 0.0649, "theoretical_loss": 3.395563172805535, "tokens_seen": 2513305600 }, { "epoch": 0.52, "learning_rate": 0.00024075262777822353, "loss": 0.0661, "theoretical_loss": 3.395549181308562, "tokens_seen": 2513436672 }, { "epoch": 0.52, "learning_rate": 0.0002407125090267191, "loss": 0.0688, "theoretical_loss": 3.395535190745491, "tokens_seen": 2513567744 }, { "epoch": 0.52, "learning_rate": 0.00024067239027521464, "loss": 0.0646, "theoretical_loss": 3.3955212011162126, "tokens_seen": 2513698816 }, { "epoch": 0.52, "learning_rate": 0.0002406322715237102, "loss": 0.0669, "theoretical_loss": 3.3955072124206143, "tokens_seen": 2513829888 }, { "epoch": 0.52, "learning_rate": 0.00024059215277220572, "loss": 0.0651, "theoretical_loss": 3.395493224658586, "tokens_seen": 2513960960 }, { "epoch": 0.52, "learning_rate": 0.00024055203402070126, "loss": 0.065, "theoretical_loss": 3.395479237830016, "tokens_seen": 2514092032 }, { "epoch": 0.52, "learning_rate": 0.00024051191526919683, "loss": 0.0677, "theoretical_loss": 3.395465251934794, "tokens_seen": 2514223104 }, { "epoch": 0.52, "learning_rate": 0.00024047179651769237, "loss": 0.0651, "theoretical_loss": 3.395451266972809, "tokens_seen": 2514354176 }, { "epoch": 0.52, "learning_rate": 0.00024043167776618794, "loss": 0.0611, "theoretical_loss": 3.39543728294395, "tokens_seen": 2514485248 }, { "epoch": 0.52, "learning_rate": 0.00024039155901468346, "loss": 0.0662, "theoretical_loss": 3.3954232998481055, "tokens_seen": 2514616320 }, { "epoch": 0.52, "learning_rate": 0.000240351440263179, "loss": 0.0625, "theoretical_loss": 3.395409317685165, "tokens_seen": 2514747392 }, { "epoch": 0.52, "learning_rate": 0.00024031132151167457, "loss": 0.0667, "theoretical_loss": 3.3953953364550187, "tokens_seen": 2514878464 }, { "epoch": 0.52, "objective/train/advantage_avg": 0.00015045219333842397, "objective/train/docs_used": 915361, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3109140396118164, "objective/train/original_loss": 1.3109138011932373, "objective/train/theoretical_loss": 3.395381356157554, "objective/train/tokens_used": 885534176, "objective/train/value_avg": -0.008270263671875, "objective/train/value_loss": 0.0005536848329938948, "objective/train/value_max": -3.170967102050781e-05, "objective/train/value_min": -0.95654296875, "objective/train/value_reward_corr": 0.6227273902702556, "objective/train/value_std": 0.019683837890625, "objective/train/weight_avg": 1.0003957748413086, "objective/train/weighted_lm_loss": 1.3120744228363037, "objective/train/weights_max": 2.220717430114746, "objective/train/weights_min": 0.37227270007133484, "theoretical_loss": 3.395381356157554, "tokens_seen": 2515009536 }, { "epoch": 0.52, "learning_rate": 0.0002402712027601701, "loss": 0.0665, "theoretical_loss": 3.395381356157554, "tokens_seen": 2515009536 }, { "epoch": 0.52, "learning_rate": 0.00024023108400866568, "loss": 0.0658, "theoretical_loss": 3.3953673767926613, "tokens_seen": 2515140608 }, { "epoch": 0.52, "learning_rate": 0.0002401909652571612, "loss": 0.0639, "theoretical_loss": 3.3953533983602293, "tokens_seen": 2515271680 }, { "epoch": 0.52, "learning_rate": 0.00024015084650565673, "loss": 0.0658, "theoretical_loss": 3.395339420860147, "tokens_seen": 2515402752 }, { "epoch": 0.52, "learning_rate": 0.0002401107277541523, "loss": 0.065, "theoretical_loss": 3.3953254442923044, "tokens_seen": 2515533824 }, { "epoch": 0.52, "learning_rate": 0.00024007060900264784, "loss": 0.0683, "theoretical_loss": 3.39531146865659, "tokens_seen": 2515664896 }, { "epoch": 0.52, "learning_rate": 0.0002400304902511434, "loss": 0.0651, "theoretical_loss": 3.3952974939528935, "tokens_seen": 2515795968 }, { "epoch": 0.52, "learning_rate": 0.00023999037149963892, "loss": 0.0652, "theoretical_loss": 3.395283520181104, "tokens_seen": 2515927040 }, { "epoch": 0.52, "learning_rate": 0.00023995025274813447, "loss": 0.0656, "theoretical_loss": 3.395269547341111, "tokens_seen": 2516058112 }, { "epoch": 0.52, "learning_rate": 0.00023991013399663003, "loss": 0.0631, "theoretical_loss": 3.3952555754328038, "tokens_seen": 2516189184 }, { "epoch": 0.53, "learning_rate": 0.00023987001524512558, "loss": 0.0639, "theoretical_loss": 3.3952416044560714, "tokens_seen": 2516320256 }, { "epoch": 0.53, "learning_rate": 0.00023982989649362114, "loss": 0.0654, "theoretical_loss": 3.3952276344108037, "tokens_seen": 2516451328 }, { "epoch": 0.53, "learning_rate": 0.00023978977774211666, "loss": 0.0677, "theoretical_loss": 3.3952136652968896, "tokens_seen": 2516582400 }, { "epoch": 0.53, "learning_rate": 0.00023974965899061223, "loss": 0.0636, "theoretical_loss": 3.3951996971142187, "tokens_seen": 2516713472 }, { "epoch": 0.53, "learning_rate": 0.00023970954023910777, "loss": 0.0608, "theoretical_loss": 3.3951857298626806, "tokens_seen": 2516844544 }, { "epoch": 0.53, "learning_rate": 0.0002396694214876033, "loss": 0.0667, "theoretical_loss": 3.3951717635421645, "tokens_seen": 2516975616 }, { "epoch": 0.53, "learning_rate": 0.00023962930273609888, "loss": 0.0663, "theoretical_loss": 3.3951577981525602, "tokens_seen": 2517106688 }, { "epoch": 0.53, "learning_rate": 0.0002395891839845944, "loss": 0.0671, "theoretical_loss": 3.3951438336937567, "tokens_seen": 2517237760 }, { "epoch": 0.53, "learning_rate": 0.00023954906523308996, "loss": 0.0606, "theoretical_loss": 3.395129870165644, "tokens_seen": 2517368832 }, { "epoch": 0.53, "learning_rate": 0.0002395089464815855, "loss": 0.0657, "theoretical_loss": 3.3951159075681105, "tokens_seen": 2517499904 }, { "epoch": 0.53, "learning_rate": 0.00023946882773008104, "loss": 0.0634, "theoretical_loss": 3.3951019459010476, "tokens_seen": 2517630976 }, { "epoch": 0.53, "learning_rate": 0.0002394287089785766, "loss": 0.0703, "theoretical_loss": 3.395087985164343, "tokens_seen": 2517762048 }, { "epoch": 0.53, "learning_rate": 0.00023938859022707212, "loss": 0.0659, "theoretical_loss": 3.3950740253578875, "tokens_seen": 2517893120 }, { "epoch": 0.53, "learning_rate": 0.0002393484714755677, "loss": 0.0657, "theoretical_loss": 3.3950600664815704, "tokens_seen": 2518024192 }, { "epoch": 0.53, "learning_rate": 0.00023930835272406323, "loss": 0.0654, "theoretical_loss": 3.3950461085352814, "tokens_seen": 2518155264 }, { "epoch": 0.53, "objective/train/advantage_avg": -0.0004922683001495898, "objective/train/docs_used": 916685, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3738093376159668, "objective/train/original_loss": 1.3738093376159668, "objective/train/theoretical_loss": 3.39503215151891, "objective/train/tokens_used": 888810976, "objective/train/value_avg": -0.0063934326171875, "objective/train/value_loss": 0.0002033289201790467, "objective/train/value_max": -7.426738739013672e-05, "objective/train/value_min": -0.2088623046875, "objective/train/value_reward_corr": 0.6884223179012001, "objective/train/value_std": 0.01068115234375, "objective/train/weight_avg": 0.999602198600769, "objective/train/weighted_lm_loss": 1.3732523918151855, "objective/train/weights_max": 1.1994760036468506, "objective/train/weights_min": 0.40636128187179565, "theoretical_loss": 3.39503215151891, "tokens_seen": 2518286336 }, { "epoch": 0.53, "learning_rate": 0.00023926823397255878, "loss": 0.0659, "theoretical_loss": 3.39503215151891, "tokens_seen": 2518286336 }, { "epoch": 0.53, "learning_rate": 0.00023922811522105434, "loss": 0.0622, "theoretical_loss": 3.3950181954323453, "tokens_seen": 2518417408 }, { "epoch": 0.53, "learning_rate": 0.00023918799646954986, "loss": 0.0629, "theoretical_loss": 3.3950042402754774, "tokens_seen": 2518548480 }, { "epoch": 0.53, "learning_rate": 0.00023914787771804543, "loss": 0.063, "theoretical_loss": 3.3949902860481966, "tokens_seen": 2518679552 }, { "epoch": 0.53, "learning_rate": 0.00023910775896654097, "loss": 0.0699, "theoretical_loss": 3.394976332750392, "tokens_seen": 2518810624 }, { "epoch": 0.53, "learning_rate": 0.0002390676402150365, "loss": 0.0697, "theoretical_loss": 3.394962380381953, "tokens_seen": 2518941696 }, { "epoch": 0.53, "learning_rate": 0.00023902752146353208, "loss": 0.0618, "theoretical_loss": 3.39494842894277, "tokens_seen": 2519072768 }, { "epoch": 0.53, "learning_rate": 0.0002389874027120276, "loss": 0.0612, "theoretical_loss": 3.3949344784327327, "tokens_seen": 2519203840 }, { "epoch": 0.53, "learning_rate": 0.00023894728396052316, "loss": 0.0665, "theoretical_loss": 3.3949205288517303, "tokens_seen": 2519334912 }, { "epoch": 0.53, "learning_rate": 0.0002389071652090187, "loss": 0.0637, "theoretical_loss": 3.3949065801996534, "tokens_seen": 2519465984 }, { "epoch": 0.53, "learning_rate": 0.00023886704645751424, "loss": 0.0635, "theoretical_loss": 3.394892632476391, "tokens_seen": 2519597056 }, { "epoch": 0.53, "learning_rate": 0.0002388269277060098, "loss": 0.0659, "theoretical_loss": 3.3948786856818334, "tokens_seen": 2519728128 }, { "epoch": 0.53, "learning_rate": 0.00023878680895450533, "loss": 0.0648, "theoretical_loss": 3.394864739815871, "tokens_seen": 2519859200 }, { "epoch": 0.53, "learning_rate": 0.0002387466902030009, "loss": 0.064, "theoretical_loss": 3.3948507948783924, "tokens_seen": 2519990272 }, { "epoch": 0.53, "learning_rate": 0.00023870657145149643, "loss": 0.0686, "theoretical_loss": 3.3948368508692885, "tokens_seen": 2520121344 }, { "epoch": 0.53, "learning_rate": 0.00023866645269999198, "loss": 0.0639, "theoretical_loss": 3.3948229077884484, "tokens_seen": 2520252416 }, { "epoch": 0.53, "learning_rate": 0.00023862633394848754, "loss": 0.0656, "theoretical_loss": 3.394808965635763, "tokens_seen": 2520383488 }, { "epoch": 0.53, "learning_rate": 0.00023858621519698306, "loss": 0.0686, "theoretical_loss": 3.394795024411122, "tokens_seen": 2520514560 }, { "epoch": 0.53, "learning_rate": 0.00023854609644547863, "loss": 0.0675, "theoretical_loss": 3.3947810841144146, "tokens_seen": 2520645632 }, { "epoch": 0.53, "learning_rate": 0.00023850597769397417, "loss": 0.0658, "theoretical_loss": 3.394767144745532, "tokens_seen": 2520776704 }, { "epoch": 0.53, "learning_rate": 0.0002384658589424697, "loss": 0.0656, "theoretical_loss": 3.394753206304363, "tokens_seen": 2520907776 }, { "epoch": 0.53, "learning_rate": 0.00023842574019096528, "loss": 0.0642, "theoretical_loss": 3.394739268790798, "tokens_seen": 2521038848 }, { "epoch": 0.53, "learning_rate": 0.0002383856214394608, "loss": 0.0658, "theoretical_loss": 3.394725332204728, "tokens_seen": 2521169920 }, { "epoch": 0.53, "learning_rate": 0.00023834550268795636, "loss": 0.0643, "theoretical_loss": 3.3947113965460414, "tokens_seen": 2521300992 }, { "epoch": 0.53, "learning_rate": 0.0002383053839364519, "loss": 0.064, "theoretical_loss": 3.39469746181463, "tokens_seen": 2521432064 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0005203895852901042, "objective/train/docs_used": 917863, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2548446655273438, "objective/train/original_loss": 1.2548446655273438, "objective/train/theoretical_loss": 3.3946835280103826, "objective/train/tokens_used": 892087776, "objective/train/value_avg": -0.00743865966796875, "objective/train/value_loss": 0.00013191119069233537, "objective/train/value_max": -5.3048133850097656e-05, "objective/train/value_min": -0.258056640625, "objective/train/value_reward_corr": 0.6902807682056561, "objective/train/value_std": 0.011383056640625, "objective/train/weight_avg": 1.000582218170166, "objective/train/weighted_lm_loss": 1.2552107572555542, "objective/train/weights_max": 1.1398009061813354, "objective/train/weights_min": 0.3803688883781433, "theoretical_loss": 3.3946835280103826, "tokens_seen": 2521563136 }, { "epoch": 0.53, "learning_rate": 0.00023826526518494744, "loss": 0.0651, "theoretical_loss": 3.3946835280103826, "tokens_seen": 2521563136 }, { "epoch": 0.53, "learning_rate": 0.000238225146433443, "loss": 0.0626, "theoretical_loss": 3.3946695951331893, "tokens_seen": 2521694208 }, { "epoch": 0.53, "learning_rate": 0.00023818502768193853, "loss": 0.0629, "theoretical_loss": 3.3946556631829417, "tokens_seen": 2521825280 }, { "epoch": 0.53, "learning_rate": 0.0002381449089304341, "loss": 0.0635, "theoretical_loss": 3.3946417321595286, "tokens_seen": 2521956352 }, { "epoch": 0.53, "learning_rate": 0.00023810479017892964, "loss": 0.0617, "theoretical_loss": 3.3946278020628404, "tokens_seen": 2522087424 }, { "epoch": 0.53, "learning_rate": 0.00023806467142742518, "loss": 0.0682, "theoretical_loss": 3.3946138728927675, "tokens_seen": 2522218496 }, { "epoch": 0.53, "learning_rate": 0.00023802455267592075, "loss": 0.0647, "theoretical_loss": 3.3945999446492, "tokens_seen": 2522349568 }, { "epoch": 0.53, "learning_rate": 0.00023798443392441626, "loss": 0.0656, "theoretical_loss": 3.394586017332028, "tokens_seen": 2522480640 }, { "epoch": 0.53, "learning_rate": 0.00023794431517291183, "loss": 0.0656, "theoretical_loss": 3.3945720909411428, "tokens_seen": 2522611712 }, { "epoch": 0.53, "learning_rate": 0.00023790419642140737, "loss": 0.0673, "theoretical_loss": 3.394558165476433, "tokens_seen": 2522742784 }, { "epoch": 0.53, "learning_rate": 0.0002378640776699029, "loss": 0.0637, "theoretical_loss": 3.3945442409377904, "tokens_seen": 2522873856 }, { "epoch": 0.53, "learning_rate": 0.00023782395891839848, "loss": 0.0686, "theoretical_loss": 3.394530317325104, "tokens_seen": 2523004928 }, { "epoch": 0.53, "learning_rate": 0.000237783840166894, "loss": 0.065, "theoretical_loss": 3.394516394638265, "tokens_seen": 2523136000 }, { "epoch": 0.53, "learning_rate": 0.00023774372141538956, "loss": 0.0682, "theoretical_loss": 3.3945024728771633, "tokens_seen": 2523267072 }, { "epoch": 0.53, "learning_rate": 0.0002377036026638851, "loss": 0.0645, "theoretical_loss": 3.3944885520416896, "tokens_seen": 2523398144 }, { "epoch": 0.53, "learning_rate": 0.00023766348391238064, "loss": 0.0678, "theoretical_loss": 3.3944746321317343, "tokens_seen": 2523529216 }, { "epoch": 0.53, "learning_rate": 0.0002376233651608762, "loss": 0.0677, "theoretical_loss": 3.3944607131471876, "tokens_seen": 2523660288 }, { "epoch": 0.53, "learning_rate": 0.00023758324640937173, "loss": 0.0651, "theoretical_loss": 3.39444679508794, "tokens_seen": 2523791360 }, { "epoch": 0.53, "learning_rate": 0.0002375431276578673, "loss": 0.0651, "theoretical_loss": 3.3944328779538813, "tokens_seen": 2523922432 }, { "epoch": 0.53, "learning_rate": 0.00023750300890636284, "loss": 0.063, "theoretical_loss": 3.394418961744903, "tokens_seen": 2524053504 }, { "epoch": 0.53, "learning_rate": 0.0002374628901548584, "loss": 0.0666, "theoretical_loss": 3.3944050464608955, "tokens_seen": 2524184576 }, { "epoch": 0.53, "learning_rate": 0.00023742277140335395, "loss": 0.0641, "theoretical_loss": 3.3943911321017484, "tokens_seen": 2524315648 }, { "epoch": 0.53, "learning_rate": 0.00023738265265184946, "loss": 0.066, "theoretical_loss": 3.394377218667353, "tokens_seen": 2524446720 }, { "epoch": 0.53, "learning_rate": 0.00023734253390034503, "loss": 0.0666, "theoretical_loss": 3.394363306157599, "tokens_seen": 2524577792 }, { "epoch": 0.53, "learning_rate": 0.00023730241514884057, "loss": 0.0675, "theoretical_loss": 3.3943493945723784, "tokens_seen": 2524708864 }, { "epoch": 0.53, "objective/train/advantage_avg": -0.00013150869926903397, "objective/train/docs_used": 918983, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3734575510025024, "objective/train/original_loss": 1.373457670211792, "objective/train/theoretical_loss": 3.39433548391158, "objective/train/tokens_used": 895364576, "objective/train/value_avg": -0.006908416748046875, "objective/train/value_loss": 0.00017328675312455744, "objective/train/value_max": -2.9325485229492188e-05, "objective/train/value_min": -0.19482421875, "objective/train/value_reward_corr": 0.6839067058350398, "objective/train/value_std": 0.010894775390625, "objective/train/weight_avg": 0.9999459385871887, "objective/train/weighted_lm_loss": 1.3730627298355103, "objective/train/weights_max": 1.0975703001022339, "objective/train/weights_min": 0.3702559173107147, "theoretical_loss": 3.39433548391158, "tokens_seen": 2524839936 }, { "epoch": 0.53, "learning_rate": 0.00023726229639733614, "loss": 0.0696, "theoretical_loss": 3.39433548391158, "tokens_seen": 2524839936 }, { "epoch": 0.53, "learning_rate": 0.00023722217764583168, "loss": 0.0672, "theoretical_loss": 3.394321574175096, "tokens_seen": 2524971008 }, { "epoch": 0.53, "learning_rate": 0.0002371820588943272, "loss": 0.0672, "theoretical_loss": 3.394307665362816, "tokens_seen": 2525102080 }, { "epoch": 0.53, "learning_rate": 0.00023714194014282276, "loss": 0.0674, "theoretical_loss": 3.394293757474631, "tokens_seen": 2525233152 }, { "epoch": 0.53, "learning_rate": 0.0002371018213913183, "loss": 0.0628, "theoretical_loss": 3.3942798505104315, "tokens_seen": 2525364224 }, { "epoch": 0.53, "learning_rate": 0.00023706170263981387, "loss": 0.0656, "theoretical_loss": 3.394265944470108, "tokens_seen": 2525495296 }, { "epoch": 0.53, "learning_rate": 0.0002370215838883094, "loss": 0.0678, "theoretical_loss": 3.3942520393535514, "tokens_seen": 2525626368 }, { "epoch": 0.53, "learning_rate": 0.00023698146513680493, "loss": 0.0658, "theoretical_loss": 3.3942381351606525, "tokens_seen": 2525757440 }, { "epoch": 0.53, "learning_rate": 0.0002369413463853005, "loss": 0.0706, "theoretical_loss": 3.394224231891302, "tokens_seen": 2525888512 }, { "epoch": 0.53, "learning_rate": 0.00023690122763379604, "loss": 0.0652, "theoretical_loss": 3.3942103295453903, "tokens_seen": 2526019584 }, { "epoch": 0.53, "learning_rate": 0.0002368611088822916, "loss": 0.0663, "theoretical_loss": 3.3941964281228088, "tokens_seen": 2526150656 }, { "epoch": 0.53, "learning_rate": 0.00023682099013078715, "loss": 0.0676, "theoretical_loss": 3.394182527623448, "tokens_seen": 2526281728 }, { "epoch": 0.53, "learning_rate": 0.00023678087137928266, "loss": 0.0656, "theoretical_loss": 3.394168628047198, "tokens_seen": 2526412800 }, { "epoch": 0.53, "learning_rate": 0.00023674075262777823, "loss": 0.0646, "theoretical_loss": 3.39415472939395, "tokens_seen": 2526543872 }, { "epoch": 0.53, "learning_rate": 0.00023670063387627377, "loss": 0.0695, "theoretical_loss": 3.3941408316635955, "tokens_seen": 2526674944 }, { "epoch": 0.53, "learning_rate": 0.00023666051512476934, "loss": 0.0653, "theoretical_loss": 3.3941269348560246, "tokens_seen": 2526806016 }, { "epoch": 0.53, "learning_rate": 0.00023662039637326488, "loss": 0.0675, "theoretical_loss": 3.394113038971129, "tokens_seen": 2526937088 }, { "epoch": 0.53, "learning_rate": 0.0002365802776217604, "loss": 0.0665, "theoretical_loss": 3.3940991440087984, "tokens_seen": 2527068160 }, { "epoch": 0.53, "learning_rate": 0.00023654015887025596, "loss": 0.0681, "theoretical_loss": 3.3940852499689242, "tokens_seen": 2527199232 }, { "epoch": 0.53, "learning_rate": 0.0002365000401187515, "loss": 0.0682, "theoretical_loss": 3.3940713568513976, "tokens_seen": 2527330304 }, { "epoch": 0.53, "learning_rate": 0.00023645992136724707, "loss": 0.0652, "theoretical_loss": 3.3940574646561097, "tokens_seen": 2527461376 }, { "epoch": 0.53, "learning_rate": 0.0002364198026157426, "loss": 0.0689, "theoretical_loss": 3.394043573382951, "tokens_seen": 2527592448 }, { "epoch": 0.53, "learning_rate": 0.00023637968386423813, "loss": 0.0657, "theoretical_loss": 3.394029683031812, "tokens_seen": 2527723520 }, { "epoch": 0.53, "learning_rate": 0.0002363395651127337, "loss": 0.0686, "theoretical_loss": 3.394015793602585, "tokens_seen": 2527854592 }, { "epoch": 0.53, "learning_rate": 0.00023629944636122924, "loss": 0.0645, "theoretical_loss": 3.39400190509516, "tokens_seen": 2527985664 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0004710943321697414, "objective/train/docs_used": 920165, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4900257587432861, "objective/train/original_loss": 1.4900257587432861, "objective/train/theoretical_loss": 3.393988017509429, "objective/train/tokens_used": 898641376, "objective/train/value_avg": -0.00904083251953125, "objective/train/value_loss": 0.00047539075603708625, "objective/train/value_max": -5.918741226196289e-05, "objective/train/value_min": -0.7919921875, "objective/train/value_reward_corr": 0.6817419507265292, "objective/train/value_std": 0.0220794677734375, "objective/train/weight_avg": 1.0006822347640991, "objective/train/weighted_lm_loss": 1.4893174171447754, "objective/train/weights_max": 1.8265132904052734, "objective/train/weights_min": 0.3756278157234192, "theoretical_loss": 3.393988017509429, "tokens_seen": 2528116736 }, { "epoch": 0.53, "learning_rate": 0.0002362593276097248, "loss": 0.0713, "theoretical_loss": 3.393988017509429, "tokens_seen": 2528116736 }, { "epoch": 0.53, "learning_rate": 0.00023621920885822035, "loss": 0.0629, "theoretical_loss": 3.3939741308452813, "tokens_seen": 2528247808 }, { "epoch": 0.53, "learning_rate": 0.00023617909010671586, "loss": 0.067, "theoretical_loss": 3.3939602451026096, "tokens_seen": 2528378880 }, { "epoch": 0.53, "learning_rate": 0.00023613897135521143, "loss": 0.0668, "theoretical_loss": 3.393946360281305, "tokens_seen": 2528509952 }, { "epoch": 0.53, "learning_rate": 0.00023609885260370697, "loss": 0.0714, "theoretical_loss": 3.393932476381257, "tokens_seen": 2528641024 }, { "epoch": 0.53, "learning_rate": 0.00023605873385220254, "loss": 0.0682, "theoretical_loss": 3.3939185934023586, "tokens_seen": 2528772096 }, { "epoch": 0.53, "learning_rate": 0.00023601861510069808, "loss": 0.0701, "theoretical_loss": 3.3939047113445, "tokens_seen": 2528903168 }, { "epoch": 0.53, "learning_rate": 0.0002359784963491936, "loss": 0.0667, "theoretical_loss": 3.3938908302075723, "tokens_seen": 2529034240 }, { "epoch": 0.53, "learning_rate": 0.00023593837759768916, "loss": 0.0654, "theoretical_loss": 3.393876949991467, "tokens_seen": 2529165312 }, { "epoch": 0.53, "learning_rate": 0.0002358982588461847, "loss": 0.0684, "theoretical_loss": 3.393863070696075, "tokens_seen": 2529296384 }, { "epoch": 0.53, "learning_rate": 0.00023585814009468027, "loss": 0.0671, "theoretical_loss": 3.393849192321288, "tokens_seen": 2529427456 }, { "epoch": 0.53, "learning_rate": 0.00023581802134317581, "loss": 0.0664, "theoretical_loss": 3.3938353148669966, "tokens_seen": 2529558528 }, { "epoch": 0.53, "learning_rate": 0.00023577790259167133, "loss": 0.0638, "theoretical_loss": 3.3938214383330925, "tokens_seen": 2529689600 }, { "epoch": 0.53, "learning_rate": 0.0002357377838401669, "loss": 0.0643, "theoretical_loss": 3.3938075627194673, "tokens_seen": 2529820672 }, { "epoch": 0.53, "learning_rate": 0.00023569766508866244, "loss": 0.0629, "theoretical_loss": 3.3937936880260113, "tokens_seen": 2529951744 }, { "epoch": 0.53, "learning_rate": 0.000235657546337158, "loss": 0.0677, "theoretical_loss": 3.3937798142526168, "tokens_seen": 2530082816 }, { "epoch": 0.53, "learning_rate": 0.00023561742758565355, "loss": 0.0687, "theoretical_loss": 3.3937659413991743, "tokens_seen": 2530213888 }, { "epoch": 0.53, "learning_rate": 0.00023557730883414906, "loss": 0.0668, "theoretical_loss": 3.3937520694655756, "tokens_seen": 2530344960 }, { "epoch": 0.53, "learning_rate": 0.00023553719008264463, "loss": 0.0653, "theoretical_loss": 3.3937381984517123, "tokens_seen": 2530476032 }, { "epoch": 0.53, "learning_rate": 0.00023549707133114017, "loss": 0.063, "theoretical_loss": 3.393724328357475, "tokens_seen": 2530607104 }, { "epoch": 0.53, "learning_rate": 0.00023545695257963574, "loss": 0.062, "theoretical_loss": 3.3937104591827563, "tokens_seen": 2530738176 }, { "epoch": 0.53, "learning_rate": 0.00023541683382813128, "loss": 0.0679, "theoretical_loss": 3.3936965909274464, "tokens_seen": 2530869248 }, { "epoch": 0.53, "learning_rate": 0.0002353767150766268, "loss": 0.0685, "theoretical_loss": 3.3936827235914375, "tokens_seen": 2531000320 }, { "epoch": 0.53, "learning_rate": 0.00023533659632512236, "loss": 0.0662, "theoretical_loss": 3.3936688571746205, "tokens_seen": 2531131392 }, { "epoch": 0.53, "learning_rate": 0.0002352964775736179, "loss": 0.0672, "theoretical_loss": 3.3936549916768874, "tokens_seen": 2531262464 }, { "epoch": 0.53, "objective/train/advantage_avg": 0.0011438115034252405, "objective/train/docs_used": 921348, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3953757286071777, "objective/train/original_loss": 1.3953756093978882, "objective/train/theoretical_loss": 3.39364112709813, "objective/train/tokens_used": 901918176, "objective/train/value_avg": -0.008453369140625, "objective/train/value_loss": 0.0002688639215193689, "objective/train/value_max": -5.7816505432128906e-05, "objective/train/value_min": -0.294677734375, "objective/train/value_reward_corr": 0.6332306107485811, "objective/train/value_std": 0.0147705078125, "objective/train/weight_avg": 1.00126314163208, "objective/train/weighted_lm_loss": 1.3960059881210327, "objective/train/weights_max": 1.297872543334961, "objective/train/weights_min": 0.4004276692867279, "theoretical_loss": 3.39364112709813, "tokens_seen": 2531393536 }, { "epoch": 0.53, "learning_rate": 0.00023525635882211347, "loss": 0.0692, "theoretical_loss": 3.39364112709813, "tokens_seen": 2531393536 }, { "epoch": 0.53, "learning_rate": 0.00023521624007060901, "loss": 0.0648, "theoretical_loss": 3.3936272634382387, "tokens_seen": 2531524608 }, { "epoch": 0.53, "learning_rate": 0.00023517612131910453, "loss": 0.0673, "theoretical_loss": 3.393613400697106, "tokens_seen": 2531655680 }, { "epoch": 0.53, "learning_rate": 0.0002351360025676001, "loss": 0.0641, "theoretical_loss": 3.3935995388746227, "tokens_seen": 2531786752 }, { "epoch": 0.53, "learning_rate": 0.00023509588381609564, "loss": 0.0665, "theoretical_loss": 3.393585677970681, "tokens_seen": 2531917824 }, { "epoch": 0.53, "learning_rate": 0.0002350557650645912, "loss": 0.068, "theoretical_loss": 3.393571817985172, "tokens_seen": 2532048896 }, { "epoch": 0.53, "learning_rate": 0.00023501564631308675, "loss": 0.068, "theoretical_loss": 3.393557958917988, "tokens_seen": 2532179968 }, { "epoch": 0.53, "learning_rate": 0.00023497552756158226, "loss": 0.0613, "theoretical_loss": 3.3935441007690197, "tokens_seen": 2532311040 }, { "epoch": 0.53, "learning_rate": 0.00023493540881007783, "loss": 0.0647, "theoretical_loss": 3.3935302435381596, "tokens_seen": 2532442112 }, { "epoch": 0.53, "learning_rate": 0.00023489529005857337, "loss": 0.0662, "theoretical_loss": 3.393516387225299, "tokens_seen": 2532573184 }, { "epoch": 0.53, "learning_rate": 0.00023485517130706894, "loss": 0.0669, "theoretical_loss": 3.393502531830329, "tokens_seen": 2532704256 }, { "epoch": 0.54, "learning_rate": 0.00023481505255556448, "loss": 0.0671, "theoretical_loss": 3.393488677353142, "tokens_seen": 2532835328 }, { "epoch": 0.54, "learning_rate": 0.00023477493380406002, "loss": 0.0669, "theoretical_loss": 3.39347482379363, "tokens_seen": 2532966400 }, { "epoch": 0.54, "learning_rate": 0.00023473481505255556, "loss": 0.0642, "theoretical_loss": 3.3934609711516845, "tokens_seen": 2533097472 }, { "epoch": 0.54, "learning_rate": 0.0002346946963010511, "loss": 0.0663, "theoretical_loss": 3.3934471194271967, "tokens_seen": 2533228544 }, { "epoch": 0.54, "learning_rate": 0.00023465457754954667, "loss": 0.0687, "theoretical_loss": 3.3934332686200586, "tokens_seen": 2533359616 }, { "epoch": 0.54, "learning_rate": 0.00023461445879804221, "loss": 0.0641, "theoretical_loss": 3.3934194187301623, "tokens_seen": 2533490688 }, { "epoch": 0.54, "learning_rate": 0.00023457434004653776, "loss": 0.0687, "theoretical_loss": 3.3934055697573995, "tokens_seen": 2533621760 }, { "epoch": 0.54, "learning_rate": 0.0002345342212950333, "loss": 0.0637, "theoretical_loss": 3.393391721701662, "tokens_seen": 2533752832 }, { "epoch": 0.54, "learning_rate": 0.00023449410254352884, "loss": 0.0663, "theoretical_loss": 3.393377874562841, "tokens_seen": 2533883904 }, { "epoch": 0.54, "learning_rate": 0.0002344539837920244, "loss": 0.0669, "theoretical_loss": 3.3933640283408293, "tokens_seen": 2534014976 }, { "epoch": 0.54, "learning_rate": 0.00023441386504051995, "loss": 0.0652, "theoretical_loss": 3.393350183035519, "tokens_seen": 2534146048 }, { "epoch": 0.54, "learning_rate": 0.0002343737462890155, "loss": 0.0655, "theoretical_loss": 3.393336338646801, "tokens_seen": 2534277120 }, { "epoch": 0.54, "learning_rate": 0.00023433362753751103, "loss": 0.068, "theoretical_loss": 3.3933224951745675, "tokens_seen": 2534408192 }, { "epoch": 0.54, "learning_rate": 0.00023429350878600657, "loss": 0.0635, "theoretical_loss": 3.393308652618711, "tokens_seen": 2534539264 }, { "epoch": 0.54, "objective/train/advantage_avg": 7.604699931107461e-05, "objective/train/docs_used": 922493, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2233229875564575, "objective/train/original_loss": 1.223323106765747, "objective/train/theoretical_loss": 3.3932948109791226, "objective/train/tokens_used": 905194976, "objective/train/value_avg": -0.004955291748046875, "objective/train/value_loss": 0.00018662074580788612, "objective/train/value_max": -3.975629806518555e-05, "objective/train/value_min": -0.93798828125, "objective/train/value_reward_corr": 0.6786674576168041, "objective/train/value_std": 0.01309967041015625, "objective/train/weight_avg": 1.000161051750183, "objective/train/weighted_lm_loss": 1.2234770059585571, "objective/train/weights_max": 1.5793782472610474, "objective/train/weights_min": 0.3747633397579193, "theoretical_loss": 3.3932948109791226, "tokens_seen": 2534670336 }, { "epoch": 0.54, "learning_rate": 0.00023425339003450214, "loss": 0.0618, "theoretical_loss": 3.3932948109791226, "tokens_seen": 2534670336 }, { "epoch": 0.54, "learning_rate": 0.00023421327128299768, "loss": 0.0647, "theoretical_loss": 3.393280970255695, "tokens_seen": 2534801408 }, { "epoch": 0.54, "learning_rate": 0.00023417315253149322, "loss": 0.0662, "theoretical_loss": 3.39326713044832, "tokens_seen": 2534932480 }, { "epoch": 0.54, "learning_rate": 0.00023413303377998876, "loss": 0.0663, "theoretical_loss": 3.3932532915568894, "tokens_seen": 2535063552 }, { "epoch": 0.54, "learning_rate": 0.0002340929150284843, "loss": 0.069, "theoretical_loss": 3.393239453581295, "tokens_seen": 2535194624 }, { "epoch": 0.54, "learning_rate": 0.00023405279627697987, "loss": 0.0635, "theoretical_loss": 3.3932256165214296, "tokens_seen": 2535325696 }, { "epoch": 0.54, "learning_rate": 0.00023401267752547542, "loss": 0.062, "theoretical_loss": 3.393211780377185, "tokens_seen": 2535456768 }, { "epoch": 0.54, "learning_rate": 0.00023397255877397098, "loss": 0.066, "theoretical_loss": 3.393197945148453, "tokens_seen": 2535587840 }, { "epoch": 0.54, "learning_rate": 0.0002339324400224665, "loss": 0.0645, "theoretical_loss": 3.3931841108351257, "tokens_seen": 2535718912 }, { "epoch": 0.54, "learning_rate": 0.00023389232127096204, "loss": 0.0636, "theoretical_loss": 3.393170277437096, "tokens_seen": 2535849984 }, { "epoch": 0.54, "learning_rate": 0.0002338522025194576, "loss": 0.0631, "theoretical_loss": 3.393156444954255, "tokens_seen": 2535981056 }, { "epoch": 0.54, "learning_rate": 0.00023381208376795315, "loss": 0.0669, "theoretical_loss": 3.393142613386495, "tokens_seen": 2536112128 }, { "epoch": 0.54, "learning_rate": 0.00023377196501644872, "loss": 0.0648, "theoretical_loss": 3.393128782733709, "tokens_seen": 2536243200 }, { "epoch": 0.54, "learning_rate": 0.00023373184626494423, "loss": 0.0694, "theoretical_loss": 3.393114952995788, "tokens_seen": 2536374272 }, { "epoch": 0.54, "learning_rate": 0.00023369172751343977, "loss": 0.0669, "theoretical_loss": 3.3931011241726248, "tokens_seen": 2536505344 }, { "epoch": 0.54, "learning_rate": 0.00023365160876193534, "loss": 0.0674, "theoretical_loss": 3.3930872962641123, "tokens_seen": 2536636416 }, { "epoch": 0.54, "learning_rate": 0.00023361149001043088, "loss": 0.066, "theoretical_loss": 3.393073469270142, "tokens_seen": 2536767488 }, { "epoch": 0.54, "learning_rate": 0.00023357137125892645, "loss": 0.0645, "theoretical_loss": 3.3930596431906057, "tokens_seen": 2536898560 }, { "epoch": 0.54, "learning_rate": 0.00023353125250742197, "loss": 0.0665, "theoretical_loss": 3.393045818025397, "tokens_seen": 2537029632 }, { "epoch": 0.54, "learning_rate": 0.0002334911337559175, "loss": 0.0692, "theoretical_loss": 3.3930319937744073, "tokens_seen": 2537160704 }, { "epoch": 0.54, "learning_rate": 0.00023345101500441307, "loss": 0.0694, "theoretical_loss": 3.393018170437529, "tokens_seen": 2537291776 }, { "epoch": 0.54, "learning_rate": 0.00023341089625290862, "loss": 0.0635, "theoretical_loss": 3.3930043480146543, "tokens_seen": 2537422848 }, { "epoch": 0.54, "learning_rate": 0.00023337077750140418, "loss": 0.064, "theoretical_loss": 3.392990526505676, "tokens_seen": 2537553920 }, { "epoch": 0.54, "learning_rate": 0.0002333306587498997, "loss": 0.0652, "theoretical_loss": 3.3929767059104865, "tokens_seen": 2537684992 }, { "epoch": 0.54, "learning_rate": 0.00023329053999839524, "loss": 0.0692, "theoretical_loss": 3.3929628862289776, "tokens_seen": 2537816064 }, { "epoch": 0.54, "objective/train/advantage_avg": -5.742259236285463e-05, "objective/train/docs_used": 923645, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3771110773086548, "objective/train/original_loss": 1.3771110773086548, "objective/train/theoretical_loss": 3.3929490674610423, "objective/train/tokens_used": 908471776, "objective/train/value_avg": -0.0087432861328125, "objective/train/value_loss": 0.00020767546084243804, "objective/train/value_max": -1.3113021850585938e-05, "objective/train/value_min": -0.2257080078125, "objective/train/value_reward_corr": 0.7011517911850242, "objective/train/value_std": 0.01357269287109375, "objective/train/weight_avg": 1.0000407695770264, "objective/train/weighted_lm_loss": 1.377713918685913, "objective/train/weights_max": 1.1882543563842773, "objective/train/weights_min": 0.3712006211280823, "theoretical_loss": 3.3929490674610423, "tokens_seen": 2537947136 }, { "epoch": 0.54, "learning_rate": 0.0002332504212468908, "loss": 0.0692, "theoretical_loss": 3.3929490674610423, "tokens_seen": 2537947136 }, { "epoch": 0.54, "learning_rate": 0.00023321030249538635, "loss": 0.0645, "theoretical_loss": 3.392935249606573, "tokens_seen": 2538078208 }, { "epoch": 0.54, "learning_rate": 0.00023317018374388192, "loss": 0.065, "theoretical_loss": 3.392921432665461, "tokens_seen": 2538209280 }, { "epoch": 0.54, "learning_rate": 0.00023313006499237743, "loss": 0.0662, "theoretical_loss": 3.3929076166376007, "tokens_seen": 2538340352 }, { "epoch": 0.54, "learning_rate": 0.00023308994624087297, "loss": 0.0627, "theoretical_loss": 3.3928938015228836, "tokens_seen": 2538471424 }, { "epoch": 0.54, "learning_rate": 0.00023304982748936854, "loss": 0.0665, "theoretical_loss": 3.3928799873212014, "tokens_seen": 2538602496 }, { "epoch": 0.54, "learning_rate": 0.00023300970873786408, "loss": 0.0632, "theoretical_loss": 3.392866174032448, "tokens_seen": 2538733568 }, { "epoch": 0.54, "learning_rate": 0.00023296958998635965, "loss": 0.0641, "theoretical_loss": 3.3928523616565154, "tokens_seen": 2538864640 }, { "epoch": 0.54, "learning_rate": 0.00023292947123485517, "loss": 0.0666, "theoretical_loss": 3.392838550193296, "tokens_seen": 2538995712 }, { "epoch": 0.54, "learning_rate": 0.0002328893524833507, "loss": 0.0722, "theoretical_loss": 3.3928247396426827, "tokens_seen": 2539126784 }, { "epoch": 0.54, "learning_rate": 0.00023284923373184628, "loss": 0.0663, "theoretical_loss": 3.3928109300045675, "tokens_seen": 2539257856 }, { "epoch": 0.54, "learning_rate": 0.00023280911498034182, "loss": 0.0649, "theoretical_loss": 3.3927971212788437, "tokens_seen": 2539388928 }, { "epoch": 0.54, "learning_rate": 0.00023276899622883738, "loss": 0.0636, "theoretical_loss": 3.3927833134654035, "tokens_seen": 2539520000 }, { "epoch": 0.54, "learning_rate": 0.0002327288774773329, "loss": 0.0649, "theoretical_loss": 3.3927695065641394, "tokens_seen": 2539651072 }, { "epoch": 0.54, "learning_rate": 0.00023268875872582844, "loss": 0.0648, "theoretical_loss": 3.392755700574945, "tokens_seen": 2539782144 }, { "epoch": 0.54, "learning_rate": 0.000232648639974324, "loss": 0.0675, "theoretical_loss": 3.392741895497712, "tokens_seen": 2539913216 }, { "epoch": 0.54, "learning_rate": 0.00023260852122281955, "loss": 0.0696, "theoretical_loss": 3.3927280913323337, "tokens_seen": 2540044288 }, { "epoch": 0.54, "learning_rate": 0.00023256840247131512, "loss": 0.0645, "theoretical_loss": 3.392714288078702, "tokens_seen": 2540175360 }, { "epoch": 0.54, "learning_rate": 0.00023252828371981063, "loss": 0.066, "theoretical_loss": 3.3927004857367105, "tokens_seen": 2540306432 }, { "epoch": 0.54, "learning_rate": 0.00023248816496830617, "loss": 0.0653, "theoretical_loss": 3.3926866843062515, "tokens_seen": 2540437504 }, { "epoch": 0.54, "learning_rate": 0.00023244804621680174, "loss": 0.0653, "theoretical_loss": 3.392672883787218, "tokens_seen": 2540568576 }, { "epoch": 0.54, "learning_rate": 0.00023240792746529728, "loss": 0.0685, "theoretical_loss": 3.392659084179503, "tokens_seen": 2540699648 }, { "epoch": 0.54, "learning_rate": 0.00023236780871379285, "loss": 0.0656, "theoretical_loss": 3.392645285482999, "tokens_seen": 2540830720 }, { "epoch": 0.54, "learning_rate": 0.00023232768996228837, "loss": 0.0689, "theoretical_loss": 3.3926314876975985, "tokens_seen": 2540961792 }, { "epoch": 0.54, "learning_rate": 0.00023228757121078393, "loss": 0.0643, "theoretical_loss": 3.392617690823195, "tokens_seen": 2541092864 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.0006037571583874524, "objective/train/docs_used": 924855, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3289655447006226, "objective/train/original_loss": 1.328965425491333, "objective/train/theoretical_loss": 3.3926038948596813, "objective/train/tokens_used": 911748576, "objective/train/value_avg": -0.00695037841796875, "objective/train/value_loss": 0.00014737938181497157, "objective/train/value_max": -6.109476089477539e-05, "objective/train/value_min": -0.8095703125, "objective/train/value_reward_corr": 0.702861275576292, "objective/train/value_std": 0.0125579833984375, "objective/train/weight_avg": 1.0006718635559082, "objective/train/weighted_lm_loss": 1.330092191696167, "objective/train/weights_max": 1.1572949886322021, "objective/train/weights_min": 0.39359450340270996, "theoretical_loss": 3.3926038948596813, "tokens_seen": 2541223936 }, { "epoch": 0.54, "learning_rate": 0.00023224745245927948, "loss": 0.0647, "theoretical_loss": 3.3926038948596813, "tokens_seen": 2541223936 }, { "epoch": 0.54, "learning_rate": 0.00023220733370777502, "loss": 0.068, "theoretical_loss": 3.3925900998069496, "tokens_seen": 2541355008 }, { "epoch": 0.54, "learning_rate": 0.00023216721495627059, "loss": 0.0635, "theoretical_loss": 3.3925763056648934, "tokens_seen": 2541486080 }, { "epoch": 0.54, "learning_rate": 0.0002321270962047661, "loss": 0.0684, "theoretical_loss": 3.3925625124334053, "tokens_seen": 2541617152 }, { "epoch": 0.54, "learning_rate": 0.00023208697745326167, "loss": 0.0632, "theoretical_loss": 3.3925487201123787, "tokens_seen": 2541748224 }, { "epoch": 0.54, "learning_rate": 0.0002320468587017572, "loss": 0.0695, "theoretical_loss": 3.392534928701706, "tokens_seen": 2541879296 }, { "epoch": 0.54, "learning_rate": 0.00023200673995025275, "loss": 0.0658, "theoretical_loss": 3.392521138201281, "tokens_seen": 2542010368 }, { "epoch": 0.54, "learning_rate": 0.00023196662119874832, "loss": 0.0671, "theoretical_loss": 3.392507348610996, "tokens_seen": 2542141440 }, { "epoch": 0.54, "learning_rate": 0.00023192650244724383, "loss": 0.0663, "theoretical_loss": 3.392493559930744, "tokens_seen": 2542272512 }, { "epoch": 0.54, "learning_rate": 0.0002318863836957394, "loss": 0.0663, "theoretical_loss": 3.392479772160418, "tokens_seen": 2542403584 }, { "epoch": 0.54, "learning_rate": 0.00023184626494423494, "loss": 0.0646, "theoretical_loss": 3.3924659852999115, "tokens_seen": 2542534656 }, { "epoch": 0.54, "learning_rate": 0.00023180614619273048, "loss": 0.0687, "theoretical_loss": 3.3924521993491172, "tokens_seen": 2542665728 }, { "epoch": 0.54, "learning_rate": 0.00023176602744122605, "loss": 0.0642, "theoretical_loss": 3.3924384143079283, "tokens_seen": 2542796800 }, { "epoch": 0.54, "learning_rate": 0.00023172590868972157, "loss": 0.0646, "theoretical_loss": 3.392424630176238, "tokens_seen": 2542927872 }, { "epoch": 0.54, "learning_rate": 0.00023168578993821714, "loss": 0.0638, "theoretical_loss": 3.392410846953939, "tokens_seen": 2543058944 }, { "epoch": 0.54, "learning_rate": 0.00023164567118671268, "loss": 0.069, "theoretical_loss": 3.392397064640925, "tokens_seen": 2543190016 }, { "epoch": 0.54, "learning_rate": 0.00023160555243520822, "loss": 0.0665, "theoretical_loss": 3.3923832832370886, "tokens_seen": 2543321088 }, { "epoch": 0.54, "learning_rate": 0.00023156543368370379, "loss": 0.0676, "theoretical_loss": 3.3923695027423237, "tokens_seen": 2543452160 }, { "epoch": 0.54, "learning_rate": 0.0002315253149321993, "loss": 0.0664, "theoretical_loss": 3.392355723156523, "tokens_seen": 2543583232 }, { "epoch": 0.54, "learning_rate": 0.00023148519618069487, "loss": 0.0699, "theoretical_loss": 3.3923419444795795, "tokens_seen": 2543714304 }, { "epoch": 0.54, "learning_rate": 0.0002314450774291904, "loss": 0.063, "theoretical_loss": 3.392328166711387, "tokens_seen": 2543845376 }, { "epoch": 0.54, "learning_rate": 0.00023140495867768595, "loss": 0.0643, "theoretical_loss": 3.392314389851838, "tokens_seen": 2543976448 }, { "epoch": 0.54, "learning_rate": 0.00023136483992618152, "loss": 0.0664, "theoretical_loss": 3.3923006139008267, "tokens_seen": 2544107520 }, { "epoch": 0.54, "learning_rate": 0.00023132472117467703, "loss": 0.0649, "theoretical_loss": 3.392286838858246, "tokens_seen": 2544238592 }, { "epoch": 0.54, "learning_rate": 0.0002312846024231726, "loss": 0.0708, "theoretical_loss": 3.3922730647239887, "tokens_seen": 2544369664 }, { "epoch": 0.54, "objective/train/advantage_avg": -0.0005285614170134068, "objective/train/docs_used": 925876, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3342156410217285, "objective/train/original_loss": 1.334215521812439, "objective/train/theoretical_loss": 3.3922592914979486, "objective/train/tokens_used": 915025376, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.00041393956053070724, "objective/train/value_max": -5.1021575927734375e-05, "objective/train/value_min": -0.763671875, "objective/train/value_reward_corr": 0.7526650429168105, "objective/train/value_std": 0.0227508544921875, "objective/train/weight_avg": 0.9996600151062012, "objective/train/weighted_lm_loss": 1.332885980606079, "objective/train/weights_max": 1.6096839904785156, "objective/train/weights_min": 0.3735102415084839, "theoretical_loss": 3.3922592914979486, "tokens_seen": 2544500736 }, { "epoch": 0.54, "learning_rate": 0.00023124448367166814, "loss": 0.0628, "theoretical_loss": 3.3922592914979486, "tokens_seen": 2544500736 }, { "epoch": 0.54, "learning_rate": 0.00023120436492016368, "loss": 0.0653, "theoretical_loss": 3.392245519180019, "tokens_seen": 2544631808 }, { "epoch": 0.54, "learning_rate": 0.00023116424616865925, "loss": 0.0646, "theoretical_loss": 3.392231747770093, "tokens_seen": 2544762880 }, { "epoch": 0.54, "learning_rate": 0.00023112412741715477, "loss": 0.0637, "theoretical_loss": 3.3922179772680643, "tokens_seen": 2544893952 }, { "epoch": 0.54, "learning_rate": 0.00023108400866565034, "loss": 0.0637, "theoretical_loss": 3.392204207673826, "tokens_seen": 2545025024 }, { "epoch": 0.54, "learning_rate": 0.00023104388991414588, "loss": 0.0609, "theoretical_loss": 3.392190438987272, "tokens_seen": 2545156096 }, { "epoch": 0.54, "learning_rate": 0.00023100377116264142, "loss": 0.0623, "theoretical_loss": 3.3921766712082952, "tokens_seen": 2545287168 }, { "epoch": 0.54, "learning_rate": 0.000230963652411137, "loss": 0.0664, "theoretical_loss": 3.39216290433679, "tokens_seen": 2545418240 }, { "epoch": 0.54, "learning_rate": 0.0002309235336596325, "loss": 0.0669, "theoretical_loss": 3.3921491383726483, "tokens_seen": 2545549312 }, { "epoch": 0.54, "learning_rate": 0.00023088341490812807, "loss": 0.0618, "theoretical_loss": 3.392135373315764, "tokens_seen": 2545680384 }, { "epoch": 0.54, "learning_rate": 0.0002308432961566236, "loss": 0.0627, "theoretical_loss": 3.392121609166032, "tokens_seen": 2545811456 }, { "epoch": 0.54, "learning_rate": 0.00023080317740511915, "loss": 0.0666, "theoretical_loss": 3.392107845923344, "tokens_seen": 2545942528 }, { "epoch": 0.54, "learning_rate": 0.00023076305865361472, "loss": 0.0632, "theoretical_loss": 3.392094083587595, "tokens_seen": 2546073600 }, { "epoch": 0.54, "learning_rate": 0.00023072293990211023, "loss": 0.0639, "theoretical_loss": 3.3920803221586775, "tokens_seen": 2546204672 }, { "epoch": 0.54, "learning_rate": 0.0002306828211506058, "loss": 0.0651, "theoretical_loss": 3.392066561636485, "tokens_seen": 2546335744 }, { "epoch": 0.54, "learning_rate": 0.00023064270239910134, "loss": 0.0672, "theoretical_loss": 3.3920528020209124, "tokens_seen": 2546466816 }, { "epoch": 0.54, "learning_rate": 0.00023060258364759689, "loss": 0.0652, "theoretical_loss": 3.392039043311852, "tokens_seen": 2546597888 }, { "epoch": 0.54, "learning_rate": 0.00023056246489609245, "loss": 0.0618, "theoretical_loss": 3.392025285509198, "tokens_seen": 2546728960 }, { "epoch": 0.54, "learning_rate": 0.00023052234614458797, "loss": 0.0649, "theoretical_loss": 3.392011528612844, "tokens_seen": 2546860032 }, { "epoch": 0.54, "learning_rate": 0.00023048222739308354, "loss": 0.0648, "theoretical_loss": 3.391997772622683, "tokens_seen": 2546991104 }, { "epoch": 0.54, "learning_rate": 0.00023044210864157908, "loss": 0.0635, "theoretical_loss": 3.39198401753861, "tokens_seen": 2547122176 }, { "epoch": 0.54, "learning_rate": 0.00023040198989007462, "loss": 0.062, "theoretical_loss": 3.3919702633605175, "tokens_seen": 2547253248 }, { "epoch": 0.54, "learning_rate": 0.0002303618711385702, "loss": 0.0654, "theoretical_loss": 3.3919565100883, "tokens_seen": 2547384320 }, { "epoch": 0.54, "learning_rate": 0.0002303217523870657, "loss": 0.0638, "theoretical_loss": 3.391942757721851, "tokens_seen": 2547515392 }, { "epoch": 0.54, "learning_rate": 0.00023028163363556127, "loss": 0.0668, "theoretical_loss": 3.3919290062610634, "tokens_seen": 2547646464 }, { "epoch": 0.54, "objective/train/advantage_avg": 0.0006259710062295198, "objective/train/docs_used": 927107, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2056567668914795, "objective/train/original_loss": 1.2056565284729004, "objective/train/theoretical_loss": 3.3919152557058325, "objective/train/tokens_used": 918302176, "objective/train/value_avg": -0.01270294189453125, "objective/train/value_loss": 0.00021674968593288213, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.39794921875, "objective/train/value_reward_corr": 0.8921793744542696, "objective/train/value_std": 0.0303955078125, "objective/train/weight_avg": 1.0007325410842896, "objective/train/weighted_lm_loss": 1.20606529712677, "objective/train/weights_max": 1.1620044708251953, "objective/train/weights_min": 0.750941276550293, "theoretical_loss": 3.3919152557058325, "tokens_seen": 2547777536 }, { "epoch": 0.54, "learning_rate": 0.0002302415148840568, "loss": 0.0633, "theoretical_loss": 3.3919152557058325, "tokens_seen": 2547777536 }, { "epoch": 0.54, "learning_rate": 0.00023020139613255235, "loss": 0.0628, "theoretical_loss": 3.3919015060560507, "tokens_seen": 2547908608 }, { "epoch": 0.54, "learning_rate": 0.00023016127738104792, "loss": 0.0669, "theoretical_loss": 3.3918877573116126, "tokens_seen": 2548039680 }, { "epoch": 0.54, "learning_rate": 0.00023012115862954343, "loss": 0.0655, "theoretical_loss": 3.391874009472412, "tokens_seen": 2548170752 }, { "epoch": 0.54, "learning_rate": 0.000230081039878039, "loss": 0.0634, "theoretical_loss": 3.3918602625383425, "tokens_seen": 2548301824 }, { "epoch": 0.54, "learning_rate": 0.00023004092112653454, "loss": 0.0632, "theoretical_loss": 3.3918465165092977, "tokens_seen": 2548432896 }, { "epoch": 0.54, "learning_rate": 0.00023000080237503009, "loss": 0.0637, "theoretical_loss": 3.391832771385172, "tokens_seen": 2548563968 }, { "epoch": 0.54, "learning_rate": 0.00022996068362352565, "loss": 0.065, "theoretical_loss": 3.3918190271658593, "tokens_seen": 2548695040 }, { "epoch": 0.54, "learning_rate": 0.00022992056487202117, "loss": 0.0652, "theoretical_loss": 3.3918052838512533, "tokens_seen": 2548826112 }, { "epoch": 0.54, "learning_rate": 0.00022988044612051674, "loss": 0.0693, "theoretical_loss": 3.391791541441248, "tokens_seen": 2548957184 }, { "epoch": 0.54, "learning_rate": 0.00022984032736901228, "loss": 0.0636, "theoretical_loss": 3.3917777999357366, "tokens_seen": 2549088256 }, { "epoch": 0.54, "learning_rate": 0.00022980020861750785, "loss": 0.0662, "theoretical_loss": 3.3917640593346143, "tokens_seen": 2549219328 }, { "epoch": 0.55, "learning_rate": 0.0002297600898660034, "loss": 0.0651, "theoretical_loss": 3.3917503196377745, "tokens_seen": 2549350400 }, { "epoch": 0.55, "learning_rate": 0.0002297199711144989, "loss": 0.0631, "theoretical_loss": 3.391736580845111, "tokens_seen": 2549481472 }, { "epoch": 0.55, "learning_rate": 0.00022967985236299447, "loss": 0.0629, "theoretical_loss": 3.391722842956518, "tokens_seen": 2549612544 }, { "epoch": 0.55, "learning_rate": 0.00022963973361149, "loss": 0.0636, "theoretical_loss": 3.3917091059718896, "tokens_seen": 2549743616 }, { "epoch": 0.55, "learning_rate": 0.00022959961485998558, "loss": 0.0672, "theoretical_loss": 3.3916953698911203, "tokens_seen": 2549874688 }, { "epoch": 0.55, "learning_rate": 0.00022955949610848112, "loss": 0.0664, "theoretical_loss": 3.391681634714103, "tokens_seen": 2550005760 }, { "epoch": 0.55, "learning_rate": 0.00022951937735697664, "loss": 0.0652, "theoretical_loss": 3.391667900440733, "tokens_seen": 2550136832 }, { "epoch": 0.55, "learning_rate": 0.0002294792586054722, "loss": 0.0654, "theoretical_loss": 3.3916541670709037, "tokens_seen": 2550267904 }, { "epoch": 0.55, "learning_rate": 0.00022943913985396775, "loss": 0.0634, "theoretical_loss": 3.3916404346045095, "tokens_seen": 2550398976 }, { "epoch": 0.55, "learning_rate": 0.0002293990211024633, "loss": 0.0682, "theoretical_loss": 3.391626703041444, "tokens_seen": 2550530048 }, { "epoch": 0.55, "learning_rate": 0.00022935890235095885, "loss": 0.0649, "theoretical_loss": 3.3916129723816026, "tokens_seen": 2550661120 }, { "epoch": 0.55, "learning_rate": 0.00022931878359945437, "loss": 0.0637, "theoretical_loss": 3.3915992426248787, "tokens_seen": 2550792192 }, { "epoch": 0.55, "learning_rate": 0.00022927866484794994, "loss": 0.0616, "theoretical_loss": 3.3915855137711657, "tokens_seen": 2550923264 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.0006415789248421788, "objective/train/docs_used": 928186, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1912511587142944, "objective/train/original_loss": 1.1912510395050049, "objective/train/theoretical_loss": 3.391571785820359, "objective/train/tokens_used": 921578976, "objective/train/value_avg": -0.0092926025390625, "objective/train/value_loss": 0.0002809008292388171, "objective/train/value_max": -2.2470951080322266e-05, "objective/train/value_min": -0.89990234375, "objective/train/value_reward_corr": 0.8083665287531266, "objective/train/value_std": 0.02294921875, "objective/train/weight_avg": 1.0007661581039429, "objective/train/weighted_lm_loss": 1.1920908689498901, "objective/train/weights_max": 1.8810609579086304, "objective/train/weights_min": 0.37109869718551636, "theoretical_loss": 3.391571785820359, "tokens_seen": 2551054336 }, { "epoch": 0.55, "learning_rate": 0.00022923854609644548, "loss": 0.0634, "theoretical_loss": 3.391571785820359, "tokens_seen": 2551054336 }, { "epoch": 0.55, "learning_rate": 0.00022919842734494105, "loss": 0.0658, "theoretical_loss": 3.3915580587723526, "tokens_seen": 2551185408 }, { "epoch": 0.55, "learning_rate": 0.0002291583085934366, "loss": 0.0598, "theoretical_loss": 3.3915443326270407, "tokens_seen": 2551316480 }, { "epoch": 0.55, "learning_rate": 0.0002291181898419321, "loss": 0.065, "theoretical_loss": 3.391530607384317, "tokens_seen": 2551447552 }, { "epoch": 0.55, "learning_rate": 0.00022907807109042767, "loss": 0.0644, "theoretical_loss": 3.3915168830440767, "tokens_seen": 2551578624 }, { "epoch": 0.55, "learning_rate": 0.0002290379523389232, "loss": 0.0665, "theoretical_loss": 3.3915031596062133, "tokens_seen": 2551709696 }, { "epoch": 0.55, "learning_rate": 0.00022899783358741878, "loss": 0.0657, "theoretical_loss": 3.3914894370706214, "tokens_seen": 2551840768 }, { "epoch": 0.55, "learning_rate": 0.00022895771483591432, "loss": 0.0662, "theoretical_loss": 3.391475715437196, "tokens_seen": 2551971840 }, { "epoch": 0.55, "learning_rate": 0.00022891759608440984, "loss": 0.067, "theoretical_loss": 3.3914619947058307, "tokens_seen": 2552102912 }, { "epoch": 0.55, "learning_rate": 0.0002288774773329054, "loss": 0.065, "theoretical_loss": 3.39144827487642, "tokens_seen": 2552233984 }, { "epoch": 0.55, "learning_rate": 0.00022883735858140095, "loss": 0.0656, "theoretical_loss": 3.391434555948858, "tokens_seen": 2552365056 }, { "epoch": 0.55, "learning_rate": 0.00022879723982989651, "loss": 0.0617, "theoretical_loss": 3.3914208379230395, "tokens_seen": 2552496128 }, { "epoch": 0.55, "learning_rate": 0.00022875712107839206, "loss": 0.0643, "theoretical_loss": 3.391407120798859, "tokens_seen": 2552627200 }, { "epoch": 0.55, "learning_rate": 0.00022871700232688757, "loss": 0.0658, "theoretical_loss": 3.3913934045762106, "tokens_seen": 2552758272 }, { "epoch": 0.55, "learning_rate": 0.00022867688357538314, "loss": 0.0654, "theoretical_loss": 3.3913796892549897, "tokens_seen": 2552889344 }, { "epoch": 0.55, "learning_rate": 0.00022863676482387868, "loss": 0.064, "theoretical_loss": 3.3913659748350895, "tokens_seen": 2553020416 }, { "epoch": 0.55, "learning_rate": 0.00022859664607237425, "loss": 0.0593, "theoretical_loss": 3.3913522613164053, "tokens_seen": 2553151488 }, { "epoch": 0.55, "learning_rate": 0.0002285565273208698, "loss": 0.0656, "theoretical_loss": 3.391338548698831, "tokens_seen": 2553282560 }, { "epoch": 0.55, "learning_rate": 0.0002285164085693653, "loss": 0.061, "theoretical_loss": 3.3913248369822617, "tokens_seen": 2553413632 }, { "epoch": 0.55, "learning_rate": 0.00022847628981786087, "loss": 0.0654, "theoretical_loss": 3.391311126166592, "tokens_seen": 2553544704 }, { "epoch": 0.55, "learning_rate": 0.0002284361710663564, "loss": 0.0628, "theoretical_loss": 3.391297416251716, "tokens_seen": 2553675776 }, { "epoch": 0.55, "learning_rate": 0.00022839605231485198, "loss": 0.0627, "theoretical_loss": 3.3912837072375286, "tokens_seen": 2553806848 }, { "epoch": 0.55, "learning_rate": 0.00022835593356334752, "loss": 0.0648, "theoretical_loss": 3.3912699991239244, "tokens_seen": 2553937920 }, { "epoch": 0.55, "learning_rate": 0.00022831581481184304, "loss": 0.0671, "theoretical_loss": 3.391256291910798, "tokens_seen": 2554068992 }, { "epoch": 0.55, "learning_rate": 0.0002282756960603386, "loss": 0.0643, "theoretical_loss": 3.391242585598044, "tokens_seen": 2554200064 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.0008497779490426183, "objective/train/docs_used": 929369, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3482364416122437, "objective/train/original_loss": 1.348236322402954, "objective/train/theoretical_loss": 3.391228880185557, "objective/train/tokens_used": 924855776, "objective/train/value_avg": -0.005992889404296875, "objective/train/value_loss": 0.00013956836482975632, "objective/train/value_max": -3.451108932495117e-05, "objective/train/value_min": -0.3056640625, "objective/train/value_reward_corr": 0.681539471612262, "objective/train/value_std": 0.010345458984375, "objective/train/weight_avg": 1.0009145736694336, "objective/train/weighted_lm_loss": 1.3494898080825806, "objective/train/weights_max": 1.1620044708251953, "objective/train/weights_min": 0.38063594698905945, "theoretical_loss": 3.391228880185557, "tokens_seen": 2554331136 }, { "epoch": 0.55, "learning_rate": 0.00022823557730883415, "loss": 0.067, "theoretical_loss": 3.391228880185557, "tokens_seen": 2554331136 }, { "epoch": 0.55, "learning_rate": 0.00022819545855732971, "loss": 0.0614, "theoretical_loss": 3.3912151756732314, "tokens_seen": 2554462208 }, { "epoch": 0.55, "learning_rate": 0.00022815533980582526, "loss": 0.0662, "theoretical_loss": 3.3912014720609625, "tokens_seen": 2554593280 }, { "epoch": 0.55, "learning_rate": 0.00022811522105432077, "loss": 0.0648, "theoretical_loss": 3.3911877693486447, "tokens_seen": 2554724352 }, { "epoch": 0.55, "learning_rate": 0.00022807510230281634, "loss": 0.0641, "theoretical_loss": 3.391174067536173, "tokens_seen": 2554855424 }, { "epoch": 0.55, "learning_rate": 0.00022803498355131188, "loss": 0.0663, "theoretical_loss": 3.391160366623442, "tokens_seen": 2554986496 }, { "epoch": 0.55, "learning_rate": 0.00022799486479980745, "loss": 0.0611, "theoretical_loss": 3.391146666610346, "tokens_seen": 2555117568 }, { "epoch": 0.55, "learning_rate": 0.000227954746048303, "loss": 0.0671, "theoretical_loss": 3.3911329674967803, "tokens_seen": 2555248640 }, { "epoch": 0.55, "learning_rate": 0.0002279146272967985, "loss": 0.0622, "theoretical_loss": 3.3911192692826395, "tokens_seen": 2555379712 }, { "epoch": 0.55, "learning_rate": 0.00022787450854529407, "loss": 0.0666, "theoretical_loss": 3.3911055719678185, "tokens_seen": 2555510784 }, { "epoch": 0.55, "learning_rate": 0.0002278343897937896, "loss": 0.0655, "theoretical_loss": 3.3910918755522124, "tokens_seen": 2555641856 }, { "epoch": 0.55, "learning_rate": 0.00022779427104228518, "loss": 0.0655, "theoretical_loss": 3.3910781800357155, "tokens_seen": 2555772928 }, { "epoch": 0.55, "learning_rate": 0.00022775415229078072, "loss": 0.0687, "theoretical_loss": 3.391064485418223, "tokens_seen": 2555904000 }, { "epoch": 0.55, "learning_rate": 0.00022771403353927624, "loss": 0.0679, "theoretical_loss": 3.3910507916996298, "tokens_seen": 2556035072 }, { "epoch": 0.55, "learning_rate": 0.0002276739147877718, "loss": 0.0642, "theoretical_loss": 3.3910370988798304, "tokens_seen": 2556166144 }, { "epoch": 0.55, "learning_rate": 0.00022763379603626735, "loss": 0.0685, "theoretical_loss": 3.3910234069587206, "tokens_seen": 2556297216 }, { "epoch": 0.55, "learning_rate": 0.00022759367728476292, "loss": 0.0662, "theoretical_loss": 3.3910097159361943, "tokens_seen": 2556428288 }, { "epoch": 0.55, "learning_rate": 0.00022755355853325846, "loss": 0.0679, "theoretical_loss": 3.3909960258121474, "tokens_seen": 2556559360 }, { "epoch": 0.55, "learning_rate": 0.00022751343978175397, "loss": 0.0628, "theoretical_loss": 3.3909823365864744, "tokens_seen": 2556690432 }, { "epoch": 0.55, "learning_rate": 0.00022747332103024954, "loss": 0.0652, "theoretical_loss": 3.39096864825907, "tokens_seen": 2556821504 }, { "epoch": 0.55, "learning_rate": 0.00022743320227874508, "loss": 0.0677, "theoretical_loss": 3.3909549608298297, "tokens_seen": 2556952576 }, { "epoch": 0.55, "learning_rate": 0.00022739308352724065, "loss": 0.0663, "theoretical_loss": 3.3909412742986484, "tokens_seen": 2557083648 }, { "epoch": 0.55, "learning_rate": 0.0002273529647757362, "loss": 0.0639, "theoretical_loss": 3.3909275886654213, "tokens_seen": 2557214720 }, { "epoch": 0.55, "learning_rate": 0.0002273128460242317, "loss": 0.0651, "theoretical_loss": 3.390913903930043, "tokens_seen": 2557345792 }, { "epoch": 0.55, "learning_rate": 0.00022727272727272727, "loss": 0.0612, "theoretical_loss": 3.3909002200924094, "tokens_seen": 2557476864 }, { "epoch": 0.55, "objective/train/advantage_avg": -0.00020254334958735853, "objective/train/docs_used": 930481, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2775288820266724, "objective/train/original_loss": 1.2775287628173828, "objective/train/theoretical_loss": 3.3908865371524146, "objective/train/tokens_used": 928132576, "objective/train/value_avg": -0.005870819091796875, "objective/train/value_loss": 0.00014209412620402873, "objective/train/value_max": -3.820657730102539e-05, "objective/train/value_min": -0.3427734375, "objective/train/value_reward_corr": 0.6432521225379171, "objective/train/value_std": 0.010040283203125, "objective/train/weight_avg": 0.9998666644096375, "objective/train/weighted_lm_loss": 1.2778691053390503, "objective/train/weights_max": 1.1847256422042847, "objective/train/weights_min": 0.6827737092971802, "theoretical_loss": 3.3908865371524146, "tokens_seen": 2557607936 }, { "epoch": 0.55, "learning_rate": 0.00022723260852122281, "loss": 0.0649, "theoretical_loss": 3.3908865371524146, "tokens_seen": 2557607936 }, { "epoch": 0.55, "learning_rate": 0.00022719248976971838, "loss": 0.0636, "theoretical_loss": 3.3908728551099543, "tokens_seen": 2557739008 }, { "epoch": 0.55, "learning_rate": 0.00022715237101821392, "loss": 0.0606, "theoretical_loss": 3.3908591739649236, "tokens_seen": 2557870080 }, { "epoch": 0.55, "learning_rate": 0.00022711225226670946, "loss": 0.0638, "theoretical_loss": 3.390845493717218, "tokens_seen": 2558001152 }, { "epoch": 0.55, "learning_rate": 0.000227072133515205, "loss": 0.0619, "theoretical_loss": 3.3908318143667318, "tokens_seen": 2558132224 }, { "epoch": 0.55, "learning_rate": 0.00022703201476370055, "loss": 0.0603, "theoretical_loss": 3.3908181359133605, "tokens_seen": 2558263296 }, { "epoch": 0.55, "learning_rate": 0.00022699189601219612, "loss": 0.0668, "theoretical_loss": 3.3908044583569996, "tokens_seen": 2558394368 }, { "epoch": 0.55, "learning_rate": 0.00022695177726069166, "loss": 0.0626, "theoretical_loss": 3.390790781697544, "tokens_seen": 2558525440 }, { "epoch": 0.55, "learning_rate": 0.0002269116585091872, "loss": 0.0672, "theoretical_loss": 3.3907771059348897, "tokens_seen": 2558656512 }, { "epoch": 0.55, "learning_rate": 0.00022687153975768274, "loss": 0.0634, "theoretical_loss": 3.390763431068931, "tokens_seen": 2558787584 }, { "epoch": 0.55, "learning_rate": 0.00022683142100617828, "loss": 0.0656, "theoretical_loss": 3.3907497570995635, "tokens_seen": 2558918656 }, { "epoch": 0.55, "learning_rate": 0.00022679130225467385, "loss": 0.0643, "theoretical_loss": 3.390736084026683, "tokens_seen": 2559049728 }, { "epoch": 0.55, "learning_rate": 0.0002267511835031694, "loss": 0.063, "theoretical_loss": 3.390722411850184, "tokens_seen": 2559180800 }, { "epoch": 0.55, "learning_rate": 0.00022671106475166493, "loss": 0.0643, "theoretical_loss": 3.390708740569962, "tokens_seen": 2559311872 }, { "epoch": 0.55, "learning_rate": 0.00022667094600016047, "loss": 0.0625, "theoretical_loss": 3.390695070185913, "tokens_seen": 2559442944 }, { "epoch": 0.55, "learning_rate": 0.00022663082724865601, "loss": 0.0623, "theoretical_loss": 3.3906814006979316, "tokens_seen": 2559574016 }, { "epoch": 0.55, "learning_rate": 0.00022659070849715158, "loss": 0.0659, "theoretical_loss": 3.3906677321059133, "tokens_seen": 2559705088 }, { "epoch": 0.55, "learning_rate": 0.00022655058974564712, "loss": 0.0629, "theoretical_loss": 3.390654064409754, "tokens_seen": 2559836160 }, { "epoch": 0.55, "learning_rate": 0.00022651047099414267, "loss": 0.0641, "theoretical_loss": 3.390640397609349, "tokens_seen": 2559967232 }, { "epoch": 0.55, "learning_rate": 0.0002264703522426382, "loss": 0.0646, "theoretical_loss": 3.3906267317045935, "tokens_seen": 2560098304 }, { "epoch": 0.55, "learning_rate": 0.00022643023349113375, "loss": 0.0633, "theoretical_loss": 3.3906130666953826, "tokens_seen": 2560229376 }, { "epoch": 0.55, "learning_rate": 0.00022639011473962932, "loss": 0.0664, "theoretical_loss": 3.3905994025816124, "tokens_seen": 2560360448 }, { "epoch": 0.55, "learning_rate": 0.00022634999598812486, "loss": 0.064, "theoretical_loss": 3.3905857393631784, "tokens_seen": 2560491520 }, { "epoch": 0.55, "learning_rate": 0.0002263098772366204, "loss": 0.0644, "theoretical_loss": 3.3905720770399754, "tokens_seen": 2560622592 }, { "epoch": 0.55, "learning_rate": 0.00022626975848511594, "loss": 0.0631, "theoretical_loss": 3.3905584156118995, "tokens_seen": 2560753664 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.0007230488699860871, "objective/train/docs_used": 931622, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.381005883216858, "objective/train/original_loss": 1.3810060024261475, "objective/train/theoretical_loss": 3.3905447550788463, "objective/train/tokens_used": 931409376, "objective/train/value_avg": -0.00464630126953125, "objective/train/value_loss": 0.00015776905638631433, "objective/train/value_max": -3.790855407714844e-05, "objective/train/value_min": -0.423583984375, "objective/train/value_reward_corr": 0.5464185679420336, "objective/train/value_std": 0.0084381103515625, "objective/train/weight_avg": 1.000791072845459, "objective/train/weighted_lm_loss": 1.3822040557861328, "objective/train/weights_max": 1.4675347805023193, "objective/train/weights_min": 0.38121721148490906, "theoretical_loss": 3.3905447550788463, "tokens_seen": 2560884736 }, { "epoch": 0.55, "learning_rate": 0.00022622963973361148, "loss": 0.0662, "theoretical_loss": 3.3905447550788463, "tokens_seen": 2560884736 }, { "epoch": 0.55, "learning_rate": 0.00022618952098210705, "loss": 0.068, "theoretical_loss": 3.3905310954407115, "tokens_seen": 2561015808 }, { "epoch": 0.55, "learning_rate": 0.0002261494022306026, "loss": 0.0639, "theoretical_loss": 3.39051743669739, "tokens_seen": 2561146880 }, { "epoch": 0.55, "learning_rate": 0.00022610928347909816, "loss": 0.0591, "theoretical_loss": 3.3905037788487773, "tokens_seen": 2561277952 }, { "epoch": 0.55, "learning_rate": 0.00022606916472759367, "loss": 0.0632, "theoretical_loss": 3.39049012189477, "tokens_seen": 2561409024 }, { "epoch": 0.55, "learning_rate": 0.00022602904597608921, "loss": 0.0688, "theoretical_loss": 3.3904764658352633, "tokens_seen": 2561540096 }, { "epoch": 0.55, "learning_rate": 0.00022598892722458478, "loss": 0.0618, "theoretical_loss": 3.3904628106701526, "tokens_seen": 2561671168 }, { "epoch": 0.55, "learning_rate": 0.00022594880847308032, "loss": 0.0637, "theoretical_loss": 3.3904491563993338, "tokens_seen": 2561802240 }, { "epoch": 0.55, "learning_rate": 0.0002259086897215759, "loss": 0.0631, "theoretical_loss": 3.3904355030227022, "tokens_seen": 2561933312 }, { "epoch": 0.55, "learning_rate": 0.0002258685709700714, "loss": 0.065, "theoretical_loss": 3.3904218505401538, "tokens_seen": 2562064384 }, { "epoch": 0.55, "learning_rate": 0.00022582845221856695, "loss": 0.0602, "theoretical_loss": 3.3904081989515844, "tokens_seen": 2562195456 }, { "epoch": 0.55, "learning_rate": 0.00022578833346706252, "loss": 0.0674, "theoretical_loss": 3.39039454825689, "tokens_seen": 2562326528 }, { "epoch": 0.55, "learning_rate": 0.00022574821471555806, "loss": 0.0629, "theoretical_loss": 3.390380898455965, "tokens_seen": 2562457600 }, { "epoch": 0.55, "learning_rate": 0.00022570809596405363, "loss": 0.0671, "theoretical_loss": 3.3903672495487074, "tokens_seen": 2562588672 }, { "epoch": 0.55, "learning_rate": 0.00022566797721254914, "loss": 0.0661, "theoretical_loss": 3.390353601535011, "tokens_seen": 2562719744 }, { "epoch": 0.55, "learning_rate": 0.00022562785846104468, "loss": 0.0636, "theoretical_loss": 3.390339954414772, "tokens_seen": 2562850816 }, { "epoch": 0.55, "learning_rate": 0.00022558773970954025, "loss": 0.0653, "theoretical_loss": 3.3903263081878876, "tokens_seen": 2562981888 }, { "epoch": 0.55, "learning_rate": 0.0002255476209580358, "loss": 0.0649, "theoretical_loss": 3.3903126628542517, "tokens_seen": 2563112960 }, { "epoch": 0.55, "learning_rate": 0.00022550750220653136, "loss": 0.0628, "theoretical_loss": 3.390299018413762, "tokens_seen": 2563244032 }, { "epoch": 0.55, "learning_rate": 0.00022546738345502687, "loss": 0.0666, "theoretical_loss": 3.390285374866312, "tokens_seen": 2563375104 }, { "epoch": 0.55, "learning_rate": 0.00022542726470352242, "loss": 0.0679, "theoretical_loss": 3.3902717322118, "tokens_seen": 2563506176 }, { "epoch": 0.55, "learning_rate": 0.00022538714595201798, "loss": 0.0632, "theoretical_loss": 3.390258090450121, "tokens_seen": 2563637248 }, { "epoch": 0.55, "learning_rate": 0.00022534702720051353, "loss": 0.0652, "theoretical_loss": 3.3902444495811705, "tokens_seen": 2563768320 }, { "epoch": 0.55, "learning_rate": 0.0002253069084490091, "loss": 0.0639, "theoretical_loss": 3.390230809604845, "tokens_seen": 2563899392 }, { "epoch": 0.55, "learning_rate": 0.0002252667896975046, "loss": 0.0633, "theoretical_loss": 3.3902171705210398, "tokens_seen": 2564030464 }, { "epoch": 0.55, "objective/train/advantage_avg": 0.0008711852715350688, "objective/train/docs_used": 932851, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.246869444847107, "objective/train/original_loss": 1.2468695640563965, "objective/train/theoretical_loss": 3.390203532329652, "objective/train/tokens_used": 934686176, "objective/train/value_avg": -0.0095977783203125, "objective/train/value_loss": 0.0006135719013400376, "objective/train/value_max": -2.6881694793701172e-05, "objective/train/value_min": -0.91796875, "objective/train/value_reward_corr": 0.7025568790437389, "objective/train/value_std": 0.0244903564453125, "objective/train/weight_avg": 1.0011435747146606, "objective/train/weighted_lm_loss": 1.2476946115493774, "objective/train/weights_max": 2.028170347213745, "objective/train/weights_min": 0.3750951290130615, "theoretical_loss": 3.390203532329652, "tokens_seen": 2564161536 }, { "epoch": 0.55, "learning_rate": 0.00022522667094600015, "loss": 0.0626, "theoretical_loss": 3.390203532329652, "tokens_seen": 2564161536 }, { "epoch": 0.55, "learning_rate": 0.00022518655219449572, "loss": 0.0664, "theoretical_loss": 3.390189895030576, "tokens_seen": 2564292608 }, { "epoch": 0.55, "learning_rate": 0.00022514643344299126, "loss": 0.0625, "theoretical_loss": 3.39017625862371, "tokens_seen": 2564423680 }, { "epoch": 0.55, "learning_rate": 0.00022510631469148683, "loss": 0.0665, "theoretical_loss": 3.390162623108948, "tokens_seen": 2564554752 }, { "epoch": 0.55, "learning_rate": 0.00022506619593998234, "loss": 0.0693, "theoretical_loss": 3.390148988486187, "tokens_seen": 2564685824 }, { "epoch": 0.55, "learning_rate": 0.00022502607718847788, "loss": 0.0638, "theoretical_loss": 3.390135354755323, "tokens_seen": 2564816896 }, { "epoch": 0.55, "learning_rate": 0.00022498595843697345, "loss": 0.0644, "theoretical_loss": 3.3901217219162514, "tokens_seen": 2564947968 }, { "epoch": 0.55, "learning_rate": 0.000224945839685469, "loss": 0.0685, "theoretical_loss": 3.3901080899688694, "tokens_seen": 2565079040 }, { "epoch": 0.55, "learning_rate": 0.00022490572093396456, "loss": 0.0638, "theoretical_loss": 3.3900944589130724, "tokens_seen": 2565210112 }, { "epoch": 0.55, "learning_rate": 0.00022486560218246007, "loss": 0.0666, "theoretical_loss": 3.390080828748757, "tokens_seen": 2565341184 }, { "epoch": 0.55, "learning_rate": 0.00022482548343095564, "loss": 0.0634, "theoretical_loss": 3.3900671994758187, "tokens_seen": 2565472256 }, { "epoch": 0.55, "learning_rate": 0.00022478536467945118, "loss": 0.0683, "theoretical_loss": 3.390053571094154, "tokens_seen": 2565603328 }, { "epoch": 0.56, "learning_rate": 0.00022474524592794673, "loss": 0.0686, "theoretical_loss": 3.3900399436036595, "tokens_seen": 2565734400 }, { "epoch": 0.56, "learning_rate": 0.0002247051271764423, "loss": 0.066, "theoretical_loss": 3.3900263170042306, "tokens_seen": 2565865472 }, { "epoch": 0.56, "learning_rate": 0.0002246650084249378, "loss": 0.0679, "theoretical_loss": 3.390012691295764, "tokens_seen": 2565996544 }, { "epoch": 0.56, "learning_rate": 0.00022462488967343338, "loss": 0.0608, "theoretical_loss": 3.3899990664781563, "tokens_seen": 2566127616 }, { "epoch": 0.56, "learning_rate": 0.00022458477092192892, "loss": 0.0648, "theoretical_loss": 3.3899854425513034, "tokens_seen": 2566258688 }, { "epoch": 0.56, "learning_rate": 0.00022454465217042446, "loss": 0.068, "theoretical_loss": 3.389971819515101, "tokens_seen": 2566389760 }, { "epoch": 0.56, "learning_rate": 0.00022450453341892003, "loss": 0.0699, "theoretical_loss": 3.389958197369446, "tokens_seen": 2566520832 }, { "epoch": 0.56, "learning_rate": 0.00022446441466741554, "loss": 0.0627, "theoretical_loss": 3.389944576114235, "tokens_seen": 2566651904 }, { "epoch": 0.56, "learning_rate": 0.0002244242959159111, "loss": 0.063, "theoretical_loss": 3.3899309557493638, "tokens_seen": 2566782976 }, { "epoch": 0.56, "learning_rate": 0.00022438417716440665, "loss": 0.0661, "theoretical_loss": 3.3899173362747286, "tokens_seen": 2566914048 }, { "epoch": 0.56, "learning_rate": 0.0002243440584129022, "loss": 0.0612, "theoretical_loss": 3.3899037176902262, "tokens_seen": 2567045120 }, { "epoch": 0.56, "learning_rate": 0.00022430393966139776, "loss": 0.07, "theoretical_loss": 3.389890099995753, "tokens_seen": 2567176192 }, { "epoch": 0.56, "learning_rate": 0.00022426382090989328, "loss": 0.0631, "theoretical_loss": 3.3898764831912054, "tokens_seen": 2567307264 }, { "epoch": 0.56, "objective/train/advantage_avg": -0.002020818181335926, "objective/train/docs_used": 933971, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1675312519073486, "objective/train/original_loss": 1.1675312519073486, "objective/train/theoretical_loss": 3.389862867276479, "objective/train/tokens_used": 937962976, "objective/train/value_avg": -0.0075836181640625, "objective/train/value_loss": 0.0004593984049279243, "objective/train/value_max": -1.6033649444580078e-05, "objective/train/value_min": -0.96142578125, "objective/train/value_reward_corr": 0.801184706334191, "objective/train/value_std": 0.019622802734375, "objective/train/weight_avg": 0.9981890916824341, "objective/train/weighted_lm_loss": 1.166338324546814, "objective/train/weights_max": 1.4960556030273438, "objective/train/weights_min": 0.4014799892902374, "theoretical_loss": 3.389862867276479, "tokens_seen": 2567438336 }, { "epoch": 0.56, "learning_rate": 0.00022422370215838884, "loss": 0.0669, "theoretical_loss": 3.389862867276479, "tokens_seen": 2567438336 }, { "epoch": 0.56, "learning_rate": 0.00022418358340688438, "loss": 0.066, "theoretical_loss": 3.389849252251471, "tokens_seen": 2567569408 }, { "epoch": 0.56, "learning_rate": 0.00022414346465537993, "loss": 0.0642, "theoretical_loss": 3.3898356381160784, "tokens_seen": 2567700480 }, { "epoch": 0.56, "learning_rate": 0.0002241033459038755, "loss": 0.064, "theoretical_loss": 3.3898220248701962, "tokens_seen": 2567831552 }, { "epoch": 0.56, "learning_rate": 0.000224063227152371, "loss": 0.0645, "theoretical_loss": 3.3898084125137222, "tokens_seen": 2567962624 }, { "epoch": 0.56, "learning_rate": 0.00022402310840086658, "loss": 0.0646, "theoretical_loss": 3.389794801046552, "tokens_seen": 2568093696 }, { "epoch": 0.56, "learning_rate": 0.00022398298964936212, "loss": 0.0656, "theoretical_loss": 3.389781190468583, "tokens_seen": 2568224768 }, { "epoch": 0.56, "learning_rate": 0.00022394287089785766, "loss": 0.0705, "theoretical_loss": 3.3897675807797105, "tokens_seen": 2568355840 }, { "epoch": 0.56, "learning_rate": 0.00022390275214635323, "loss": 0.0677, "theoretical_loss": 3.3897539719798324, "tokens_seen": 2568486912 }, { "epoch": 0.56, "learning_rate": 0.00022386263339484874, "loss": 0.0686, "theoretical_loss": 3.389740364068844, "tokens_seen": 2568617984 }, { "epoch": 0.56, "learning_rate": 0.0002238225146433443, "loss": 0.0644, "theoretical_loss": 3.389726757046643, "tokens_seen": 2568749056 }, { "epoch": 0.56, "learning_rate": 0.00022378239589183985, "loss": 0.0642, "theoretical_loss": 3.3897131509131255, "tokens_seen": 2568880128 }, { "epoch": 0.56, "learning_rate": 0.0002237422771403354, "loss": 0.0644, "theoretical_loss": 3.3896995456681878, "tokens_seen": 2569011200 }, { "epoch": 0.56, "learning_rate": 0.00022370215838883096, "loss": 0.0659, "theoretical_loss": 3.3896859413117273, "tokens_seen": 2569142272 }, { "epoch": 0.56, "learning_rate": 0.00022366203963732648, "loss": 0.0692, "theoretical_loss": 3.38967233784364, "tokens_seen": 2569273344 }, { "epoch": 0.56, "learning_rate": 0.00022362192088582204, "loss": 0.069, "theoretical_loss": 3.389658735263823, "tokens_seen": 2569404416 }, { "epoch": 0.56, "learning_rate": 0.00022358180213431759, "loss": 0.0706, "theoretical_loss": 3.3896451335721727, "tokens_seen": 2569535488 }, { "epoch": 0.56, "learning_rate": 0.00022354168338281313, "loss": 0.0705, "theoretical_loss": 3.3896315327685858, "tokens_seen": 2569666560 }, { "epoch": 0.56, "learning_rate": 0.0002235015646313087, "loss": 0.0695, "theoretical_loss": 3.3896179328529588, "tokens_seen": 2569797632 }, { "epoch": 0.56, "learning_rate": 0.0002234614458798042, "loss": 0.0628, "theoretical_loss": 3.389604333825189, "tokens_seen": 2569928704 }, { "epoch": 0.56, "learning_rate": 0.00022342132712829978, "loss": 0.0658, "theoretical_loss": 3.3895907356851733, "tokens_seen": 2570059776 }, { "epoch": 0.56, "learning_rate": 0.00022338120837679532, "loss": 0.067, "theoretical_loss": 3.389577138432808, "tokens_seen": 2570190848 }, { "epoch": 0.56, "learning_rate": 0.00022334108962529086, "loss": 0.0671, "theoretical_loss": 3.38956354206799, "tokens_seen": 2570321920 }, { "epoch": 0.56, "learning_rate": 0.00022330097087378643, "loss": 0.068, "theoretical_loss": 3.389549946590616, "tokens_seen": 2570452992 }, { "epoch": 0.56, "learning_rate": 0.00022326085212228194, "loss": 0.0651, "theoretical_loss": 3.389536352000583, "tokens_seen": 2570584064 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.0002775892789941281, "objective/train/docs_used": 935107, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.045942783355713, "objective/train/original_loss": 1.0459426641464233, "objective/train/theoretical_loss": 3.3895227582977876, "objective/train/tokens_used": 941239776, "objective/train/value_avg": -0.005664825439453125, "objective/train/value_loss": 0.000174183544004336, "objective/train/value_max": -2.7954578399658203e-05, "objective/train/value_min": -0.2105712890625, "objective/train/value_reward_corr": 0.7046094101738285, "objective/train/value_std": 0.011627197265625, "objective/train/weight_avg": 1.0003536939620972, "objective/train/weighted_lm_loss": 1.0451602935791016, "objective/train/weights_max": 1.1320422887802124, "objective/train/weights_min": 0.3707548677921295, "theoretical_loss": 3.3895227582977876, "tokens_seen": 2570715136 }, { "epoch": 0.56, "learning_rate": 0.0002232207333707775, "loss": 0.063, "theoretical_loss": 3.3895227582977876, "tokens_seen": 2570715136 }, { "epoch": 0.56, "learning_rate": 0.00022318061461927305, "loss": 0.071, "theoretical_loss": 3.389509165482127, "tokens_seen": 2570846208 }, { "epoch": 0.56, "learning_rate": 0.0002231404958677686, "loss": 0.0664, "theoretical_loss": 3.3894955735534977, "tokens_seen": 2570977280 }, { "epoch": 0.56, "learning_rate": 0.00022310037711626416, "loss": 0.0671, "theoretical_loss": 3.389481982511797, "tokens_seen": 2571108352 }, { "epoch": 0.56, "learning_rate": 0.00022306025836475968, "loss": 0.0678, "theoretical_loss": 3.3894683923569215, "tokens_seen": 2571239424 }, { "epoch": 0.56, "learning_rate": 0.00022302013961325524, "loss": 0.0695, "theoretical_loss": 3.3894548030887686, "tokens_seen": 2571370496 }, { "epoch": 0.56, "learning_rate": 0.00022298002086175079, "loss": 0.0649, "theoretical_loss": 3.389441214707235, "tokens_seen": 2571501568 }, { "epoch": 0.56, "learning_rate": 0.00022293990211024633, "loss": 0.0671, "theoretical_loss": 3.3894276272122172, "tokens_seen": 2571632640 }, { "epoch": 0.56, "learning_rate": 0.0002228997833587419, "loss": 0.0688, "theoretical_loss": 3.3894140406036124, "tokens_seen": 2571763712 }, { "epoch": 0.56, "learning_rate": 0.0002228596646072374, "loss": 0.0702, "theoretical_loss": 3.389400454881318, "tokens_seen": 2571894784 }, { "epoch": 0.56, "learning_rate": 0.00022281954585573298, "loss": 0.0684, "theoretical_loss": 3.3893868700452314, "tokens_seen": 2572025856 }, { "epoch": 0.56, "learning_rate": 0.00022277942710422852, "loss": 0.0642, "theoretical_loss": 3.389373286095248, "tokens_seen": 2572156928 }, { "epoch": 0.56, "learning_rate": 0.00022273930835272406, "loss": 0.0647, "theoretical_loss": 3.3893597030312663, "tokens_seen": 2572288000 }, { "epoch": 0.56, "learning_rate": 0.00022269918960121963, "loss": 0.0696, "theoretical_loss": 3.389346120853183, "tokens_seen": 2572419072 }, { "epoch": 0.56, "learning_rate": 0.00022265907084971514, "loss": 0.0653, "theoretical_loss": 3.389332539560895, "tokens_seen": 2572550144 }, { "epoch": 0.56, "learning_rate": 0.0002226189520982107, "loss": 0.063, "theoretical_loss": 3.3893189591543, "tokens_seen": 2572681216 }, { "epoch": 0.56, "learning_rate": 0.00022257883334670625, "loss": 0.0637, "theoretical_loss": 3.389305379633294, "tokens_seen": 2572812288 }, { "epoch": 0.56, "learning_rate": 0.0002225387145952018, "loss": 0.0668, "theoretical_loss": 3.3892918009977753, "tokens_seen": 2572943360 }, { "epoch": 0.56, "learning_rate": 0.00022249859584369736, "loss": 0.0642, "theoretical_loss": 3.38927822324764, "tokens_seen": 2573074432 }, { "epoch": 0.56, "learning_rate": 0.00022245847709219288, "loss": 0.0676, "theoretical_loss": 3.3892646463827862, "tokens_seen": 2573205504 }, { "epoch": 0.56, "learning_rate": 0.00022241835834068845, "loss": 0.0621, "theoretical_loss": 3.3892510704031107, "tokens_seen": 2573336576 }, { "epoch": 0.56, "learning_rate": 0.000222378239589184, "loss": 0.0639, "theoretical_loss": 3.3892374953085107, "tokens_seen": 2573467648 }, { "epoch": 0.56, "learning_rate": 0.00022233812083767956, "loss": 0.0668, "theoretical_loss": 3.3892239210988837, "tokens_seen": 2573598720 }, { "epoch": 0.56, "learning_rate": 0.0002222980020861751, "loss": 0.0682, "theoretical_loss": 3.389210347774126, "tokens_seen": 2573729792 }, { "epoch": 0.56, "learning_rate": 0.0002222578833346706, "loss": 0.0655, "theoretical_loss": 3.389196775334136, "tokens_seen": 2573860864 }, { "epoch": 0.56, "objective/train/advantage_avg": -1.8636035747476853e-05, "objective/train/docs_used": 936262, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.401955485343933, "objective/train/original_loss": 1.4019553661346436, "objective/train/theoretical_loss": 3.3891832037788103, "objective/train/tokens_used": 944516576, "objective/train/value_avg": -0.007602691650390625, "objective/train/value_loss": 0.00038249429780989885, "objective/train/value_max": -2.3365020751953125e-05, "objective/train/value_min": -0.80615234375, "objective/train/value_reward_corr": 0.6702655760888474, "objective/train/value_std": 0.019378662109375, "objective/train/weight_avg": 1.0001580715179443, "objective/train/weighted_lm_loss": 1.4021517038345337, "objective/train/weights_max": 2.1182637214660645, "objective/train/weights_min": 0.3688187897205353, "theoretical_loss": 3.3891832037788103, "tokens_seen": 2573991936 }, { "epoch": 0.56, "learning_rate": 0.00022221776458316618, "loss": 0.0662, "theoretical_loss": 3.3891832037788103, "tokens_seen": 2573991936 }, { "epoch": 0.56, "learning_rate": 0.00022217764583166172, "loss": 0.0674, "theoretical_loss": 3.3891696331080468, "tokens_seen": 2574123008 }, { "epoch": 0.56, "learning_rate": 0.0002221375270801573, "loss": 0.0708, "theoretical_loss": 3.389156063321742, "tokens_seen": 2574254080 }, { "epoch": 0.56, "learning_rate": 0.00022209740832865283, "loss": 0.0677, "theoretical_loss": 3.3891424944197936, "tokens_seen": 2574385152 }, { "epoch": 0.56, "learning_rate": 0.00022205728957714834, "loss": 0.0666, "theoretical_loss": 3.3891289264020994, "tokens_seen": 2574516224 }, { "epoch": 0.56, "learning_rate": 0.0002220171708256439, "loss": 0.059, "theoretical_loss": 3.3891153592685557, "tokens_seen": 2574647296 }, { "epoch": 0.56, "learning_rate": 0.00022197705207413945, "loss": 0.0675, "theoretical_loss": 3.3891017930190612, "tokens_seen": 2574778368 }, { "epoch": 0.56, "learning_rate": 0.00022193693332263502, "loss": 0.0677, "theoretical_loss": 3.389088227653512, "tokens_seen": 2574909440 }, { "epoch": 0.56, "learning_rate": 0.00022189681457113056, "loss": 0.0667, "theoretical_loss": 3.3890746631718067, "tokens_seen": 2575040512 }, { "epoch": 0.56, "learning_rate": 0.00022185669581962608, "loss": 0.0667, "theoretical_loss": 3.389061099573842, "tokens_seen": 2575171584 }, { "epoch": 0.56, "learning_rate": 0.00022181657706812165, "loss": 0.0678, "theoretical_loss": 3.3890475368595157, "tokens_seen": 2575302656 }, { "epoch": 0.56, "learning_rate": 0.0002217764583166172, "loss": 0.071, "theoretical_loss": 3.3890339750287244, "tokens_seen": 2575433728 }, { "epoch": 0.56, "learning_rate": 0.00022173633956511276, "loss": 0.0688, "theoretical_loss": 3.389020414081367, "tokens_seen": 2575564800 }, { "epoch": 0.56, "learning_rate": 0.0002216962208136083, "loss": 0.0594, "theoretical_loss": 3.3890068540173397, "tokens_seen": 2575695872 }, { "epoch": 0.56, "learning_rate": 0.0002216561020621038, "loss": 0.0649, "theoretical_loss": 3.3889932948365407, "tokens_seen": 2575826944 }, { "epoch": 0.56, "learning_rate": 0.00022161598331059938, "loss": 0.0634, "theoretical_loss": 3.3889797365388676, "tokens_seen": 2575958016 }, { "epoch": 0.56, "learning_rate": 0.00022157586455909492, "loss": 0.0669, "theoretical_loss": 3.3889661791242176, "tokens_seen": 2576089088 }, { "epoch": 0.56, "learning_rate": 0.0002215357458075905, "loss": 0.0688, "theoretical_loss": 3.3889526225924884, "tokens_seen": 2576220160 }, { "epoch": 0.56, "learning_rate": 0.00022149562705608603, "loss": 0.0634, "theoretical_loss": 3.3889390669435775, "tokens_seen": 2576351232 }, { "epoch": 0.56, "learning_rate": 0.00022145550830458154, "loss": 0.0675, "theoretical_loss": 3.3889255121773827, "tokens_seen": 2576482304 }, { "epoch": 0.56, "learning_rate": 0.0002214153895530771, "loss": 0.0645, "theoretical_loss": 3.388911958293802, "tokens_seen": 2576613376 }, { "epoch": 0.56, "learning_rate": 0.00022137527080157265, "loss": 0.0646, "theoretical_loss": 3.388898405292732, "tokens_seen": 2576744448 }, { "epoch": 0.56, "learning_rate": 0.00022133515205006822, "loss": 0.0645, "theoretical_loss": 3.388884853174071, "tokens_seen": 2576875520 }, { "epoch": 0.56, "learning_rate": 0.00022129503329856376, "loss": 0.0683, "theoretical_loss": 3.3888713019377166, "tokens_seen": 2577006592 }, { "epoch": 0.56, "learning_rate": 0.00022125491454705928, "loss": 0.0672, "theoretical_loss": 3.3888577515835663, "tokens_seen": 2577137664 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.0004765666089951992, "objective/train/docs_used": 937411, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.363476037979126, "objective/train/original_loss": 1.3634757995605469, "objective/train/theoretical_loss": 3.388844202111518, "objective/train/tokens_used": 947793376, "objective/train/value_avg": -0.00852203369140625, "objective/train/value_loss": 0.00023555447114631534, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.360595703125, "objective/train/value_reward_corr": 0.6791777126117108, "objective/train/value_std": 0.0157928466796875, "objective/train/weight_avg": 1.0005837678909302, "objective/train/weighted_lm_loss": 1.3649492263793945, "objective/train/weights_max": 1.3762446641921997, "objective/train/weights_min": 0.38029924035072327, "theoretical_loss": 3.388844202111518, "tokens_seen": 2577268736 }, { "epoch": 0.56, "learning_rate": 0.00022121479579555485, "loss": 0.0665, "theoretical_loss": 3.388844202111518, "tokens_seen": 2577268736 }, { "epoch": 0.56, "learning_rate": 0.0002211746770440504, "loss": 0.0632, "theoretical_loss": 3.3888306535214694, "tokens_seen": 2577399808 }, { "epoch": 0.56, "learning_rate": 0.00022113455829254596, "loss": 0.0634, "theoretical_loss": 3.3888171058133185, "tokens_seen": 2577530880 }, { "epoch": 0.56, "learning_rate": 0.0002210944395410415, "loss": 0.0656, "theoretical_loss": 3.3888035589869627, "tokens_seen": 2577661952 }, { "epoch": 0.56, "learning_rate": 0.000221054320789537, "loss": 0.0662, "theoretical_loss": 3.3887900130422994, "tokens_seen": 2577793024 }, { "epoch": 0.56, "learning_rate": 0.00022101420203803258, "loss": 0.0628, "theoretical_loss": 3.3887764679792274, "tokens_seen": 2577924096 }, { "epoch": 0.56, "learning_rate": 0.00022097408328652812, "loss": 0.0689, "theoretical_loss": 3.3887629237976435, "tokens_seen": 2578055168 }, { "epoch": 0.56, "learning_rate": 0.0002209339645350237, "loss": 0.0688, "theoretical_loss": 3.3887493804974462, "tokens_seen": 2578186240 }, { "epoch": 0.56, "learning_rate": 0.00022089384578351923, "loss": 0.0679, "theoretical_loss": 3.3887358380785333, "tokens_seen": 2578317312 }, { "epoch": 0.56, "learning_rate": 0.00022085372703201475, "loss": 0.0694, "theoretical_loss": 3.388722296540802, "tokens_seen": 2578448384 }, { "epoch": 0.56, "learning_rate": 0.0002208136082805103, "loss": 0.064, "theoretical_loss": 3.388708755884151, "tokens_seen": 2578579456 }, { "epoch": 0.56, "learning_rate": 0.00022077348952900585, "loss": 0.0651, "theoretical_loss": 3.3886952161084776, "tokens_seen": 2578710528 }, { "epoch": 0.56, "learning_rate": 0.00022073337077750142, "loss": 0.0669, "theoretical_loss": 3.38868167721368, "tokens_seen": 2578841600 }, { "epoch": 0.56, "learning_rate": 0.00022069325202599696, "loss": 0.0643, "theoretical_loss": 3.3886681391996563, "tokens_seen": 2578972672 }, { "epoch": 0.56, "learning_rate": 0.00022065313327449248, "loss": 0.0641, "theoretical_loss": 3.3886546020663038, "tokens_seen": 2579103744 }, { "epoch": 0.56, "learning_rate": 0.00022061301452298805, "loss": 0.0654, "theoretical_loss": 3.3886410658135206, "tokens_seen": 2579234816 }, { "epoch": 0.56, "learning_rate": 0.0002205728957714836, "loss": 0.0688, "theoretical_loss": 3.3886275304412057, "tokens_seen": 2579365888 }, { "epoch": 0.56, "learning_rate": 0.00022053277701997916, "loss": 0.0704, "theoretical_loss": 3.388613995949256, "tokens_seen": 2579496960 }, { "epoch": 0.56, "learning_rate": 0.0002204926582684747, "loss": 0.0702, "theoretical_loss": 3.3886004623375694, "tokens_seen": 2579628032 }, { "epoch": 0.56, "learning_rate": 0.0002204525395169702, "loss": 0.0651, "theoretical_loss": 3.388586929606045, "tokens_seen": 2579759104 }, { "epoch": 0.56, "learning_rate": 0.00022041242076546578, "loss": 0.0639, "theoretical_loss": 3.38857339775458, "tokens_seen": 2579890176 }, { "epoch": 0.56, "learning_rate": 0.00022037230201396132, "loss": 0.063, "theoretical_loss": 3.3885598667830727, "tokens_seen": 2580021248 }, { "epoch": 0.56, "learning_rate": 0.0002203321832624569, "loss": 0.063, "theoretical_loss": 3.3885463366914212, "tokens_seen": 2580152320 }, { "epoch": 0.56, "learning_rate": 0.00022029206451095243, "loss": 0.064, "theoretical_loss": 3.3885328074795233, "tokens_seen": 2580283392 }, { "epoch": 0.56, "learning_rate": 0.00022025194575944795, "loss": 0.0645, "theoretical_loss": 3.3885192791472774, "tokens_seen": 2580414464 }, { "epoch": 0.56, "objective/train/advantage_avg": 0.00015258016355801374, "objective/train/docs_used": 938550, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3060342073440552, "objective/train/original_loss": 1.3060340881347656, "objective/train/theoretical_loss": 3.3885057516945816, "objective/train/tokens_used": 951070176, "objective/train/value_avg": -0.006145477294921875, "objective/train/value_loss": 0.0002665415231604129, "objective/train/value_max": -3.820657730102539e-05, "objective/train/value_min": -0.66650390625, "objective/train/value_reward_corr": 0.7103767321022983, "objective/train/value_std": 0.01555633544921875, "objective/train/weight_avg": 1.0002716779708862, "objective/train/weighted_lm_loss": 1.306409478187561, "objective/train/weights_max": 1.3725225925445557, "objective/train/weights_min": 0.370111882686615, "theoretical_loss": 3.3885057516945816, "tokens_seen": 2580545536 }, { "epoch": 0.56, "learning_rate": 0.00022021182700794351, "loss": 0.0635, "theoretical_loss": 3.3885057516945816, "tokens_seen": 2580545536 }, { "epoch": 0.56, "learning_rate": 0.00022017170825643906, "loss": 0.0655, "theoretical_loss": 3.388492225121334, "tokens_seen": 2580676608 }, { "epoch": 0.56, "learning_rate": 0.00022013158950493462, "loss": 0.0668, "theoretical_loss": 3.388478699427433, "tokens_seen": 2580807680 }, { "epoch": 0.56, "learning_rate": 0.00022009147075343017, "loss": 0.0653, "theoretical_loss": 3.3884651746127763, "tokens_seen": 2580938752 }, { "epoch": 0.56, "learning_rate": 0.00022005135200192568, "loss": 0.0659, "theoretical_loss": 3.388451650677262, "tokens_seen": 2581069824 }, { "epoch": 0.56, "learning_rate": 0.00022001123325042125, "loss": 0.0675, "theoretical_loss": 3.3884381276207893, "tokens_seen": 2581200896 }, { "epoch": 0.56, "learning_rate": 0.0002199711144989168, "loss": 0.066, "theoretical_loss": 3.388424605443256, "tokens_seen": 2581331968 }, { "epoch": 0.56, "learning_rate": 0.00021993099574741236, "loss": 0.0627, "theoretical_loss": 3.38841108414456, "tokens_seen": 2581463040 }, { "epoch": 0.56, "learning_rate": 0.0002198908769959079, "loss": 0.0696, "theoretical_loss": 3.3883975637245993, "tokens_seen": 2581594112 }, { "epoch": 0.56, "learning_rate": 0.0002198507582444034, "loss": 0.07, "theoretical_loss": 3.3883840441832724, "tokens_seen": 2581725184 }, { "epoch": 0.56, "learning_rate": 0.00021981063949289898, "loss": 0.0677, "theoretical_loss": 3.388370525520479, "tokens_seen": 2581856256 }, { "epoch": 0.56, "learning_rate": 0.00021977052074139452, "loss": 0.0641, "theoretical_loss": 3.388357007736115, "tokens_seen": 2581987328 }, { "epoch": 0.56, "learning_rate": 0.0002197304019898901, "loss": 0.0659, "theoretical_loss": 3.3883434908300805, "tokens_seen": 2582118400 }, { "epoch": 0.57, "learning_rate": 0.00021969028323838563, "loss": 0.0616, "theoretical_loss": 3.388329974802273, "tokens_seen": 2582249472 }, { "epoch": 0.57, "learning_rate": 0.00021965016448688117, "loss": 0.0692, "theoretical_loss": 3.388316459652591, "tokens_seen": 2582380544 }, { "epoch": 0.57, "learning_rate": 0.00021961004573537671, "loss": 0.0612, "theoretical_loss": 3.3883029453809335, "tokens_seen": 2582511616 }, { "epoch": 0.57, "learning_rate": 0.00021956992698387226, "loss": 0.0647, "theoretical_loss": 3.3882894319871983, "tokens_seen": 2582642688 }, { "epoch": 0.57, "learning_rate": 0.00021952980823236782, "loss": 0.0637, "theoretical_loss": 3.388275919471284, "tokens_seen": 2582773760 }, { "epoch": 0.57, "learning_rate": 0.00021948968948086337, "loss": 0.0636, "theoretical_loss": 3.388262407833089, "tokens_seen": 2582904832 }, { "epoch": 0.57, "learning_rate": 0.0002194495707293589, "loss": 0.0675, "theoretical_loss": 3.3882488970725118, "tokens_seen": 2583035904 }, { "epoch": 0.57, "learning_rate": 0.00021940945197785445, "loss": 0.0629, "theoretical_loss": 3.3882353871894506, "tokens_seen": 2583166976 }, { "epoch": 0.57, "learning_rate": 0.00021936933322635, "loss": 0.0613, "theoretical_loss": 3.388221878183804, "tokens_seen": 2583298048 }, { "epoch": 0.57, "learning_rate": 0.00021932921447484556, "loss": 0.0638, "theoretical_loss": 3.388208370055471, "tokens_seen": 2583429120 }, { "epoch": 0.57, "learning_rate": 0.0002192890957233411, "loss": 0.0638, "theoretical_loss": 3.3881948628043492, "tokens_seen": 2583560192 }, { "epoch": 0.57, "learning_rate": 0.00021924897697183664, "loss": 0.0664, "theoretical_loss": 3.388181356430338, "tokens_seen": 2583691264 }, { "epoch": 0.57, "objective/train/advantage_avg": -0.00070371781475842, "objective/train/docs_used": 939673, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3801921606063843, "objective/train/original_loss": 1.3801920413970947, "objective/train/theoretical_loss": 3.3881678509333355, "objective/train/tokens_used": 954346976, "objective/train/value_avg": -0.0079803466796875, "objective/train/value_loss": 0.0002715910959523171, "objective/train/value_max": -6.0617923736572266e-05, "objective/train/value_min": -0.378173828125, "objective/train/value_reward_corr": 0.6537508000176394, "objective/train/value_std": 0.01415252685546875, "objective/train/weight_avg": 0.9994179010391235, "objective/train/weighted_lm_loss": 1.37916898727417, "objective/train/weights_max": 1.1479060649871826, "objective/train/weights_min": 0.36915308237075806, "theoretical_loss": 3.3881678509333355, "tokens_seen": 2583822336 }, { "epoch": 0.57, "learning_rate": 0.00021920885822033218, "loss": 0.0694, "theoretical_loss": 3.3881678509333355, "tokens_seen": 2583822336 }, { "epoch": 0.57, "learning_rate": 0.00021916873946882772, "loss": 0.0613, "theoretical_loss": 3.3881543463132404, "tokens_seen": 2583953408 }, { "epoch": 0.57, "learning_rate": 0.0002191286207173233, "loss": 0.0648, "theoretical_loss": 3.3881408425699515, "tokens_seen": 2584084480 }, { "epoch": 0.57, "learning_rate": 0.00021908850196581883, "loss": 0.0658, "theoretical_loss": 3.3881273397033667, "tokens_seen": 2584215552 }, { "epoch": 0.57, "learning_rate": 0.00021904838321431437, "loss": 0.0642, "theoretical_loss": 3.3881138377133855, "tokens_seen": 2584346624 }, { "epoch": 0.57, "learning_rate": 0.00021900826446280992, "loss": 0.0681, "theoretical_loss": 3.388100336599906, "tokens_seen": 2584477696 }, { "epoch": 0.57, "learning_rate": 0.00021896814571130546, "loss": 0.0675, "theoretical_loss": 3.3880868363628265, "tokens_seen": 2584608768 }, { "epoch": 0.57, "learning_rate": 0.00021892802695980102, "loss": 0.0683, "theoretical_loss": 3.388073337002047, "tokens_seen": 2584739840 }, { "epoch": 0.57, "learning_rate": 0.00021888790820829657, "loss": 0.0651, "theoretical_loss": 3.3880598385174645, "tokens_seen": 2584870912 }, { "epoch": 0.57, "learning_rate": 0.0002188477894567921, "loss": 0.0633, "theoretical_loss": 3.388046340908979, "tokens_seen": 2585001984 }, { "epoch": 0.57, "learning_rate": 0.00021880767070528765, "loss": 0.0658, "theoretical_loss": 3.3880328441764886, "tokens_seen": 2585133056 }, { "epoch": 0.57, "learning_rate": 0.0002187675519537832, "loss": 0.0643, "theoretical_loss": 3.388019348319892, "tokens_seen": 2585264128 }, { "epoch": 0.57, "learning_rate": 0.00021872743320227876, "loss": 0.0667, "theoretical_loss": 3.3880058533390884, "tokens_seen": 2585395200 }, { "epoch": 0.57, "learning_rate": 0.0002186873144507743, "loss": 0.0657, "theoretical_loss": 3.3879923592339765, "tokens_seen": 2585526272 }, { "epoch": 0.57, "learning_rate": 0.00021864719569926984, "loss": 0.0675, "theoretical_loss": 3.3879788660044543, "tokens_seen": 2585657344 }, { "epoch": 0.57, "learning_rate": 0.00021860707694776538, "loss": 0.0681, "theoretical_loss": 3.3879653736504216, "tokens_seen": 2585788416 }, { "epoch": 0.57, "learning_rate": 0.00021856695819626092, "loss": 0.063, "theoretical_loss": 3.3879518821717767, "tokens_seen": 2585919488 }, { "epoch": 0.57, "learning_rate": 0.0002185268394447565, "loss": 0.0656, "theoretical_loss": 3.3879383915684187, "tokens_seen": 2586050560 }, { "epoch": 0.57, "learning_rate": 0.00021848672069325203, "loss": 0.0652, "theoretical_loss": 3.387924901840246, "tokens_seen": 2586181632 }, { "epoch": 0.57, "learning_rate": 0.00021844660194174757, "loss": 0.0678, "theoretical_loss": 3.387911412987158, "tokens_seen": 2586312704 }, { "epoch": 0.57, "learning_rate": 0.00021840648319024312, "loss": 0.0633, "theoretical_loss": 3.3878979250090535, "tokens_seen": 2586443776 }, { "epoch": 0.57, "learning_rate": 0.00021836636443873866, "loss": 0.0658, "theoretical_loss": 3.3878844379058313, "tokens_seen": 2586574848 }, { "epoch": 0.57, "learning_rate": 0.00021832624568723423, "loss": 0.0663, "theoretical_loss": 3.38787095167739, "tokens_seen": 2586705920 }, { "epoch": 0.57, "learning_rate": 0.00021828612693572977, "loss": 0.0649, "theoretical_loss": 3.387857466323629, "tokens_seen": 2586836992 }, { "epoch": 0.57, "learning_rate": 0.00021824600818422534, "loss": 0.0694, "theoretical_loss": 3.3878439818444472, "tokens_seen": 2586968064 }, { "epoch": 0.57, "objective/train/advantage_avg": -0.0007274707895703614, "objective/train/docs_used": 940829, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3081254959106445, "objective/train/original_loss": 1.3081254959106445, "objective/train/theoretical_loss": 3.3878304982397434, "objective/train/tokens_used": 957623776, "objective/train/value_avg": -0.01174163818359375, "objective/train/value_loss": 0.00025527182151563466, "objective/train/value_max": -5.608797073364258e-05, "objective/train/value_min": -0.677734375, "objective/train/value_reward_corr": 0.8042987268705089, "objective/train/value_std": 0.02203369140625, "objective/train/weight_avg": 0.9993945360183716, "objective/train/weighted_lm_loss": 1.3069430589675903, "objective/train/weights_max": 1.2121343612670898, "objective/train/weights_min": 0.5298117399215698, "theoretical_loss": 3.3878304982397434, "tokens_seen": 2587099136 }, { "epoch": 0.57, "learning_rate": 0.00021820588943272085, "loss": 0.0701, "theoretical_loss": 3.3878304982397434, "tokens_seen": 2587099136 }, { "epoch": 0.57, "learning_rate": 0.0002181657706812164, "loss": 0.0646, "theoretical_loss": 3.3878170155094165, "tokens_seen": 2587230208 }, { "epoch": 0.57, "learning_rate": 0.00021812565192971196, "loss": 0.0618, "theoretical_loss": 3.387803533653366, "tokens_seen": 2587361280 }, { "epoch": 0.57, "learning_rate": 0.0002180855331782075, "loss": 0.0679, "theoretical_loss": 3.38779005267149, "tokens_seen": 2587492352 }, { "epoch": 0.57, "learning_rate": 0.00021804541442670307, "loss": 0.0661, "theoretical_loss": 3.3877765725636886, "tokens_seen": 2587623424 }, { "epoch": 0.57, "learning_rate": 0.00021800529567519858, "loss": 0.0655, "theoretical_loss": 3.387763093329861, "tokens_seen": 2587754496 }, { "epoch": 0.57, "learning_rate": 0.00021796517692369412, "loss": 0.0671, "theoretical_loss": 3.3877496149699047, "tokens_seen": 2587885568 }, { "epoch": 0.57, "learning_rate": 0.0002179250581721897, "loss": 0.0677, "theoretical_loss": 3.38773613748372, "tokens_seen": 2588016640 }, { "epoch": 0.57, "learning_rate": 0.00021788493942068523, "loss": 0.0687, "theoretical_loss": 3.387722660871206, "tokens_seen": 2588147712 }, { "epoch": 0.57, "learning_rate": 0.0002178448206691808, "loss": 0.0653, "theoretical_loss": 3.3877091851322607, "tokens_seen": 2588278784 }, { "epoch": 0.57, "learning_rate": 0.00021780470191767632, "loss": 0.0713, "theoretical_loss": 3.387695710266785, "tokens_seen": 2588409856 }, { "epoch": 0.57, "learning_rate": 0.00021776458316617186, "loss": 0.064, "theoretical_loss": 3.387682236274677, "tokens_seen": 2588540928 }, { "epoch": 0.57, "learning_rate": 0.00021772446441466743, "loss": 0.0628, "theoretical_loss": 3.387668763155836, "tokens_seen": 2588672000 }, { "epoch": 0.57, "learning_rate": 0.00021768434566316297, "loss": 0.0673, "theoretical_loss": 3.3876552909101614, "tokens_seen": 2588803072 }, { "epoch": 0.57, "learning_rate": 0.00021764422691165854, "loss": 0.0639, "theoretical_loss": 3.387641819537552, "tokens_seen": 2588934144 }, { "epoch": 0.57, "learning_rate": 0.00021760410816015405, "loss": 0.0656, "theoretical_loss": 3.3876283490379073, "tokens_seen": 2589065216 }, { "epoch": 0.57, "learning_rate": 0.0002175639894086496, "loss": 0.0646, "theoretical_loss": 3.3876148794111267, "tokens_seen": 2589196288 }, { "epoch": 0.57, "learning_rate": 0.00021752387065714516, "loss": 0.0669, "theoretical_loss": 3.3876014106571093, "tokens_seen": 2589327360 }, { "epoch": 0.57, "learning_rate": 0.0002174837519056407, "loss": 0.0699, "theoretical_loss": 3.3875879427757543, "tokens_seen": 2589458432 }, { "epoch": 0.57, "learning_rate": 0.00021744363315413627, "loss": 0.0637, "theoretical_loss": 3.387574475766961, "tokens_seen": 2589589504 }, { "epoch": 0.57, "learning_rate": 0.00021740351440263178, "loss": 0.0679, "theoretical_loss": 3.3875610096306286, "tokens_seen": 2589720576 }, { "epoch": 0.57, "learning_rate": 0.00021736339565112732, "loss": 0.0616, "theoretical_loss": 3.3875475443666563, "tokens_seen": 2589851648 }, { "epoch": 0.57, "learning_rate": 0.0002173232768996229, "loss": 0.0647, "theoretical_loss": 3.3875340799749445, "tokens_seen": 2589982720 }, { "epoch": 0.57, "learning_rate": 0.00021728315814811843, "loss": 0.0681, "theoretical_loss": 3.387520616455391, "tokens_seen": 2590113792 }, { "epoch": 0.57, "learning_rate": 0.000217243039396614, "loss": 0.0665, "theoretical_loss": 3.3875071538078965, "tokens_seen": 2590244864 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.0004061113577336073, "objective/train/docs_used": 942074, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3340415954589844, "objective/train/original_loss": 1.3340413570404053, "objective/train/theoretical_loss": 3.3874936920323595, "objective/train/tokens_used": 960900576, "objective/train/value_avg": -0.00505828857421875, "objective/train/value_loss": 0.0001605599100003019, "objective/train/value_max": -2.7120113372802734e-05, "objective/train/value_min": -0.237060546875, "objective/train/value_reward_corr": 0.6688810801690647, "objective/train/value_std": 0.00992584228515625, "objective/train/weight_avg": 1.00047767162323, "objective/train/weighted_lm_loss": 1.3349987268447876, "objective/train/weights_max": 1.1923227310180664, "objective/train/weights_min": 0.36859163641929626, "theoretical_loss": 3.3874936920323595, "tokens_seen": 2590375936 }, { "epoch": 0.57, "learning_rate": 0.00021720292064510952, "loss": 0.0666, "theoretical_loss": 3.3874936920323595, "tokens_seen": 2590375936 }, { "epoch": 0.57, "learning_rate": 0.00021716280189360509, "loss": 0.0633, "theoretical_loss": 3.38748023112868, "tokens_seen": 2590507008 }, { "epoch": 0.57, "learning_rate": 0.00021712268314210063, "loss": 0.0634, "theoretical_loss": 3.387466771096757, "tokens_seen": 2590638080 }, { "epoch": 0.57, "learning_rate": 0.00021708256439059617, "loss": 0.068, "theoretical_loss": 3.3874533119364902, "tokens_seen": 2590769152 }, { "epoch": 0.57, "learning_rate": 0.00021704244563909174, "loss": 0.0665, "theoretical_loss": 3.3874398536477788, "tokens_seen": 2590900224 }, { "epoch": 0.57, "learning_rate": 0.00021700232688758725, "loss": 0.0663, "theoretical_loss": 3.3874263962305227, "tokens_seen": 2591031296 }, { "epoch": 0.57, "learning_rate": 0.00021696220813608282, "loss": 0.0662, "theoretical_loss": 3.387412939684621, "tokens_seen": 2591162368 }, { "epoch": 0.57, "learning_rate": 0.00021692208938457836, "loss": 0.063, "theoretical_loss": 3.3873994840099733, "tokens_seen": 2591293440 }, { "epoch": 0.57, "learning_rate": 0.0002168819706330739, "loss": 0.0704, "theoretical_loss": 3.3873860292064792, "tokens_seen": 2591424512 }, { "epoch": 0.57, "learning_rate": 0.00021684185188156947, "loss": 0.0656, "theoretical_loss": 3.3873725752740382, "tokens_seen": 2591555584 }, { "epoch": 0.57, "learning_rate": 0.00021680173313006498, "loss": 0.0695, "theoretical_loss": 3.38735912221255, "tokens_seen": 2591686656 }, { "epoch": 0.57, "learning_rate": 0.00021676161437856055, "loss": 0.0661, "theoretical_loss": 3.387345670021914, "tokens_seen": 2591817728 }, { "epoch": 0.57, "learning_rate": 0.0002167214956270561, "loss": 0.0644, "theoretical_loss": 3.3873322187020296, "tokens_seen": 2591948800 }, { "epoch": 0.57, "learning_rate": 0.00021668137687555163, "loss": 0.0624, "theoretical_loss": 3.387318768252797, "tokens_seen": 2592079872 }, { "epoch": 0.57, "learning_rate": 0.0002166412581240472, "loss": 0.0643, "theoretical_loss": 3.387305318674115, "tokens_seen": 2592210944 }, { "epoch": 0.57, "learning_rate": 0.00021660113937254272, "loss": 0.0649, "theoretical_loss": 3.387291869965884, "tokens_seen": 2592342016 }, { "epoch": 0.57, "learning_rate": 0.00021656102062103829, "loss": 0.0656, "theoretical_loss": 3.3872784221280035, "tokens_seen": 2592473088 }, { "epoch": 0.57, "learning_rate": 0.00021652090186953383, "loss": 0.069, "theoretical_loss": 3.3872649751603725, "tokens_seen": 2592604160 }, { "epoch": 0.57, "learning_rate": 0.00021648078311802937, "loss": 0.0651, "theoretical_loss": 3.3872515290628917, "tokens_seen": 2592735232 }, { "epoch": 0.57, "learning_rate": 0.00021644066436652494, "loss": 0.0631, "theoretical_loss": 3.3872380838354603, "tokens_seen": 2592866304 }, { "epoch": 0.57, "learning_rate": 0.00021640054561502045, "loss": 0.0659, "theoretical_loss": 3.387224639477978, "tokens_seen": 2592997376 }, { "epoch": 0.57, "learning_rate": 0.00021636042686351602, "loss": 0.0664, "theoretical_loss": 3.3872111959903446, "tokens_seen": 2593128448 }, { "epoch": 0.57, "learning_rate": 0.00021632030811201156, "loss": 0.0671, "theoretical_loss": 3.38719775337246, "tokens_seen": 2593259520 }, { "epoch": 0.57, "learning_rate": 0.0002162801893605071, "loss": 0.065, "theoretical_loss": 3.3871843116242237, "tokens_seen": 2593390592 }, { "epoch": 0.57, "learning_rate": 0.00021624007060900267, "loss": 0.0653, "theoretical_loss": 3.3871708707455355, "tokens_seen": 2593521664 }, { "epoch": 0.57, "objective/train/advantage_avg": 0.0008375823963433504, "objective/train/docs_used": 943233, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2253305912017822, "objective/train/original_loss": 1.2253303527832031, "objective/train/theoretical_loss": 3.3871574307362957, "objective/train/tokens_used": 964177376, "objective/train/value_avg": -0.007198333740234375, "objective/train/value_loss": 0.00018451780488248914, "objective/train/value_max": -5.1021575927734375e-05, "objective/train/value_min": -0.66259765625, "objective/train/value_reward_corr": 0.7223549295442713, "objective/train/value_std": 0.015228271484375, "objective/train/weight_avg": 1.0009183883666992, "objective/train/weighted_lm_loss": 1.226167917251587, "objective/train/weights_max": 1.181648850440979, "objective/train/weights_min": 0.3718043565750122, "theoretical_loss": 3.3871574307362957, "tokens_seen": 2593652736 }, { "epoch": 0.57, "learning_rate": 0.00021619995185749818, "loss": 0.0637, "theoretical_loss": 3.3871574307362957, "tokens_seen": 2593652736 }, { "epoch": 0.57, "learning_rate": 0.00021615983310599375, "loss": 0.0652, "theoretical_loss": 3.3871439915964032, "tokens_seen": 2593783808 }, { "epoch": 0.57, "learning_rate": 0.0002161197143544893, "loss": 0.0678, "theoretical_loss": 3.387130553325759, "tokens_seen": 2593914880 }, { "epoch": 0.57, "learning_rate": 0.00021607959560298484, "loss": 0.0673, "theoretical_loss": 3.387117115924262, "tokens_seen": 2594045952 }, { "epoch": 0.57, "learning_rate": 0.0002160394768514804, "loss": 0.065, "theoretical_loss": 3.3871036793918123, "tokens_seen": 2594177024 }, { "epoch": 0.57, "learning_rate": 0.00021599935809997592, "loss": 0.0653, "theoretical_loss": 3.38709024372831, "tokens_seen": 2594308096 }, { "epoch": 0.57, "learning_rate": 0.00021595923934847149, "loss": 0.0639, "theoretical_loss": 3.3870768089336556, "tokens_seen": 2594439168 }, { "epoch": 0.57, "learning_rate": 0.00021591912059696703, "loss": 0.0688, "theoretical_loss": 3.387063375007748, "tokens_seen": 2594570240 }, { "epoch": 0.57, "learning_rate": 0.00021587900184546257, "loss": 0.0636, "theoretical_loss": 3.3870499419504867, "tokens_seen": 2594701312 }, { "epoch": 0.57, "learning_rate": 0.00021583888309395814, "loss": 0.0682, "theoretical_loss": 3.3870365097617734, "tokens_seen": 2594832384 }, { "epoch": 0.57, "learning_rate": 0.00021579876434245365, "loss": 0.0625, "theoretical_loss": 3.3870230784415067, "tokens_seen": 2594963456 }, { "epoch": 0.57, "learning_rate": 0.00021575864559094922, "loss": 0.067, "theoretical_loss": 3.3870096479895873, "tokens_seen": 2595094528 }, { "epoch": 0.57, "learning_rate": 0.00021571852683944476, "loss": 0.0703, "theoretical_loss": 3.3869962184059146, "tokens_seen": 2595225600 }, { "epoch": 0.57, "learning_rate": 0.0002156784080879403, "loss": 0.0685, "theoretical_loss": 3.3869827896903892, "tokens_seen": 2595356672 }, { "epoch": 0.57, "learning_rate": 0.00021563828933643587, "loss": 0.0671, "theoretical_loss": 3.386969361842911, "tokens_seen": 2595487744 }, { "epoch": 0.57, "learning_rate": 0.00021559817058493139, "loss": 0.0699, "theoretical_loss": 3.386955934863379, "tokens_seen": 2595618816 }, { "epoch": 0.57, "learning_rate": 0.00021555805183342695, "loss": 0.0643, "theoretical_loss": 3.3869425087516953, "tokens_seen": 2595749888 }, { "epoch": 0.57, "learning_rate": 0.0002155179330819225, "loss": 0.0654, "theoretical_loss": 3.3869290835077583, "tokens_seen": 2595880960 }, { "epoch": 0.57, "learning_rate": 0.00021547781433041804, "loss": 0.0672, "theoretical_loss": 3.386915659131469, "tokens_seen": 2596012032 }, { "epoch": 0.57, "learning_rate": 0.0002154376955789136, "loss": 0.0687, "theoretical_loss": 3.386902235622727, "tokens_seen": 2596143104 }, { "epoch": 0.57, "learning_rate": 0.00021539757682740912, "loss": 0.0648, "theoretical_loss": 3.386888812981433, "tokens_seen": 2596274176 }, { "epoch": 0.57, "learning_rate": 0.0002153574580759047, "loss": 0.068, "theoretical_loss": 3.3868753912074867, "tokens_seen": 2596405248 }, { "epoch": 0.57, "learning_rate": 0.00021531733932440023, "loss": 0.0644, "theoretical_loss": 3.3868619703007883, "tokens_seen": 2596536320 }, { "epoch": 0.57, "learning_rate": 0.00021527722057289577, "loss": 0.0651, "theoretical_loss": 3.3868485502612384, "tokens_seen": 2596667392 }, { "epoch": 0.57, "learning_rate": 0.00021523710182139134, "loss": 0.0678, "theoretical_loss": 3.386835131088737, "tokens_seen": 2596798464 }, { "epoch": 0.57, "objective/train/advantage_avg": -7.333567918976769e-05, "objective/train/docs_used": 944104, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.277173638343811, "objective/train/original_loss": 1.2771732807159424, "objective/train/theoretical_loss": 3.3868217127831834, "objective/train/tokens_used": 967454176, "objective/train/value_avg": -0.00943756103515625, "objective/train/value_loss": 0.000310252420604229, "objective/train/value_max": -4.4345855712890625e-05, "objective/train/value_min": -0.32275390625, "objective/train/value_reward_corr": 0.676682975353449, "objective/train/value_std": 0.01523590087890625, "objective/train/weight_avg": 1.0000642538070679, "objective/train/weighted_lm_loss": 1.2775121927261353, "objective/train/weights_max": 1.1550747156143188, "objective/train/weights_min": 0.37030255794525146, "theoretical_loss": 3.3868217127831834, "tokens_seen": 2596929536 }, { "epoch": 0.57, "learning_rate": 0.00021519698306988685, "loss": 0.0644, "theoretical_loss": 3.3868217127831834, "tokens_seen": 2596929536 }, { "epoch": 0.57, "learning_rate": 0.00021515686431838242, "loss": 0.0665, "theoretical_loss": 3.386808295344479, "tokens_seen": 2597060608 }, { "epoch": 0.57, "learning_rate": 0.00021511674556687796, "loss": 0.065, "theoretical_loss": 3.386794878772524, "tokens_seen": 2597191680 }, { "epoch": 0.57, "learning_rate": 0.0002150766268153735, "loss": 0.0677, "theoretical_loss": 3.386781463067218, "tokens_seen": 2597322752 }, { "epoch": 0.57, "learning_rate": 0.00021503650806386907, "loss": 0.0651, "theoretical_loss": 3.386768048228462, "tokens_seen": 2597453824 }, { "epoch": 0.57, "learning_rate": 0.00021499638931236459, "loss": 0.0636, "theoretical_loss": 3.386754634256156, "tokens_seen": 2597584896 }, { "epoch": 0.57, "learning_rate": 0.00021495627056086015, "loss": 0.0664, "theoretical_loss": 3.3867412211502, "tokens_seen": 2597715968 }, { "epoch": 0.57, "learning_rate": 0.0002149161518093557, "loss": 0.069, "theoretical_loss": 3.3867278089104946, "tokens_seen": 2597847040 }, { "epoch": 0.57, "learning_rate": 0.00021487603305785126, "loss": 0.0667, "theoretical_loss": 3.3867143975369407, "tokens_seen": 2597978112 }, { "epoch": 0.57, "learning_rate": 0.0002148359143063468, "loss": 0.0664, "theoretical_loss": 3.3867009870294376, "tokens_seen": 2598109184 }, { "epoch": 0.57, "learning_rate": 0.00021479579555484232, "loss": 0.0664, "theoretical_loss": 3.3866875773878866, "tokens_seen": 2598240256 }, { "epoch": 0.57, "learning_rate": 0.0002147556768033379, "loss": 0.0684, "theoretical_loss": 3.3866741686121875, "tokens_seen": 2598371328 }, { "epoch": 0.57, "learning_rate": 0.00021471555805183343, "loss": 0.0657, "theoretical_loss": 3.3866607607022408, "tokens_seen": 2598502400 }, { "epoch": 0.57, "learning_rate": 0.000214675439300329, "loss": 0.0693, "theoretical_loss": 3.3866473536579473, "tokens_seen": 2598633472 }, { "epoch": 0.58, "learning_rate": 0.00021463532054882454, "loss": 0.064, "theoretical_loss": 3.3866339474792078, "tokens_seen": 2598764544 }, { "epoch": 0.58, "learning_rate": 0.00021459520179732005, "loss": 0.0639, "theoretical_loss": 3.3866205421659217, "tokens_seen": 2598895616 }, { "epoch": 0.58, "learning_rate": 0.00021455508304581562, "loss": 0.0625, "theoretical_loss": 3.3866071377179896, "tokens_seen": 2599026688 }, { "epoch": 0.58, "learning_rate": 0.00021451496429431116, "loss": 0.0668, "theoretical_loss": 3.386593734135313, "tokens_seen": 2599157760 }, { "epoch": 0.58, "learning_rate": 0.00021447484554280673, "loss": 0.0636, "theoretical_loss": 3.386580331417792, "tokens_seen": 2599288832 }, { "epoch": 0.58, "learning_rate": 0.00021443472679130227, "loss": 0.0664, "theoretical_loss": 3.3865669295653262, "tokens_seen": 2599419904 }, { "epoch": 0.58, "learning_rate": 0.00021439460803979779, "loss": 0.0643, "theoretical_loss": 3.386553528577817, "tokens_seen": 2599550976 }, { "epoch": 0.58, "learning_rate": 0.00021435448928829335, "loss": 0.065, "theoretical_loss": 3.3865401284551657, "tokens_seen": 2599682048 }, { "epoch": 0.58, "learning_rate": 0.0002143143705367889, "loss": 0.0625, "theoretical_loss": 3.3865267291972714, "tokens_seen": 2599813120 }, { "epoch": 0.58, "learning_rate": 0.00021427425178528446, "loss": 0.0677, "theoretical_loss": 3.3865133308040356, "tokens_seen": 2599944192 }, { "epoch": 0.58, "learning_rate": 0.00021423413303378, "loss": 0.067, "theoretical_loss": 3.386499933275359, "tokens_seen": 2600075264 }, { "epoch": 0.58, "objective/train/advantage_avg": -0.0008675082353875041, "objective/train/docs_used": 945341, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3201069831848145, "objective/train/original_loss": 1.3201069831848145, "objective/train/theoretical_loss": 3.3864865366111414, "objective/train/tokens_used": 970730976, "objective/train/value_avg": -0.0076446533203125, "objective/train/value_loss": 0.00015715429617557675, "objective/train/value_max": -2.2292137145996094e-05, "objective/train/value_min": -0.640625, "objective/train/value_reward_corr": 0.8298508633590476, "objective/train/value_std": 0.0165557861328125, "objective/train/weight_avg": 0.9992063641548157, "objective/train/weighted_lm_loss": 1.3191142082214355, "objective/train/weights_max": 1.1578621864318848, "objective/train/weights_min": 0.41808509826660156, "theoretical_loss": 3.3864865366111414, "tokens_seen": 2600206336 }, { "epoch": 0.58, "learning_rate": 0.00021419401428227552, "loss": 0.0653, "theoretical_loss": 3.3864865366111414, "tokens_seen": 2600206336 }, { "epoch": 0.58, "learning_rate": 0.0002141538955307711, "loss": 0.0657, "theoretical_loss": 3.386473140811284, "tokens_seen": 2600337408 }, { "epoch": 0.58, "learning_rate": 0.00021411377677926663, "loss": 0.0685, "theoretical_loss": 3.3864597458756878, "tokens_seen": 2600468480 }, { "epoch": 0.58, "learning_rate": 0.0002140736580277622, "loss": 0.0649, "theoretical_loss": 3.3864463518042527, "tokens_seen": 2600599552 }, { "epoch": 0.58, "learning_rate": 0.00021403353927625774, "loss": 0.0637, "theoretical_loss": 3.38643295859688, "tokens_seen": 2600730624 }, { "epoch": 0.58, "learning_rate": 0.00021399342052475325, "loss": 0.0678, "theoretical_loss": 3.38641956625347, "tokens_seen": 2600861696 }, { "epoch": 0.58, "learning_rate": 0.00021395330177324882, "loss": 0.0673, "theoretical_loss": 3.3864061747739242, "tokens_seen": 2600992768 }, { "epoch": 0.58, "learning_rate": 0.00021391318302174436, "loss": 0.0638, "theoretical_loss": 3.3863927841581427, "tokens_seen": 2601123840 }, { "epoch": 0.58, "learning_rate": 0.00021387306427023993, "loss": 0.067, "theoretical_loss": 3.386379394406026, "tokens_seen": 2601254912 }, { "epoch": 0.58, "learning_rate": 0.00021383294551873547, "loss": 0.0654, "theoretical_loss": 3.3863660055174756, "tokens_seen": 2601385984 }, { "epoch": 0.58, "learning_rate": 0.000213792826767231, "loss": 0.0639, "theoretical_loss": 3.3863526174923915, "tokens_seen": 2601517056 }, { "epoch": 0.58, "learning_rate": 0.00021375270801572656, "loss": 0.0669, "theoretical_loss": 3.3863392303306754, "tokens_seen": 2601648128 }, { "epoch": 0.58, "learning_rate": 0.0002137125892642221, "loss": 0.0665, "theoretical_loss": 3.3863258440322275, "tokens_seen": 2601779200 }, { "epoch": 0.58, "learning_rate": 0.00021367247051271766, "loss": 0.0666, "theoretical_loss": 3.386312458596949, "tokens_seen": 2601910272 }, { "epoch": 0.58, "learning_rate": 0.0002136323517612132, "loss": 0.065, "theoretical_loss": 3.3862990740247403, "tokens_seen": 2602041344 }, { "epoch": 0.58, "learning_rate": 0.00021359223300970872, "loss": 0.0707, "theoretical_loss": 3.386285690315503, "tokens_seen": 2602172416 }, { "epoch": 0.58, "learning_rate": 0.0002135521142582043, "loss": 0.0679, "theoretical_loss": 3.3862723074691377, "tokens_seen": 2602303488 }, { "epoch": 0.58, "learning_rate": 0.00021351199550669983, "loss": 0.0675, "theoretical_loss": 3.386258925485545, "tokens_seen": 2602434560 }, { "epoch": 0.58, "learning_rate": 0.0002134718767551954, "loss": 0.0625, "theoretical_loss": 3.3862455443646255, "tokens_seen": 2602565632 }, { "epoch": 0.58, "learning_rate": 0.00021343175800369094, "loss": 0.0705, "theoretical_loss": 3.386232164106281, "tokens_seen": 2602696704 }, { "epoch": 0.58, "learning_rate": 0.00021339163925218645, "loss": 0.0658, "theoretical_loss": 3.3862187847104126, "tokens_seen": 2602827776 }, { "epoch": 0.58, "learning_rate": 0.00021335152050068202, "loss": 0.0628, "theoretical_loss": 3.3862054061769205, "tokens_seen": 2602958848 }, { "epoch": 0.58, "learning_rate": 0.00021331140174917756, "loss": 0.0648, "theoretical_loss": 3.3861920285057057, "tokens_seen": 2603089920 }, { "epoch": 0.58, "learning_rate": 0.00021327128299767313, "loss": 0.0691, "theoretical_loss": 3.38617865169667, "tokens_seen": 2603220992 }, { "epoch": 0.58, "learning_rate": 0.00021323116424616867, "loss": 0.0645, "theoretical_loss": 3.386165275749714, "tokens_seen": 2603352064 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.000835252518299967, "objective/train/docs_used": 946919, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3608109951019287, "objective/train/original_loss": 1.3608111143112183, "objective/train/theoretical_loss": 3.3861519006647383, "objective/train/tokens_used": 974007776, "objective/train/value_avg": -0.00682830810546875, "objective/train/value_loss": 0.000303322565741837, "objective/train/value_max": -3.820657730102539e-05, "objective/train/value_min": -0.458251953125, "objective/train/value_reward_corr": 0.6218714998375661, "objective/train/value_std": 0.01324462890625, "objective/train/weight_avg": 1.0009602308273315, "objective/train/weighted_lm_loss": 1.3623260259628296, "objective/train/weights_max": 1.353885293006897, "objective/train/weights_min": 0.3681437373161316, "theoretical_loss": 3.3861519006647383, "tokens_seen": 2603483136 }, { "epoch": 0.58, "learning_rate": 0.0002131910454946642, "loss": 0.0693, "theoretical_loss": 3.3861519006647383, "tokens_seen": 2603483136 }, { "epoch": 0.58, "learning_rate": 0.00021315092674315976, "loss": 0.0653, "theoretical_loss": 3.3861385264416444, "tokens_seen": 2603614208 }, { "epoch": 0.58, "learning_rate": 0.0002131108079916553, "loss": 0.0637, "theoretical_loss": 3.386125153080333, "tokens_seen": 2603745280 }, { "epoch": 0.58, "learning_rate": 0.00021307068924015087, "loss": 0.0652, "theoretical_loss": 3.3861117805807064, "tokens_seen": 2603876352 }, { "epoch": 0.58, "learning_rate": 0.0002130305704886464, "loss": 0.0673, "theoretical_loss": 3.3860984089426642, "tokens_seen": 2604007424 }, { "epoch": 0.58, "learning_rate": 0.00021299045173714192, "loss": 0.0678, "theoretical_loss": 3.3860850381661085, "tokens_seen": 2604138496 }, { "epoch": 0.58, "learning_rate": 0.0002129503329856375, "loss": 0.0707, "theoretical_loss": 3.3860716682509397, "tokens_seen": 2604269568 }, { "epoch": 0.58, "learning_rate": 0.00021291021423413303, "loss": 0.0667, "theoretical_loss": 3.3860582991970594, "tokens_seen": 2604400640 }, { "epoch": 0.58, "learning_rate": 0.0002128700954826286, "loss": 0.062, "theoretical_loss": 3.3860449310043688, "tokens_seen": 2604531712 }, { "epoch": 0.58, "learning_rate": 0.00021282997673112414, "loss": 0.0668, "theoretical_loss": 3.3860315636727694, "tokens_seen": 2604662784 }, { "epoch": 0.58, "learning_rate": 0.00021278985797961965, "loss": 0.0669, "theoretical_loss": 3.3860181972021617, "tokens_seen": 2604793856 }, { "epoch": 0.58, "learning_rate": 0.00021274973922811522, "loss": 0.0671, "theoretical_loss": 3.386004831592447, "tokens_seen": 2604924928 }, { "epoch": 0.58, "learning_rate": 0.00021270962047661076, "loss": 0.0654, "theoretical_loss": 3.385991466843527, "tokens_seen": 2605056000 }, { "epoch": 0.58, "learning_rate": 0.00021266950172510633, "loss": 0.0663, "theoretical_loss": 3.385978102955303, "tokens_seen": 2605187072 }, { "epoch": 0.58, "learning_rate": 0.00021262938297360187, "loss": 0.062, "theoretical_loss": 3.3859647399276755, "tokens_seen": 2605318144 }, { "epoch": 0.58, "learning_rate": 0.0002125892642220974, "loss": 0.0632, "theoretical_loss": 3.3859513777605468, "tokens_seen": 2605449216 }, { "epoch": 0.58, "learning_rate": 0.00021254914547059296, "loss": 0.0663, "theoretical_loss": 3.3859380164538173, "tokens_seen": 2605580288 }, { "epoch": 0.58, "learning_rate": 0.0002125090267190885, "loss": 0.0669, "theoretical_loss": 3.385924656007389, "tokens_seen": 2605711360 }, { "epoch": 0.58, "learning_rate": 0.00021246890796758407, "loss": 0.0663, "theoretical_loss": 3.385911296421163, "tokens_seen": 2605842432 }, { "epoch": 0.58, "learning_rate": 0.0002124287892160796, "loss": 0.0672, "theoretical_loss": 3.38589793769504, "tokens_seen": 2605973504 }, { "epoch": 0.58, "learning_rate": 0.00021238867046457512, "loss": 0.0688, "theoretical_loss": 3.3858845798289225, "tokens_seen": 2606104576 }, { "epoch": 0.58, "learning_rate": 0.0002123485517130707, "loss": 0.0654, "theoretical_loss": 3.3858712228227117, "tokens_seen": 2606235648 }, { "epoch": 0.58, "learning_rate": 0.00021230843296156623, "loss": 0.0668, "theoretical_loss": 3.385857866676308, "tokens_seen": 2606366720 }, { "epoch": 0.58, "learning_rate": 0.0002122683142100618, "loss": 0.0681, "theoretical_loss": 3.385844511389614, "tokens_seen": 2606497792 }, { "epoch": 0.58, "learning_rate": 0.00021222819545855734, "loss": 0.0654, "theoretical_loss": 3.38583115696253, "tokens_seen": 2606628864 }, { "epoch": 0.58, "objective/train/advantage_avg": -9.946455247700214e-05, "objective/train/docs_used": 947602, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3696080446243286, "objective/train/original_loss": 1.369607925415039, "objective/train/theoretical_loss": 3.385817803394959, "objective/train/tokens_used": 977284576, "objective/train/value_avg": -0.0091400146484375, "objective/train/value_loss": 0.0003014110552612692, "objective/train/value_max": -3.3974647521972656e-05, "objective/train/value_min": -0.8056640625, "objective/train/value_reward_corr": 0.7208240988837684, "objective/train/value_std": 0.0184326171875, "objective/train/weight_avg": 1.000032901763916, "objective/train/weighted_lm_loss": 1.3689240217208862, "objective/train/weights_max": 1.4901189804077148, "objective/train/weights_min": 0.3728724420070648, "theoretical_loss": 3.385817803394959, "tokens_seen": 2606759936 }, { "epoch": 0.58, "learning_rate": 0.00021218807670705285, "loss": 0.0668, "theoretical_loss": 3.385817803394959, "tokens_seen": 2606759936 }, { "epoch": 0.58, "learning_rate": 0.00021214795795554842, "loss": 0.0667, "theoretical_loss": 3.3858044506868006, "tokens_seen": 2606891008 }, { "epoch": 0.58, "learning_rate": 0.00021210783920404396, "loss": 0.0675, "theoretical_loss": 3.3857910988379576, "tokens_seen": 2607022080 }, { "epoch": 0.58, "learning_rate": 0.00021206772045253953, "loss": 0.0681, "theoretical_loss": 3.3857777478483313, "tokens_seen": 2607153152 }, { "epoch": 0.58, "learning_rate": 0.00021202760170103507, "loss": 0.066, "theoretical_loss": 3.385764397717823, "tokens_seen": 2607284224 }, { "epoch": 0.58, "learning_rate": 0.00021198748294953062, "loss": 0.0689, "theoretical_loss": 3.385751048446334, "tokens_seen": 2607415296 }, { "epoch": 0.58, "learning_rate": 0.00021194736419802616, "loss": 0.0662, "theoretical_loss": 3.3857377000337663, "tokens_seen": 2607546368 }, { "epoch": 0.58, "learning_rate": 0.0002119072454465217, "loss": 0.0636, "theoretical_loss": 3.3857243524800213, "tokens_seen": 2607677440 }, { "epoch": 0.58, "learning_rate": 0.00021186712669501727, "loss": 0.0666, "theoretical_loss": 3.3857110057850006, "tokens_seen": 2607808512 }, { "epoch": 0.58, "learning_rate": 0.0002118270079435128, "loss": 0.0686, "theoretical_loss": 3.3856976599486055, "tokens_seen": 2607939584 }, { "epoch": 0.58, "learning_rate": 0.00021178688919200835, "loss": 0.0695, "theoretical_loss": 3.3856843149707383, "tokens_seen": 2608070656 }, { "epoch": 0.58, "learning_rate": 0.0002117467704405039, "loss": 0.0665, "theoretical_loss": 3.3856709708513, "tokens_seen": 2608201728 }, { "epoch": 0.58, "learning_rate": 0.00021170665168899943, "loss": 0.0646, "theoretical_loss": 3.3856576275901924, "tokens_seen": 2608332800 }, { "epoch": 0.58, "learning_rate": 0.000211666532937495, "loss": 0.0656, "theoretical_loss": 3.385644285187317, "tokens_seen": 2608463872 }, { "epoch": 0.58, "learning_rate": 0.00021162641418599054, "loss": 0.0666, "theoretical_loss": 3.385630943642576, "tokens_seen": 2608594944 }, { "epoch": 0.58, "learning_rate": 0.00021158629543448608, "loss": 0.0633, "theoretical_loss": 3.3856176029558704, "tokens_seen": 2608726016 }, { "epoch": 0.58, "learning_rate": 0.00021154617668298162, "loss": 0.0654, "theoretical_loss": 3.3856042631271026, "tokens_seen": 2608857088 }, { "epoch": 0.58, "learning_rate": 0.00021150605793147717, "loss": 0.0664, "theoretical_loss": 3.385590924156174, "tokens_seen": 2608988160 }, { "epoch": 0.58, "learning_rate": 0.00021146593917997273, "loss": 0.0634, "theoretical_loss": 3.385577586042986, "tokens_seen": 2609119232 }, { "epoch": 0.58, "learning_rate": 0.00021142582042846827, "loss": 0.0671, "theoretical_loss": 3.3855642487874418, "tokens_seen": 2609250304 }, { "epoch": 0.58, "learning_rate": 0.00021138570167696382, "loss": 0.0672, "theoretical_loss": 3.385550912389441, "tokens_seen": 2609381376 }, { "epoch": 0.58, "learning_rate": 0.00021134558292545936, "loss": 0.0679, "theoretical_loss": 3.385537576848887, "tokens_seen": 2609512448 }, { "epoch": 0.58, "learning_rate": 0.0002113054641739549, "loss": 0.0678, "theoretical_loss": 3.385524242165681, "tokens_seen": 2609643520 }, { "epoch": 0.58, "learning_rate": 0.00021126534542245047, "loss": 0.0633, "theoretical_loss": 3.385510908339725, "tokens_seen": 2609774592 }, { "epoch": 0.58, "learning_rate": 0.000211225226670946, "loss": 0.0654, "theoretical_loss": 3.3854975753709207, "tokens_seen": 2609905664 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.0024058781564235687, "objective/train/docs_used": 948805, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.250623345375061, "objective/train/original_loss": 1.2506234645843506, "objective/train/theoretical_loss": 3.3854842432591696, "objective/train/tokens_used": 980561376, "objective/train/value_avg": -0.007843017578125, "objective/train/value_loss": 0.00011997487308690324, "objective/train/value_max": -5.650520324707031e-05, "objective/train/value_min": -0.3212890625, "objective/train/value_reward_corr": 0.6925512265255763, "objective/train/value_std": 0.012237548828125, "objective/train/weight_avg": 1.0024653673171997, "objective/train/weighted_lm_loss": 1.2531182765960693, "objective/train/weights_max": 1.2103601694107056, "objective/train/weights_min": 0.7677412033081055, "theoretical_loss": 3.3854842432591696, "tokens_seen": 2610036736 }, { "epoch": 0.58, "learning_rate": 0.00021118510791944155, "loss": 0.0643, "theoretical_loss": 3.3854842432591696, "tokens_seen": 2610036736 }, { "epoch": 0.58, "learning_rate": 0.0002111449891679371, "loss": 0.0667, "theoretical_loss": 3.3854709120043744, "tokens_seen": 2610167808 }, { "epoch": 0.58, "learning_rate": 0.00021110487041643263, "loss": 0.0688, "theoretical_loss": 3.3854575816064365, "tokens_seen": 2610298880 }, { "epoch": 0.58, "learning_rate": 0.0002110647516649282, "loss": 0.0667, "theoretical_loss": 3.3854442520652577, "tokens_seen": 2610429952 }, { "epoch": 0.58, "learning_rate": 0.00021102463291342374, "loss": 0.0678, "theoretical_loss": 3.3854309233807403, "tokens_seen": 2610561024 }, { "epoch": 0.58, "learning_rate": 0.00021098451416191928, "loss": 0.0639, "theoretical_loss": 3.385417595552786, "tokens_seen": 2610692096 }, { "epoch": 0.58, "learning_rate": 0.00021094439541041482, "loss": 0.0643, "theoretical_loss": 3.385404268581297, "tokens_seen": 2610823168 }, { "epoch": 0.58, "learning_rate": 0.00021090427665891037, "loss": 0.0692, "theoretical_loss": 3.3853909424661746, "tokens_seen": 2610954240 }, { "epoch": 0.58, "learning_rate": 0.00021086415790740593, "loss": 0.0658, "theoretical_loss": 3.3853776172073218, "tokens_seen": 2611085312 }, { "epoch": 0.58, "learning_rate": 0.00021082403915590148, "loss": 0.0705, "theoretical_loss": 3.3853642928046397, "tokens_seen": 2611216384 }, { "epoch": 0.58, "learning_rate": 0.00021078392040439702, "loss": 0.0639, "theoretical_loss": 3.3853509692580306, "tokens_seen": 2611347456 }, { "epoch": 0.58, "learning_rate": 0.00021074380165289256, "loss": 0.0659, "theoretical_loss": 3.3853376465673968, "tokens_seen": 2611478528 }, { "epoch": 0.58, "learning_rate": 0.0002107036829013881, "loss": 0.0658, "theoretical_loss": 3.38532432473264, "tokens_seen": 2611609600 }, { "epoch": 0.58, "learning_rate": 0.00021066356414988367, "loss": 0.0668, "theoretical_loss": 3.3853110037536625, "tokens_seen": 2611740672 }, { "epoch": 0.58, "learning_rate": 0.0002106234453983792, "loss": 0.0676, "theoretical_loss": 3.385297683630366, "tokens_seen": 2611871744 }, { "epoch": 0.58, "learning_rate": 0.00021058332664687475, "loss": 0.0643, "theoretical_loss": 3.3852843643626533, "tokens_seen": 2612002816 }, { "epoch": 0.58, "learning_rate": 0.0002105432078953703, "loss": 0.0668, "theoretical_loss": 3.385271045950426, "tokens_seen": 2612133888 }, { "epoch": 0.58, "learning_rate": 0.00021050308914386583, "loss": 0.0648, "theoretical_loss": 3.3852577283935865, "tokens_seen": 2612264960 }, { "epoch": 0.58, "learning_rate": 0.0002104629703923614, "loss": 0.0661, "theoretical_loss": 3.385244411692036, "tokens_seen": 2612396032 }, { "epoch": 0.58, "learning_rate": 0.00021042285164085694, "loss": 0.0664, "theoretical_loss": 3.385231095845678, "tokens_seen": 2612527104 }, { "epoch": 0.58, "learning_rate": 0.00021038273288935248, "loss": 0.0668, "theoretical_loss": 3.385217780854415, "tokens_seen": 2612658176 }, { "epoch": 0.58, "learning_rate": 0.00021034261413784802, "loss": 0.0651, "theoretical_loss": 3.385204466718147, "tokens_seen": 2612789248 }, { "epoch": 0.58, "learning_rate": 0.00021030249538634357, "loss": 0.0663, "theoretical_loss": 3.385191153436778, "tokens_seen": 2612920320 }, { "epoch": 0.58, "learning_rate": 0.00021026237663483913, "loss": 0.064, "theoretical_loss": 3.3851778410102096, "tokens_seen": 2613051392 }, { "epoch": 0.58, "learning_rate": 0.00021022225788333468, "loss": 0.0617, "theoretical_loss": 3.3851645294383443, "tokens_seen": 2613182464 }, { "epoch": 0.58, "objective/train/advantage_avg": 0.0004402110062073916, "objective/train/docs_used": 950332, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.262986421585083, "objective/train/original_loss": 1.262986421585083, "objective/train/theoretical_loss": 3.3851512187210844, "objective/train/tokens_used": 983838176, "objective/train/value_avg": -0.00681304931640625, "objective/train/value_loss": 0.00019139038340654224, "objective/train/value_max": -2.6702880859375e-05, "objective/train/value_min": -0.2474365234375, "objective/train/value_reward_corr": 0.5507010402511724, "objective/train/value_std": 0.0107574462890625, "objective/train/weight_avg": 1.0005251169204712, "objective/train/weighted_lm_loss": 1.2637968063354492, "objective/train/weights_max": 1.2635471820831299, "objective/train/weights_min": 0.38045015931129456, "theoretical_loss": 3.3851512187210844, "tokens_seen": 2613313536 }, { "epoch": 0.58, "learning_rate": 0.00021018213913183024, "loss": 0.0658, "theoretical_loss": 3.3851512187210844, "tokens_seen": 2613313536 }, { "epoch": 0.58, "learning_rate": 0.00021014202038032576, "loss": 0.0651, "theoretical_loss": 3.385137908858332, "tokens_seen": 2613444608 }, { "epoch": 0.58, "learning_rate": 0.0002101019016288213, "loss": 0.0679, "theoretical_loss": 3.3851245998499895, "tokens_seen": 2613575680 }, { "epoch": 0.58, "learning_rate": 0.00021006178287731687, "loss": 0.0651, "theoretical_loss": 3.385111291695959, "tokens_seen": 2613706752 }, { "epoch": 0.58, "learning_rate": 0.0002100216641258124, "loss": 0.0669, "theoretical_loss": 3.385097984396143, "tokens_seen": 2613837824 }, { "epoch": 0.58, "learning_rate": 0.00020998154537430798, "loss": 0.0652, "theoretical_loss": 3.3850846779504438, "tokens_seen": 2613968896 }, { "epoch": 0.58, "learning_rate": 0.0002099414266228035, "loss": 0.0678, "theoretical_loss": 3.3850713723587633, "tokens_seen": 2614099968 }, { "epoch": 0.58, "learning_rate": 0.00020990130787129903, "loss": 0.0669, "theoretical_loss": 3.3850580676210047, "tokens_seen": 2614231040 }, { "epoch": 0.58, "learning_rate": 0.0002098611891197946, "loss": 0.0698, "theoretical_loss": 3.38504476373707, "tokens_seen": 2614362112 }, { "epoch": 0.58, "learning_rate": 0.00020982107036829014, "loss": 0.0651, "theoretical_loss": 3.385031460706862, "tokens_seen": 2614493184 }, { "epoch": 0.58, "learning_rate": 0.0002097809516167857, "loss": 0.0652, "theoretical_loss": 3.3850181585302823, "tokens_seen": 2614624256 }, { "epoch": 0.58, "learning_rate": 0.00020974083286528123, "loss": 0.064, "theoretical_loss": 3.385004857207234, "tokens_seen": 2614755328 }, { "epoch": 0.58, "learning_rate": 0.0002097007141137768, "loss": 0.0664, "theoretical_loss": 3.384991556737619, "tokens_seen": 2614886400 }, { "epoch": 0.58, "learning_rate": 0.00020966059536227234, "loss": 0.0655, "theoretical_loss": 3.3849782571213405, "tokens_seen": 2615017472 }, { "epoch": 0.58, "learning_rate": 0.00020962047661076788, "loss": 0.0672, "theoretical_loss": 3.3849649583583004, "tokens_seen": 2615148544 }, { "epoch": 0.59, "learning_rate": 0.00020958035785926344, "loss": 0.0701, "theoretical_loss": 3.384951660448401, "tokens_seen": 2615279616 }, { "epoch": 0.59, "learning_rate": 0.00020954023910775896, "loss": 0.0671, "theoretical_loss": 3.3849383633915457, "tokens_seen": 2615410688 }, { "epoch": 0.59, "learning_rate": 0.00020950012035625453, "loss": 0.0665, "theoretical_loss": 3.3849250671876363, "tokens_seen": 2615541760 }, { "epoch": 0.59, "learning_rate": 0.00020946000160475007, "loss": 0.0706, "theoretical_loss": 3.3849117718365758, "tokens_seen": 2615672832 }, { "epoch": 0.59, "learning_rate": 0.0002094198828532456, "loss": 0.0641, "theoretical_loss": 3.384898477338266, "tokens_seen": 2615803904 }, { "epoch": 0.59, "learning_rate": 0.00020937976410174118, "loss": 0.0679, "theoretical_loss": 3.38488518369261, "tokens_seen": 2615934976 }, { "epoch": 0.59, "learning_rate": 0.0002093396453502367, "loss": 0.0657, "theoretical_loss": 3.384871890899511, "tokens_seen": 2616066048 }, { "epoch": 0.59, "learning_rate": 0.00020929952659873226, "loss": 0.0667, "theoretical_loss": 3.384858598958871, "tokens_seen": 2616197120 }, { "epoch": 0.59, "learning_rate": 0.0002092594078472278, "loss": 0.0653, "theoretical_loss": 3.3848453078705925, "tokens_seen": 2616328192 }, { "epoch": 0.59, "learning_rate": 0.00020921928909572334, "loss": 0.0626, "theoretical_loss": 3.384832017634578, "tokens_seen": 2616459264 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.0006252549937926233, "objective/train/docs_used": 951607, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.446777582168579, "objective/train/original_loss": 1.4467777013778687, "objective/train/theoretical_loss": 3.3848187282507305, "objective/train/tokens_used": 987114976, "objective/train/value_avg": -0.007724761962890625, "objective/train/value_loss": 0.00024579960154369473, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.316162109375, "objective/train/value_reward_corr": 0.6279571018905429, "objective/train/value_std": 0.0122833251953125, "objective/train/weight_avg": 1.000735878944397, "objective/train/weighted_lm_loss": 1.4477310180664062, "objective/train/weights_max": 1.326949119567871, "objective/train/weights_min": 0.373211145401001, "theoretical_loss": 3.3848187282507305, "tokens_seen": 2616590336 }, { "epoch": 0.59, "learning_rate": 0.0002091791703442189, "loss": 0.0683, "theoretical_loss": 3.3848187282507305, "tokens_seen": 2616590336 }, { "epoch": 0.59, "learning_rate": 0.00020913905159271443, "loss": 0.0637, "theoretical_loss": 3.384805439718953, "tokens_seen": 2616721408 }, { "epoch": 0.59, "learning_rate": 0.00020909893284121, "loss": 0.0687, "theoretical_loss": 3.3847921520391475, "tokens_seen": 2616852480 }, { "epoch": 0.59, "learning_rate": 0.00020905881408970554, "loss": 0.065, "theoretical_loss": 3.384778865211217, "tokens_seen": 2616983552 }, { "epoch": 0.59, "learning_rate": 0.00020901869533820108, "loss": 0.0661, "theoretical_loss": 3.3847655792350646, "tokens_seen": 2617114624 }, { "epoch": 0.59, "learning_rate": 0.00020897857658669665, "loss": 0.0716, "theoretical_loss": 3.3847522941105925, "tokens_seen": 2617245696 }, { "epoch": 0.59, "learning_rate": 0.00020893845783519216, "loss": 0.0684, "theoretical_loss": 3.3847390098377037, "tokens_seen": 2617376768 }, { "epoch": 0.59, "learning_rate": 0.00020889833908368773, "loss": 0.0687, "theoretical_loss": 3.384725726416301, "tokens_seen": 2617507840 }, { "epoch": 0.59, "learning_rate": 0.00020885822033218327, "loss": 0.0659, "theoretical_loss": 3.3847124438462868, "tokens_seen": 2617638912 }, { "epoch": 0.59, "learning_rate": 0.0002088181015806788, "loss": 0.0634, "theoretical_loss": 3.384699162127564, "tokens_seen": 2617769984 }, { "epoch": 0.59, "learning_rate": 0.00020877798282917438, "loss": 0.0642, "theoretical_loss": 3.3846858812600362, "tokens_seen": 2617901056 }, { "epoch": 0.59, "learning_rate": 0.0002087378640776699, "loss": 0.0663, "theoretical_loss": 3.3846726012436057, "tokens_seen": 2618032128 }, { "epoch": 0.59, "learning_rate": 0.00020869774532616546, "loss": 0.0664, "theoretical_loss": 3.384659322078175, "tokens_seen": 2618163200 }, { "epoch": 0.59, "learning_rate": 0.000208657626574661, "loss": 0.0662, "theoretical_loss": 3.3846460437636474, "tokens_seen": 2618294272 }, { "epoch": 0.59, "learning_rate": 0.00020861750782315654, "loss": 0.0646, "theoretical_loss": 3.3846327662999256, "tokens_seen": 2618425344 }, { "epoch": 0.59, "learning_rate": 0.0002085773890716521, "loss": 0.0653, "theoretical_loss": 3.3846194896869126, "tokens_seen": 2618556416 }, { "epoch": 0.59, "learning_rate": 0.00020853727032014763, "loss": 0.0671, "theoretical_loss": 3.3846062139245117, "tokens_seen": 2618687488 }, { "epoch": 0.59, "learning_rate": 0.0002084971515686432, "loss": 0.0655, "theoretical_loss": 3.3845929390126246, "tokens_seen": 2618818560 }, { "epoch": 0.59, "learning_rate": 0.00020845703281713874, "loss": 0.0653, "theoretical_loss": 3.3845796649511555, "tokens_seen": 2618949632 }, { "epoch": 0.59, "learning_rate": 0.00020841691406563428, "loss": 0.0677, "theoretical_loss": 3.384566391740007, "tokens_seen": 2619080704 }, { "epoch": 0.59, "learning_rate": 0.00020837679531412985, "loss": 0.0687, "theoretical_loss": 3.384553119379082, "tokens_seen": 2619211776 }, { "epoch": 0.59, "learning_rate": 0.00020833667656262536, "loss": 0.0656, "theoretical_loss": 3.3845398478682833, "tokens_seen": 2619342848 }, { "epoch": 0.59, "learning_rate": 0.00020829655781112093, "loss": 0.0625, "theoretical_loss": 3.3845265772075144, "tokens_seen": 2619473920 }, { "epoch": 0.59, "learning_rate": 0.00020825643905961647, "loss": 0.0658, "theoretical_loss": 3.384513307396678, "tokens_seen": 2619604992 }, { "epoch": 0.59, "learning_rate": 0.000208216320308112, "loss": 0.067, "theoretical_loss": 3.384500038435677, "tokens_seen": 2619736064 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.001980495173484087, "objective/train/docs_used": 952297, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4213767051696777, "objective/train/original_loss": 1.4213767051696777, "objective/train/theoretical_loss": 3.384486770324415, "objective/train/tokens_used": 990391776, "objective/train/value_avg": -0.009796142578125, "objective/train/value_loss": 0.00022778616403229535, "objective/train/value_max": -5.9664249420166016e-05, "objective/train/value_min": -0.361083984375, "objective/train/value_reward_corr": 0.6254695362219034, "objective/train/value_std": 0.01531219482421875, "objective/train/weight_avg": 1.0020880699157715, "objective/train/weighted_lm_loss": 1.4244672060012817, "objective/train/weights_max": 1.402640700340271, "objective/train/weights_min": 0.38629987835884094, "theoretical_loss": 3.384486770324415, "tokens_seen": 2619867136 }, { "epoch": 0.59, "learning_rate": 0.00020817620155660758, "loss": 0.0676, "theoretical_loss": 3.384486770324415, "tokens_seen": 2619867136 }, { "epoch": 0.59, "learning_rate": 0.0002081360828051031, "loss": 0.0656, "theoretical_loss": 3.384473503062794, "tokens_seen": 2619998208 }, { "epoch": 0.59, "learning_rate": 0.00020809596405359866, "loss": 0.0645, "theoretical_loss": 3.3844602366507184, "tokens_seen": 2620129280 }, { "epoch": 0.59, "learning_rate": 0.0002080558453020942, "loss": 0.0628, "theoretical_loss": 3.3844469710880905, "tokens_seen": 2620260352 }, { "epoch": 0.59, "learning_rate": 0.00020801572655058974, "loss": 0.0649, "theoretical_loss": 3.3844337063748138, "tokens_seen": 2620391424 }, { "epoch": 0.59, "learning_rate": 0.0002079756077990853, "loss": 0.0649, "theoretical_loss": 3.384420442510791, "tokens_seen": 2620522496 }, { "epoch": 0.59, "learning_rate": 0.00020793548904758083, "loss": 0.0664, "theoretical_loss": 3.384407179495926, "tokens_seen": 2620653568 }, { "epoch": 0.59, "learning_rate": 0.0002078953702960764, "loss": 0.0627, "theoretical_loss": 3.384393917330121, "tokens_seen": 2620784640 }, { "epoch": 0.59, "learning_rate": 0.00020785525154457194, "loss": 0.064, "theoretical_loss": 3.38438065601328, "tokens_seen": 2620915712 }, { "epoch": 0.59, "learning_rate": 0.00020781513279306748, "loss": 0.0676, "theoretical_loss": 3.3843673955453055, "tokens_seen": 2621046784 }, { "epoch": 0.59, "learning_rate": 0.00020777501404156305, "loss": 0.0653, "theoretical_loss": 3.3843541359261016, "tokens_seen": 2621177856 }, { "epoch": 0.59, "learning_rate": 0.00020773489529005856, "loss": 0.0671, "theoretical_loss": 3.3843408771555707, "tokens_seen": 2621308928 }, { "epoch": 0.59, "learning_rate": 0.00020769477653855413, "loss": 0.0683, "theoretical_loss": 3.3843276192336167, "tokens_seen": 2621440000 }, { "epoch": 0.59, "learning_rate": 0.00020765465778704967, "loss": 0.0711, "theoretical_loss": 3.384314362160142, "tokens_seen": 2621571072 }, { "epoch": 0.59, "learning_rate": 0.0002076145390355452, "loss": 0.0692, "theoretical_loss": 3.3843011059350507, "tokens_seen": 2621702144 }, { "epoch": 0.59, "learning_rate": 0.00020757442028404078, "loss": 0.0721, "theoretical_loss": 3.384287850558246, "tokens_seen": 2621833216 }, { "epoch": 0.59, "learning_rate": 0.0002075343015325363, "loss": 0.0659, "theoretical_loss": 3.3842745960296314, "tokens_seen": 2621964288 }, { "epoch": 0.59, "learning_rate": 0.00020749418278103186, "loss": 0.0628, "theoretical_loss": 3.3842613423491095, "tokens_seen": 2622095360 }, { "epoch": 0.59, "learning_rate": 0.0002074540640295274, "loss": 0.0688, "theoretical_loss": 3.384248089516584, "tokens_seen": 2622226432 }, { "epoch": 0.59, "learning_rate": 0.00020741394527802295, "loss": 0.0671, "theoretical_loss": 3.3842348375319578, "tokens_seen": 2622357504 }, { "epoch": 0.59, "learning_rate": 0.0002073738265265185, "loss": 0.0654, "theoretical_loss": 3.3842215863951353, "tokens_seen": 2622488576 }, { "epoch": 0.59, "learning_rate": 0.00020733370777501403, "loss": 0.0667, "theoretical_loss": 3.3842083361060196, "tokens_seen": 2622619648 }, { "epoch": 0.59, "learning_rate": 0.0002072935890235096, "loss": 0.068, "theoretical_loss": 3.384195086664513, "tokens_seen": 2622750720 }, { "epoch": 0.59, "learning_rate": 0.00020725347027200514, "loss": 0.0646, "theoretical_loss": 3.3841818380705204, "tokens_seen": 2622881792 }, { "epoch": 0.59, "learning_rate": 0.0002072133515205007, "loss": 0.0649, "theoretical_loss": 3.3841685903239447, "tokens_seen": 2623012864 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.0008772517321631312, "objective/train/docs_used": 953489, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3682349920272827, "objective/train/original_loss": 1.3682349920272827, "objective/train/theoretical_loss": 3.384155343424689, "objective/train/tokens_used": 993668576, "objective/train/value_avg": -0.006885528564453125, "objective/train/value_loss": 0.00012387274182401597, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.466552734375, "objective/train/value_reward_corr": 0.7142739006386163, "objective/train/value_std": 0.01279449462890625, "objective/train/weight_avg": 1.0009351968765259, "objective/train/weighted_lm_loss": 1.3696568012237549, "objective/train/weights_max": 1.297888159751892, "objective/train/weights_min": 0.38186925649642944, "theoretical_loss": 3.384155343424689, "tokens_seen": 2623143936 }, { "epoch": 0.59, "learning_rate": 0.00020717323276899625, "loss": 0.065, "theoretical_loss": 3.384155343424689, "tokens_seen": 2623143936 }, { "epoch": 0.59, "learning_rate": 0.00020713311401749176, "loss": 0.0682, "theoretical_loss": 3.3841420973726573, "tokens_seen": 2623275008 }, { "epoch": 0.59, "learning_rate": 0.00020709299526598733, "loss": 0.0654, "theoretical_loss": 3.3841288521677524, "tokens_seen": 2623406080 }, { "epoch": 0.59, "learning_rate": 0.00020705287651448287, "loss": 0.0656, "theoretical_loss": 3.3841156078098784, "tokens_seen": 2623537152 }, { "epoch": 0.59, "learning_rate": 0.00020701275776297844, "loss": 0.0684, "theoretical_loss": 3.384102364298939, "tokens_seen": 2623668224 }, { "epoch": 0.59, "learning_rate": 0.00020697263901147398, "loss": 0.0654, "theoretical_loss": 3.384089121634837, "tokens_seen": 2623799296 }, { "epoch": 0.59, "learning_rate": 0.0002069325202599695, "loss": 0.0654, "theoretical_loss": 3.384075879817477, "tokens_seen": 2623930368 }, { "epoch": 0.59, "learning_rate": 0.00020689240150846506, "loss": 0.0659, "theoretical_loss": 3.384062638846761, "tokens_seen": 2624061440 }, { "epoch": 0.59, "learning_rate": 0.0002068522827569606, "loss": 0.065, "theoretical_loss": 3.3840493987225937, "tokens_seen": 2624192512 }, { "epoch": 0.59, "learning_rate": 0.00020681216400545617, "loss": 0.0646, "theoretical_loss": 3.3840361594448787, "tokens_seen": 2624323584 }, { "epoch": 0.59, "learning_rate": 0.00020677204525395171, "loss": 0.07, "theoretical_loss": 3.3840229210135195, "tokens_seen": 2624454656 }, { "epoch": 0.59, "learning_rate": 0.00020673192650244723, "loss": 0.0696, "theoretical_loss": 3.3840096834284195, "tokens_seen": 2624585728 }, { "epoch": 0.59, "learning_rate": 0.0002066918077509428, "loss": 0.0634, "theoretical_loss": 3.3839964466894825, "tokens_seen": 2624716800 }, { "epoch": 0.59, "learning_rate": 0.00020665168899943834, "loss": 0.069, "theoretical_loss": 3.383983210796612, "tokens_seen": 2624847872 }, { "epoch": 0.59, "learning_rate": 0.0002066115702479339, "loss": 0.0656, "theoretical_loss": 3.3839699757497117, "tokens_seen": 2624978944 }, { "epoch": 0.59, "learning_rate": 0.00020657145149642945, "loss": 0.0681, "theoretical_loss": 3.383956741548686, "tokens_seen": 2625110016 }, { "epoch": 0.59, "learning_rate": 0.00020653133274492496, "loss": 0.069, "theoretical_loss": 3.3839435081934375, "tokens_seen": 2625241088 }, { "epoch": 0.59, "learning_rate": 0.00020649121399342053, "loss": 0.0628, "theoretical_loss": 3.3839302756838707, "tokens_seen": 2625372160 }, { "epoch": 0.59, "learning_rate": 0.00020645109524191607, "loss": 0.0651, "theoretical_loss": 3.3839170440198885, "tokens_seen": 2625503232 }, { "epoch": 0.59, "learning_rate": 0.00020641097649041164, "loss": 0.0642, "theoretical_loss": 3.383903813201396, "tokens_seen": 2625634304 }, { "epoch": 0.59, "learning_rate": 0.00020637085773890718, "loss": 0.0667, "theoretical_loss": 3.383890583228296, "tokens_seen": 2625765376 }, { "epoch": 0.59, "learning_rate": 0.0002063307389874027, "loss": 0.0647, "theoretical_loss": 3.383877354100492, "tokens_seen": 2625896448 }, { "epoch": 0.59, "learning_rate": 0.00020629062023589826, "loss": 0.0672, "theoretical_loss": 3.383864125817889, "tokens_seen": 2626027520 }, { "epoch": 0.59, "learning_rate": 0.0002062505014843938, "loss": 0.066, "theoretical_loss": 3.3838508983803894, "tokens_seen": 2626158592 }, { "epoch": 0.59, "learning_rate": 0.00020621038273288937, "loss": 0.0626, "theoretical_loss": 3.3838376717878984, "tokens_seen": 2626289664 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.0009033459355123341, "objective/train/docs_used": 954702, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3542100191116333, "objective/train/original_loss": 1.3542096614837646, "objective/train/theoretical_loss": 3.383824446040319, "objective/train/tokens_used": 996945376, "objective/train/value_avg": -0.011627197265625, "objective/train/value_loss": 0.0008022652473300695, "objective/train/value_max": -5.7816505432128906e-05, "objective/train/value_min": -0.98046875, "objective/train/value_reward_corr": 0.6925519587202, "objective/train/value_std": 0.0287628173828125, "objective/train/weight_avg": 1.0012701749801636, "objective/train/weighted_lm_loss": 1.354827642440796, "objective/train/weights_max": 2.0230166912078857, "objective/train/weights_min": 0.3687090575695038, "theoretical_loss": 3.383824446040319, "tokens_seen": 2626420736 }, { "epoch": 0.59, "learning_rate": 0.00020617026398138491, "loss": 0.0653, "theoretical_loss": 3.383824446040319, "tokens_seen": 2626420736 }, { "epoch": 0.59, "learning_rate": 0.00020613014522988043, "loss": 0.0622, "theoretical_loss": 3.383811221137555, "tokens_seen": 2626551808 }, { "epoch": 0.59, "learning_rate": 0.000206090026478376, "loss": 0.0676, "theoretical_loss": 3.383797997079511, "tokens_seen": 2626682880 }, { "epoch": 0.59, "learning_rate": 0.00020604990772687154, "loss": 0.0668, "theoretical_loss": 3.38378477386609, "tokens_seen": 2626813952 }, { "epoch": 0.59, "learning_rate": 0.0002060097889753671, "loss": 0.066, "theoretical_loss": 3.383771551497196, "tokens_seen": 2626945024 }, { "epoch": 0.59, "learning_rate": 0.00020596967022386265, "loss": 0.0642, "theoretical_loss": 3.3837583299727343, "tokens_seen": 2627076096 }, { "epoch": 0.59, "learning_rate": 0.00020592955147235816, "loss": 0.0647, "theoretical_loss": 3.383745109292607, "tokens_seen": 2627207168 }, { "epoch": 0.59, "learning_rate": 0.00020588943272085373, "loss": 0.0645, "theoretical_loss": 3.3837318894567194, "tokens_seen": 2627338240 }, { "epoch": 0.59, "learning_rate": 0.00020584931396934927, "loss": 0.0684, "theoretical_loss": 3.3837186704649747, "tokens_seen": 2627469312 }, { "epoch": 0.59, "learning_rate": 0.00020580919521784484, "loss": 0.0661, "theoretical_loss": 3.3837054523172774, "tokens_seen": 2627600384 }, { "epoch": 0.59, "learning_rate": 0.00020576907646634038, "loss": 0.0704, "theoretical_loss": 3.383692235013531, "tokens_seen": 2627731456 }, { "epoch": 0.59, "learning_rate": 0.0002057289577148359, "loss": 0.0666, "theoretical_loss": 3.38367901855364, "tokens_seen": 2627862528 }, { "epoch": 0.59, "learning_rate": 0.00020568883896333146, "loss": 0.065, "theoretical_loss": 3.3836658029375077, "tokens_seen": 2627993600 }, { "epoch": 0.59, "learning_rate": 0.000205648720211827, "loss": 0.066, "theoretical_loss": 3.383652588165039, "tokens_seen": 2628124672 }, { "epoch": 0.59, "learning_rate": 0.00020560860146032257, "loss": 0.0641, "theoretical_loss": 3.3836393742361377, "tokens_seen": 2628255744 }, { "epoch": 0.59, "learning_rate": 0.00020556848270881812, "loss": 0.068, "theoretical_loss": 3.3836261611507084, "tokens_seen": 2628386816 }, { "epoch": 0.59, "learning_rate": 0.00020552836395731363, "loss": 0.0642, "theoretical_loss": 3.3836129489086537, "tokens_seen": 2628517888 }, { "epoch": 0.59, "learning_rate": 0.0002054882452058092, "loss": 0.0659, "theoretical_loss": 3.383599737509879, "tokens_seen": 2628648960 }, { "epoch": 0.59, "learning_rate": 0.00020544812645430474, "loss": 0.0702, "theoretical_loss": 3.3835865269542884, "tokens_seen": 2628780032 }, { "epoch": 0.59, "learning_rate": 0.0002054080077028003, "loss": 0.0673, "theoretical_loss": 3.383573317241785, "tokens_seen": 2628911104 }, { "epoch": 0.59, "learning_rate": 0.00020536788895129585, "loss": 0.0701, "theoretical_loss": 3.3835601083722744, "tokens_seen": 2629042176 }, { "epoch": 0.59, "learning_rate": 0.00020532777019979136, "loss": 0.0644, "theoretical_loss": 3.3835469003456597, "tokens_seen": 2629173248 }, { "epoch": 0.59, "learning_rate": 0.00020528765144828693, "loss": 0.0667, "theoretical_loss": 3.3835336931618456, "tokens_seen": 2629304320 }, { "epoch": 0.59, "learning_rate": 0.00020524753269678247, "loss": 0.0682, "theoretical_loss": 3.3835204868207365, "tokens_seen": 2629435392 }, { "epoch": 0.59, "learning_rate": 0.00020520741394527804, "loss": 0.068, "theoretical_loss": 3.3835072813222355, "tokens_seen": 2629566464 }, { "epoch": 0.59, "objective/train/advantage_avg": 0.00028478691820055246, "objective/train/docs_used": 955785, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2914948463439941, "objective/train/original_loss": 1.2914949655532837, "objective/train/theoretical_loss": 3.3834940766662482, "objective/train/tokens_used": 1000222176, "objective/train/value_avg": -0.005176544189453125, "objective/train/value_loss": 0.0001989422453334555, "objective/train/value_max": -3.737211227416992e-05, "objective/train/value_min": -0.1951904296875, "objective/train/value_reward_corr": 0.5059389864979877, "objective/train/value_std": 0.00858306884765625, "objective/train/weight_avg": 1.0003677606582642, "objective/train/weighted_lm_loss": 1.2915055751800537, "objective/train/weights_max": 1.1908681392669678, "objective/train/weights_min": 0.3682919144630432, "theoretical_loss": 3.3834940766662482, "tokens_seen": 2629697536 }, { "epoch": 0.59, "learning_rate": 0.00020516729519377358, "loss": 0.0642, "theoretical_loss": 3.3834940766662482, "tokens_seen": 2629697536 }, { "epoch": 0.59, "learning_rate": 0.0002051271764422691, "loss": 0.0661, "theoretical_loss": 3.3834808728526786, "tokens_seen": 2629828608 }, { "epoch": 0.59, "learning_rate": 0.00020508705769076466, "loss": 0.065, "theoretical_loss": 3.3834676698814303, "tokens_seen": 2629959680 }, { "epoch": 0.59, "learning_rate": 0.0002050469389392602, "loss": 0.0666, "theoretical_loss": 3.383454467752408, "tokens_seen": 2630090752 }, { "epoch": 0.59, "learning_rate": 0.00020500682018775577, "loss": 0.0667, "theoretical_loss": 3.383441266465516, "tokens_seen": 2630221824 }, { "epoch": 0.59, "learning_rate": 0.00020496670143625132, "loss": 0.0657, "theoretical_loss": 3.3834280660206586, "tokens_seen": 2630352896 }, { "epoch": 0.59, "learning_rate": 0.00020492658268474683, "loss": 0.0637, "theoretical_loss": 3.38341486641774, "tokens_seen": 2630483968 }, { "epoch": 0.59, "learning_rate": 0.0002048864639332424, "loss": 0.0638, "theoretical_loss": 3.3834016676566647, "tokens_seen": 2630615040 }, { "epoch": 0.59, "learning_rate": 0.00020484634518173794, "loss": 0.0654, "theoretical_loss": 3.383388469737337, "tokens_seen": 2630746112 }, { "epoch": 0.59, "learning_rate": 0.0002048062264302335, "loss": 0.0649, "theoretical_loss": 3.383375272659662, "tokens_seen": 2630877184 }, { "epoch": 0.59, "learning_rate": 0.00020476610767872905, "loss": 0.0606, "theoretical_loss": 3.3833620764235426, "tokens_seen": 2631008256 }, { "epoch": 0.59, "learning_rate": 0.00020472598892722456, "loss": 0.0647, "theoretical_loss": 3.3833488810288843, "tokens_seen": 2631139328 }, { "epoch": 0.59, "learning_rate": 0.00020468587017572013, "loss": 0.0665, "theoretical_loss": 3.3833356864755917, "tokens_seen": 2631270400 }, { "epoch": 0.59, "learning_rate": 0.00020464575142421567, "loss": 0.0635, "theoretical_loss": 3.3833224927635683, "tokens_seen": 2631401472 }, { "epoch": 0.59, "learning_rate": 0.00020460563267271124, "loss": 0.0645, "theoretical_loss": 3.3833092998927197, "tokens_seen": 2631532544 }, { "epoch": 0.59, "learning_rate": 0.00020456551392120678, "loss": 0.0645, "theoretical_loss": 3.3832961078629493, "tokens_seen": 2631663616 }, { "epoch": 0.6, "learning_rate": 0.00020452539516970232, "loss": 0.0663, "theoretical_loss": 3.383282916674162, "tokens_seen": 2631794688 }, { "epoch": 0.6, "learning_rate": 0.00020448527641819787, "loss": 0.0669, "theoretical_loss": 3.3832697263262625, "tokens_seen": 2631925760 }, { "epoch": 0.6, "learning_rate": 0.0002044451576666934, "loss": 0.0634, "theoretical_loss": 3.383256536819155, "tokens_seen": 2632056832 }, { "epoch": 0.6, "learning_rate": 0.00020440503891518898, "loss": 0.0659, "theoretical_loss": 3.3832433481527446, "tokens_seen": 2632187904 }, { "epoch": 0.6, "learning_rate": 0.00020436492016368452, "loss": 0.0648, "theoretical_loss": 3.3832301603269355, "tokens_seen": 2632318976 }, { "epoch": 0.6, "learning_rate": 0.00020432480141218006, "loss": 0.0638, "theoretical_loss": 3.383216973341632, "tokens_seen": 2632450048 }, { "epoch": 0.6, "learning_rate": 0.0002042846826606756, "loss": 0.0681, "theoretical_loss": 3.383203787196739, "tokens_seen": 2632581120 }, { "epoch": 0.6, "learning_rate": 0.00020424456390917114, "loss": 0.0639, "theoretical_loss": 3.383190601892161, "tokens_seen": 2632712192 }, { "epoch": 0.6, "learning_rate": 0.0002042044451576667, "loss": 0.0643, "theoretical_loss": 3.383177417427803, "tokens_seen": 2632843264 }, { "debugging/Compilability": 0.9333333333333333, "debugging/distinct-1-grams": 0.7589863661874164, "debugging/entropy-1-grams": 5.39104271171916, "debugging/length": 456.8, "debugging/num_segments": 15, "debugging/raw_token_scores_avg": 0.017262427136301994, "debugging/raw_token_scores_std": 0.06595400720834732, "debugging/score": 0.004792635764407888, "debugging/score_std": 0.003961630245099227, "epoch": 0.6, "objective/train/advantage_avg": 0.002751849591732025, "objective/train/docs_used": 956895, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3897794485092163, "objective/train/original_loss": 1.3897793292999268, "objective/train/theoretical_loss": 3.383164233803569, "objective/train/tokens_used": 1003498976, "objective/train/value_avg": -0.02001953125, "objective/train/value_loss": 0.0003571594716049731, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.57177734375, "objective/train/value_reward_corr": 0.9703633726173823, "objective/train/value_std": 0.07379150390625, "objective/train/weight_avg": 1.002924919128418, "objective/train/weighted_lm_loss": 1.394389271736145, "objective/train/weights_max": 1.181571125984192, "objective/train/weights_min": 0.37133094668388367, "theoretical_loss": 3.383164233803569, "tokens_seen": 2632974336 }, { "epoch": 0.6, "learning_rate": 0.00020416432640616225, "loss": 0.0677, "theoretical_loss": 3.383164233803569, "tokens_seen": 2632974336 }, { "epoch": 0.6, "learning_rate": 0.0002041242076546578, "loss": 0.0615, "theoretical_loss": 3.3831510510193636, "tokens_seen": 2633105408 }, { "epoch": 0.6, "learning_rate": 0.00020408408890315333, "loss": 0.064, "theoretical_loss": 3.3831378690750924, "tokens_seen": 2633236480 }, { "epoch": 0.6, "learning_rate": 0.00020404397015164887, "loss": 0.0631, "theoretical_loss": 3.383124687970659, "tokens_seen": 2633367552 }, { "epoch": 0.6, "learning_rate": 0.00020400385140014444, "loss": 0.0641, "theoretical_loss": 3.383111507705969, "tokens_seen": 2633498624 }, { "epoch": 0.6, "learning_rate": 0.00020396373264863998, "loss": 0.0634, "theoretical_loss": 3.3830983282809264, "tokens_seen": 2633629696 }, { "epoch": 0.6, "learning_rate": 0.00020392361389713552, "loss": 0.0644, "theoretical_loss": 3.3830851496954364, "tokens_seen": 2633760768 }, { "epoch": 0.6, "learning_rate": 0.00020388349514563107, "loss": 0.0641, "theoretical_loss": 3.383071971949404, "tokens_seen": 2633891840 }, { "epoch": 0.6, "learning_rate": 0.0002038433763941266, "loss": 0.0663, "theoretical_loss": 3.383058795042732, "tokens_seen": 2634022912 }, { "epoch": 0.6, "learning_rate": 0.00020380325764262218, "loss": 0.0657, "theoretical_loss": 3.383045618975328, "tokens_seen": 2634153984 }, { "epoch": 0.6, "learning_rate": 0.00020376313889111772, "loss": 0.0656, "theoretical_loss": 3.3830324437470956, "tokens_seen": 2634285056 }, { "epoch": 0.6, "learning_rate": 0.00020372302013961326, "loss": 0.0695, "theoretical_loss": 3.3830192693579386, "tokens_seen": 2634416128 }, { "epoch": 0.6, "learning_rate": 0.0002036829013881088, "loss": 0.0655, "theoretical_loss": 3.3830060958077635, "tokens_seen": 2634547200 }, { "epoch": 0.6, "learning_rate": 0.00020364278263660434, "loss": 0.0636, "theoretical_loss": 3.3829929230964737, "tokens_seen": 2634678272 }, { "epoch": 0.6, "learning_rate": 0.0002036026638850999, "loss": 0.0664, "theoretical_loss": 3.382979751223975, "tokens_seen": 2634809344 }, { "epoch": 0.6, "learning_rate": 0.00020356254513359545, "loss": 0.0623, "theoretical_loss": 3.382966580190172, "tokens_seen": 2634940416 }, { "epoch": 0.6, "learning_rate": 0.000203522426382091, "loss": 0.0689, "theoretical_loss": 3.3829534099949696, "tokens_seen": 2635071488 }, { "epoch": 0.6, "learning_rate": 0.00020348230763058653, "loss": 0.0617, "theoretical_loss": 3.3829402406382725, "tokens_seen": 2635202560 }, { "epoch": 0.6, "learning_rate": 0.00020344218887908207, "loss": 0.0714, "theoretical_loss": 3.382927072119986, "tokens_seen": 2635333632 }, { "epoch": 0.6, "learning_rate": 0.00020340207012757764, "loss": 0.0627, "theoretical_loss": 3.3829139044400143, "tokens_seen": 2635464704 }, { "epoch": 0.6, "learning_rate": 0.00020336195137607318, "loss": 0.0638, "theoretical_loss": 3.382900737598263, "tokens_seen": 2635595776 }, { "epoch": 0.6, "learning_rate": 0.00020332183262456873, "loss": 0.0625, "theoretical_loss": 3.382887571594637, "tokens_seen": 2635726848 }, { "epoch": 0.6, "learning_rate": 0.00020328171387306427, "loss": 0.0656, "theoretical_loss": 3.3828744064290412, "tokens_seen": 2635857920 }, { "epoch": 0.6, "learning_rate": 0.0002032415951215598, "loss": 0.0645, "theoretical_loss": 3.3828612421013804, "tokens_seen": 2635988992 }, { "epoch": 0.6, "learning_rate": 0.00020320147637005538, "loss": 0.0674, "theoretical_loss": 3.38284807861156, "tokens_seen": 2636120064 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.0011783711379393935, "objective/train/docs_used": 957995, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1515958309173584, "objective/train/original_loss": 1.151595950126648, "objective/train/theoretical_loss": 3.382834915959484, "objective/train/tokens_used": 1006775776, "objective/train/value_avg": -0.00928497314453125, "objective/train/value_loss": 0.0001814767747418955, "objective/train/value_max": -6.300210952758789e-05, "objective/train/value_min": -0.63232421875, "objective/train/value_reward_corr": 0.7432217052004202, "objective/train/value_std": 0.0160369873046875, "objective/train/weight_avg": 1.0012649297714233, "objective/train/weighted_lm_loss": 1.1525611877441406, "objective/train/weights_max": 1.2536687850952148, "objective/train/weights_min": 0.5387794971466064, "theoretical_loss": 3.382834915959484, "tokens_seen": 2636251136 }, { "epoch": 0.6, "learning_rate": 0.00020316135761855092, "loss": 0.0625, "theoretical_loss": 3.382834915959484, "tokens_seen": 2636251136 }, { "epoch": 0.6, "learning_rate": 0.00020312123886704646, "loss": 0.0663, "theoretical_loss": 3.382821754145059, "tokens_seen": 2636382208 }, { "epoch": 0.6, "learning_rate": 0.000203081120115542, "loss": 0.0653, "theoretical_loss": 3.382808593168189, "tokens_seen": 2636513280 }, { "epoch": 0.6, "learning_rate": 0.00020304100136403754, "loss": 0.0642, "theoretical_loss": 3.3827954330287797, "tokens_seen": 2636644352 }, { "epoch": 0.6, "learning_rate": 0.0002030008826125331, "loss": 0.0675, "theoretical_loss": 3.3827822737267352, "tokens_seen": 2636775424 }, { "epoch": 0.6, "learning_rate": 0.00020296076386102865, "loss": 0.0641, "theoretical_loss": 3.3827691152619614, "tokens_seen": 2636906496 }, { "epoch": 0.6, "learning_rate": 0.0002029206451095242, "loss": 0.0633, "theoretical_loss": 3.3827559576343633, "tokens_seen": 2637037568 }, { "epoch": 0.6, "learning_rate": 0.00020288052635801973, "loss": 0.0625, "theoretical_loss": 3.382742800843846, "tokens_seen": 2637168640 }, { "epoch": 0.6, "learning_rate": 0.00020284040760651527, "loss": 0.0624, "theoretical_loss": 3.3827296448903144, "tokens_seen": 2637299712 }, { "epoch": 0.6, "learning_rate": 0.00020280028885501084, "loss": 0.0614, "theoretical_loss": 3.382716489773674, "tokens_seen": 2637430784 }, { "epoch": 0.6, "learning_rate": 0.00020276017010350638, "loss": 0.0642, "theoretical_loss": 3.38270333549383, "tokens_seen": 2637561856 }, { "epoch": 0.6, "learning_rate": 0.00020272005135200193, "loss": 0.0651, "theoretical_loss": 3.3826901820506867, "tokens_seen": 2637692928 }, { "epoch": 0.6, "learning_rate": 0.00020267993260049747, "loss": 0.0628, "theoretical_loss": 3.382677029444151, "tokens_seen": 2637824000 }, { "epoch": 0.6, "learning_rate": 0.000202639813848993, "loss": 0.0633, "theoretical_loss": 3.3826638776741262, "tokens_seen": 2637955072 }, { "epoch": 0.6, "learning_rate": 0.00020259969509748858, "loss": 0.0638, "theoretical_loss": 3.3826507267405193, "tokens_seen": 2638086144 }, { "epoch": 0.6, "learning_rate": 0.00020255957634598412, "loss": 0.0678, "theoretical_loss": 3.3826375766432344, "tokens_seen": 2638217216 }, { "epoch": 0.6, "learning_rate": 0.00020251945759447966, "loss": 0.0672, "theoretical_loss": 3.382624427382177, "tokens_seen": 2638348288 }, { "epoch": 0.6, "learning_rate": 0.0002024793388429752, "loss": 0.066, "theoretical_loss": 3.3826112789572527, "tokens_seen": 2638479360 }, { "epoch": 0.6, "learning_rate": 0.00020243922009147074, "loss": 0.0634, "theoretical_loss": 3.3825981313683666, "tokens_seen": 2638610432 }, { "epoch": 0.6, "learning_rate": 0.0002023991013399663, "loss": 0.0625, "theoretical_loss": 3.382584984615424, "tokens_seen": 2638741504 }, { "epoch": 0.6, "learning_rate": 0.00020235898258846185, "loss": 0.0629, "theoretical_loss": 3.38257183869833, "tokens_seen": 2638872576 }, { "epoch": 0.6, "learning_rate": 0.00020231886383695742, "loss": 0.0622, "theoretical_loss": 3.3825586936169905, "tokens_seen": 2639003648 }, { "epoch": 0.6, "learning_rate": 0.00020227874508545293, "loss": 0.0657, "theoretical_loss": 3.3825455493713106, "tokens_seen": 2639134720 }, { "epoch": 0.6, "learning_rate": 0.00020223862633394848, "loss": 0.0644, "theoretical_loss": 3.382532405961195, "tokens_seen": 2639265792 }, { "epoch": 0.6, "learning_rate": 0.00020219850758244404, "loss": 0.0631, "theoretical_loss": 3.38251926338655, "tokens_seen": 2639396864 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.0004104752151761204, "objective/train/docs_used": 959162, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1879522800445557, "objective/train/original_loss": 1.1879521608352661, "objective/train/theoretical_loss": 3.3825061216472814, "objective/train/tokens_used": 1010052576, "objective/train/value_avg": -0.01200103759765625, "objective/train/value_loss": 0.0002243581402581185, "objective/train/value_max": -4.5418739318847656e-05, "objective/train/value_min": -0.31298828125, "objective/train/value_reward_corr": 0.8316446162861906, "objective/train/value_std": 0.025726318359375, "objective/train/weight_avg": 1.0005146265029907, "objective/train/weighted_lm_loss": 1.1891281604766846, "objective/train/weights_max": 1.1615123748779297, "objective/train/weights_min": 0.36991289258003235, "theoretical_loss": 3.3825061216472814, "tokens_seen": 2639527936 }, { "epoch": 0.6, "learning_rate": 0.00020215838883093958, "loss": 0.0638, "theoretical_loss": 3.3825061216472814, "tokens_seen": 2639527936 }, { "epoch": 0.6, "learning_rate": 0.00020211827007943515, "loss": 0.0642, "theoretical_loss": 3.382492980743293, "tokens_seen": 2639659008 }, { "epoch": 0.6, "learning_rate": 0.00020207815132793067, "loss": 0.0639, "theoretical_loss": 3.382479840674492, "tokens_seen": 2639790080 }, { "epoch": 0.6, "learning_rate": 0.00020203803257642624, "loss": 0.0632, "theoretical_loss": 3.3824667014407823, "tokens_seen": 2639921152 }, { "epoch": 0.6, "learning_rate": 0.00020199791382492178, "loss": 0.0669, "theoretical_loss": 3.3824535630420707, "tokens_seen": 2640052224 }, { "epoch": 0.6, "learning_rate": 0.00020195779507341732, "loss": 0.0652, "theoretical_loss": 3.382440425478262, "tokens_seen": 2640183296 }, { "epoch": 0.6, "learning_rate": 0.0002019176763219129, "loss": 0.063, "theoretical_loss": 3.3824272887492612, "tokens_seen": 2640314368 }, { "epoch": 0.6, "learning_rate": 0.0002018775575704084, "loss": 0.0627, "theoretical_loss": 3.3824141528549747, "tokens_seen": 2640445440 }, { "epoch": 0.6, "learning_rate": 0.00020183743881890397, "loss": 0.0632, "theoretical_loss": 3.382401017795308, "tokens_seen": 2640576512 }, { "epoch": 0.6, "learning_rate": 0.0002017973200673995, "loss": 0.068, "theoretical_loss": 3.382387883570166, "tokens_seen": 2640707584 }, { "epoch": 0.6, "learning_rate": 0.00020175720131589505, "loss": 0.0609, "theoretical_loss": 3.382374750179455, "tokens_seen": 2640838656 }, { "epoch": 0.6, "learning_rate": 0.00020171708256439062, "loss": 0.0647, "theoretical_loss": 3.38236161762308, "tokens_seen": 2640969728 }, { "epoch": 0.6, "learning_rate": 0.00020167696381288613, "loss": 0.0616, "theoretical_loss": 3.3823484859009474, "tokens_seen": 2641100800 }, { "epoch": 0.6, "learning_rate": 0.0002016368450613817, "loss": 0.0665, "theoretical_loss": 3.3823353550129616, "tokens_seen": 2641231872 }, { "epoch": 0.6, "learning_rate": 0.00020159672630987724, "loss": 0.0641, "theoretical_loss": 3.3823222249590286, "tokens_seen": 2641362944 }, { "epoch": 0.6, "learning_rate": 0.00020155660755837279, "loss": 0.0653, "theoretical_loss": 3.3823090957390547, "tokens_seen": 2641494016 }, { "epoch": 0.6, "learning_rate": 0.00020151648880686835, "loss": 0.0658, "theoretical_loss": 3.3822959673529454, "tokens_seen": 2641625088 }, { "epoch": 0.6, "learning_rate": 0.00020147637005536387, "loss": 0.0653, "theoretical_loss": 3.3822828398006055, "tokens_seen": 2641756160 }, { "epoch": 0.6, "learning_rate": 0.00020143625130385944, "loss": 0.0669, "theoretical_loss": 3.3822697130819415, "tokens_seen": 2641887232 }, { "epoch": 0.6, "learning_rate": 0.00020139613255235498, "loss": 0.0659, "theoretical_loss": 3.382256587196859, "tokens_seen": 2642018304 }, { "epoch": 0.6, "learning_rate": 0.00020135601380085052, "loss": 0.0632, "theoretical_loss": 3.3822434621452637, "tokens_seen": 2642149376 }, { "epoch": 0.6, "learning_rate": 0.0002013158950493461, "loss": 0.0636, "theoretical_loss": 3.3822303379270613, "tokens_seen": 2642280448 }, { "epoch": 0.6, "learning_rate": 0.0002012757762978416, "loss": 0.0701, "theoretical_loss": 3.382217214542157, "tokens_seen": 2642411520 }, { "epoch": 0.6, "learning_rate": 0.00020123565754633717, "loss": 0.064, "theoretical_loss": 3.3822040919904572, "tokens_seen": 2642542592 }, { "epoch": 0.6, "learning_rate": 0.0002011955387948327, "loss": 0.069, "theoretical_loss": 3.3821909702718678, "tokens_seen": 2642673664 }, { "epoch": 0.6, "objective/train/advantage_avg": 0.000574038305785507, "objective/train/docs_used": 960167, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.256800651550293, "objective/train/original_loss": 1.2568004131317139, "objective/train/theoretical_loss": 3.382177849386294, "objective/train/tokens_used": 1013329376, "objective/train/value_avg": -0.00861358642578125, "objective/train/value_loss": 8.081713895080611e-05, "objective/train/value_max": -7.140636444091797e-05, "objective/train/value_min": -0.239013671875, "objective/train/value_reward_corr": 0.8583842502159671, "objective/train/value_std": 0.0147857666015625, "objective/train/weight_avg": 1.000613808631897, "objective/train/weighted_lm_loss": 1.2574775218963623, "objective/train/weights_max": 1.0863513946533203, "objective/train/weights_min": 0.6266248226165771, "theoretical_loss": 3.382177849386294, "tokens_seen": 2642804736 }, { "epoch": 0.6, "learning_rate": 0.00020115542004332825, "loss": 0.0643, "theoretical_loss": 3.382177849386294, "tokens_seen": 2642804736 }, { "epoch": 0.6, "learning_rate": 0.00020111530129182382, "loss": 0.066, "theoretical_loss": 3.382164729333642, "tokens_seen": 2642935808 }, { "epoch": 0.6, "learning_rate": 0.00020107518254031934, "loss": 0.0634, "theoretical_loss": 3.3821516101138176, "tokens_seen": 2643066880 }, { "epoch": 0.6, "learning_rate": 0.0002010350637888149, "loss": 0.0648, "theoretical_loss": 3.3821384917267263, "tokens_seen": 2643197952 }, { "epoch": 0.6, "learning_rate": 0.00020099494503731044, "loss": 0.0629, "theoretical_loss": 3.3821253741722748, "tokens_seen": 2643329024 }, { "epoch": 0.6, "learning_rate": 0.00020095482628580599, "loss": 0.0686, "theoretical_loss": 3.382112257450368, "tokens_seen": 2643460096 }, { "epoch": 0.6, "learning_rate": 0.00020091470753430155, "loss": 0.0634, "theoretical_loss": 3.382099141560912, "tokens_seen": 2643591168 }, { "epoch": 0.6, "learning_rate": 0.00020087458878279707, "loss": 0.0647, "theoretical_loss": 3.382086026503813, "tokens_seen": 2643722240 }, { "epoch": 0.6, "learning_rate": 0.00020083447003129264, "loss": 0.0638, "theoretical_loss": 3.3820729122789768, "tokens_seen": 2643853312 }, { "epoch": 0.6, "learning_rate": 0.00020079435127978818, "loss": 0.0636, "theoretical_loss": 3.3820597988863095, "tokens_seen": 2643984384 }, { "epoch": 0.6, "learning_rate": 0.00020075423252828372, "loss": 0.0632, "theoretical_loss": 3.382046686325717, "tokens_seen": 2644115456 }, { "epoch": 0.6, "learning_rate": 0.0002007141137767793, "loss": 0.0614, "theoretical_loss": 3.382033574597105, "tokens_seen": 2644246528 }, { "epoch": 0.6, "learning_rate": 0.0002006739950252748, "loss": 0.0682, "theoretical_loss": 3.3820204637003792, "tokens_seen": 2644377600 }, { "epoch": 0.6, "learning_rate": 0.00020063387627377037, "loss": 0.0648, "theoretical_loss": 3.3820073536354465, "tokens_seen": 2644508672 }, { "epoch": 0.6, "learning_rate": 0.0002005937575222659, "loss": 0.0616, "theoretical_loss": 3.381994244402212, "tokens_seen": 2644639744 }, { "epoch": 0.6, "learning_rate": 0.00020055363877076145, "loss": 0.0624, "theoretical_loss": 3.381981136000582, "tokens_seen": 2644770816 }, { "epoch": 0.6, "learning_rate": 0.00020051352001925702, "loss": 0.0637, "theoretical_loss": 3.381968028430463, "tokens_seen": 2644901888 }, { "epoch": 0.6, "learning_rate": 0.00020047340126775254, "loss": 0.0675, "theoretical_loss": 3.3819549216917606, "tokens_seen": 2645032960 }, { "epoch": 0.6, "learning_rate": 0.0002004332825162481, "loss": 0.0613, "theoretical_loss": 3.3819418157843812, "tokens_seen": 2645164032 }, { "epoch": 0.6, "learning_rate": 0.00020039316376474365, "loss": 0.0672, "theoretical_loss": 3.3819287107082303, "tokens_seen": 2645295104 }, { "epoch": 0.6, "learning_rate": 0.0002003530450132392, "loss": 0.0668, "theoretical_loss": 3.381915606463215, "tokens_seen": 2645426176 }, { "epoch": 0.6, "learning_rate": 0.00020031292626173476, "loss": 0.0642, "theoretical_loss": 3.38190250304924, "tokens_seen": 2645557248 }, { "epoch": 0.6, "learning_rate": 0.00020027280751023027, "loss": 0.0614, "theoretical_loss": 3.3818894004662123, "tokens_seen": 2645688320 }, { "epoch": 0.6, "learning_rate": 0.00020023268875872584, "loss": 0.0626, "theoretical_loss": 3.381876298714038, "tokens_seen": 2645819392 }, { "epoch": 0.6, "learning_rate": 0.00020019257000722138, "loss": 0.0618, "theoretical_loss": 3.3818631977926237, "tokens_seen": 2645950464 }, { "epoch": 0.6, "objective/train/advantage_avg": -0.00027237573522143066, "objective/train/docs_used": 961281, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1427478790283203, "objective/train/original_loss": 1.1427477598190308, "objective/train/theoretical_loss": 3.3818500977018746, "objective/train/tokens_used": 1016606176, "objective/train/value_avg": -0.005382537841796875, "objective/train/value_loss": 0.00020796252647414804, "objective/train/value_max": -3.534555435180664e-05, "objective/train/value_min": -0.230712890625, "objective/train/value_reward_corr": 0.5117224359506622, "objective/train/value_std": 0.0084075927734375, "objective/train/weight_avg": 0.9998123049736023, "objective/train/weighted_lm_loss": 1.1425399780273438, "objective/train/weights_max": 1.1115026473999023, "objective/train/weights_min": 0.36883002519607544, "theoretical_loss": 3.3818500977018746, "tokens_seen": 2646081536 }, { "epoch": 0.6, "learning_rate": 0.00020015245125571692, "loss": 0.0586, "theoretical_loss": 3.3818500977018746, "tokens_seen": 2646081536 }, { "epoch": 0.6, "learning_rate": 0.0002001123325042125, "loss": 0.0606, "theoretical_loss": 3.381836998441697, "tokens_seen": 2646212608 }, { "epoch": 0.6, "learning_rate": 0.000200072213752708, "loss": 0.0615, "theoretical_loss": 3.381823900011998, "tokens_seen": 2646343680 }, { "epoch": 0.6, "learning_rate": 0.00020003209500120357, "loss": 0.0636, "theoretical_loss": 3.381810802412683, "tokens_seen": 2646474752 }, { "epoch": 0.6, "learning_rate": 0.0001999919762496991, "loss": 0.0645, "theoretical_loss": 3.381797705643659, "tokens_seen": 2646605824 }, { "epoch": 0.6, "learning_rate": 0.00019995185749819465, "loss": 0.0624, "theoretical_loss": 3.381784609704831, "tokens_seen": 2646736896 }, { "epoch": 0.6, "learning_rate": 0.00019991173874669022, "loss": 0.0625, "theoretical_loss": 3.3817715145961067, "tokens_seen": 2646867968 }, { "epoch": 0.6, "learning_rate": 0.00019987161999518574, "loss": 0.0617, "theoretical_loss": 3.3817584203173916, "tokens_seen": 2646999040 }, { "epoch": 0.6, "learning_rate": 0.0001998315012436813, "loss": 0.0651, "theoretical_loss": 3.3817453268685918, "tokens_seen": 2647130112 }, { "epoch": 0.6, "learning_rate": 0.00019979138249217685, "loss": 0.0634, "theoretical_loss": 3.381732234249614, "tokens_seen": 2647261184 }, { "epoch": 0.6, "learning_rate": 0.00019975126374067241, "loss": 0.0669, "theoretical_loss": 3.3817191424603648, "tokens_seen": 2647392256 }, { "epoch": 0.6, "learning_rate": 0.00019971114498916796, "loss": 0.0646, "theoretical_loss": 3.3817060515007498, "tokens_seen": 2647523328 }, { "epoch": 0.6, "learning_rate": 0.00019967102623766347, "loss": 0.0635, "theoretical_loss": 3.3816929613706757, "tokens_seen": 2647654400 }, { "epoch": 0.6, "learning_rate": 0.00019963090748615904, "loss": 0.0617, "theoretical_loss": 3.381679872070049, "tokens_seen": 2647785472 }, { "epoch": 0.6, "learning_rate": 0.00019959078873465458, "loss": 0.0597, "theoretical_loss": 3.381666783598776, "tokens_seen": 2647916544 }, { "epoch": 0.6, "learning_rate": 0.00019955066998315015, "loss": 0.0646, "theoretical_loss": 3.3816536959567634, "tokens_seen": 2648047616 }, { "epoch": 0.6, "learning_rate": 0.0001995105512316457, "loss": 0.0598, "theoretical_loss": 3.381640609143917, "tokens_seen": 2648178688 }, { "epoch": 0.61, "learning_rate": 0.0001994704324801412, "loss": 0.0642, "theoretical_loss": 3.3816275231601436, "tokens_seen": 2648309760 }, { "epoch": 0.61, "learning_rate": 0.00019943031372863677, "loss": 0.0621, "theoretical_loss": 3.3816144380053497, "tokens_seen": 2648440832 }, { "epoch": 0.61, "learning_rate": 0.0001993901949771323, "loss": 0.0601, "theoretical_loss": 3.381601353679441, "tokens_seen": 2648571904 }, { "epoch": 0.61, "learning_rate": 0.00019935007622562788, "loss": 0.0629, "theoretical_loss": 3.3815882701823257, "tokens_seen": 2648702976 }, { "epoch": 0.61, "learning_rate": 0.00019930995747412342, "loss": 0.0677, "theoretical_loss": 3.3815751875139086, "tokens_seen": 2648834048 }, { "epoch": 0.61, "learning_rate": 0.00019926983872261894, "loss": 0.0627, "theoretical_loss": 3.3815621056740968, "tokens_seen": 2648965120 }, { "epoch": 0.61, "learning_rate": 0.0001992297199711145, "loss": 0.0587, "theoretical_loss": 3.3815490246627973, "tokens_seen": 2649096192 }, { "epoch": 0.61, "learning_rate": 0.00019918960121961005, "loss": 0.0658, "theoretical_loss": 3.381535944479916, "tokens_seen": 2649227264 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0004255092644598335, "objective/train/docs_used": 962524, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1780362129211426, "objective/train/original_loss": 1.1780362129211426, "objective/train/theoretical_loss": 3.3815228651253593, "objective/train/tokens_used": 1019882976, "objective/train/value_avg": -0.006443023681640625, "objective/train/value_loss": 0.00012856724788434803, "objective/train/value_max": -4.166364669799805e-05, "objective/train/value_min": -0.2161865234375, "objective/train/value_reward_corr": 0.6792854567934093, "objective/train/value_std": 0.01100921630859375, "objective/train/weight_avg": 1.0004843473434448, "objective/train/weighted_lm_loss": 1.178755760192871, "objective/train/weights_max": 1.2253751754760742, "objective/train/weights_min": 0.37631624937057495, "theoretical_loss": 3.3815228651253593, "tokens_seen": 2649358336 }, { "epoch": 0.61, "learning_rate": 0.00019914948246810561, "loss": 0.0597, "theoretical_loss": 3.3815228651253593, "tokens_seen": 2649358336 }, { "epoch": 0.61, "learning_rate": 0.00019910936371660116, "loss": 0.0643, "theoretical_loss": 3.3815097865990342, "tokens_seen": 2649489408 }, { "epoch": 0.61, "learning_rate": 0.00019906924496509667, "loss": 0.0643, "theoretical_loss": 3.3814967089008476, "tokens_seen": 2649620480 }, { "epoch": 0.61, "learning_rate": 0.00019902912621359224, "loss": 0.0633, "theoretical_loss": 3.3814836320307053, "tokens_seen": 2649751552 }, { "epoch": 0.61, "learning_rate": 0.00019898900746208778, "loss": 0.0668, "theoretical_loss": 3.3814705559885145, "tokens_seen": 2649882624 }, { "epoch": 0.61, "learning_rate": 0.00019894888871058335, "loss": 0.0671, "theoretical_loss": 3.381457480774182, "tokens_seen": 2650013696 }, { "epoch": 0.61, "learning_rate": 0.0001989087699590789, "loss": 0.0649, "theoretical_loss": 3.3814444063876135, "tokens_seen": 2650144768 }, { "epoch": 0.61, "learning_rate": 0.0001988686512075744, "loss": 0.0628, "theoretical_loss": 3.3814313328287167, "tokens_seen": 2650275840 }, { "epoch": 0.61, "learning_rate": 0.00019882853245606997, "loss": 0.0628, "theoretical_loss": 3.381418260097398, "tokens_seen": 2650406912 }, { "epoch": 0.61, "learning_rate": 0.0001987884137045655, "loss": 0.0633, "theoretical_loss": 3.381405188193563, "tokens_seen": 2650537984 }, { "epoch": 0.61, "learning_rate": 0.00019874829495306108, "loss": 0.0609, "theoretical_loss": 3.3813921171171204, "tokens_seen": 2650669056 }, { "epoch": 0.61, "learning_rate": 0.00019870817620155662, "loss": 0.0647, "theoretical_loss": 3.381379046867975, "tokens_seen": 2650800128 }, { "epoch": 0.61, "learning_rate": 0.00019866805745005214, "loss": 0.0679, "theoretical_loss": 3.381365977446035, "tokens_seen": 2650931200 }, { "epoch": 0.61, "learning_rate": 0.0001986279386985477, "loss": 0.0649, "theoretical_loss": 3.381352908851206, "tokens_seen": 2651062272 }, { "epoch": 0.61, "learning_rate": 0.00019858781994704325, "loss": 0.0617, "theoretical_loss": 3.3813398410833955, "tokens_seen": 2651193344 }, { "epoch": 0.61, "learning_rate": 0.00019854770119553882, "loss": 0.0618, "theoretical_loss": 3.38132677414251, "tokens_seen": 2651324416 }, { "epoch": 0.61, "learning_rate": 0.00019850758244403436, "loss": 0.064, "theoretical_loss": 3.3813137080284568, "tokens_seen": 2651455488 }, { "epoch": 0.61, "learning_rate": 0.00019846746369252987, "loss": 0.0652, "theoretical_loss": 3.381300642741142, "tokens_seen": 2651586560 }, { "epoch": 0.61, "learning_rate": 0.00019842734494102544, "loss": 0.0624, "theoretical_loss": 3.3812875782804728, "tokens_seen": 2651717632 }, { "epoch": 0.61, "learning_rate": 0.00019838722618952098, "loss": 0.063, "theoretical_loss": 3.3812745146463556, "tokens_seen": 2651848704 }, { "epoch": 0.61, "learning_rate": 0.00019834710743801655, "loss": 0.0629, "theoretical_loss": 3.381261451838698, "tokens_seen": 2651979776 }, { "epoch": 0.61, "learning_rate": 0.0001983069886865121, "loss": 0.0651, "theoretical_loss": 3.381248389857406, "tokens_seen": 2652110848 }, { "epoch": 0.61, "learning_rate": 0.0001982668699350076, "loss": 0.0646, "theoretical_loss": 3.3812353287023877, "tokens_seen": 2652241920 }, { "epoch": 0.61, "learning_rate": 0.00019822675118350317, "loss": 0.062, "theoretical_loss": 3.381222268373549, "tokens_seen": 2652372992 }, { "epoch": 0.61, "learning_rate": 0.00019818663243199871, "loss": 0.0687, "theoretical_loss": 3.381209208870797, "tokens_seen": 2652504064 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0005185392219573259, "objective/train/docs_used": 963694, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1010911464691162, "objective/train/original_loss": 1.1010911464691162, "objective/train/theoretical_loss": 3.3811961501940386, "objective/train/tokens_used": 1023159776, "objective/train/value_avg": -0.0068511962890625, "objective/train/value_loss": 0.0001646088348934427, "objective/train/value_max": -2.777576446533203e-05, "objective/train/value_min": -0.2264404296875, "objective/train/value_reward_corr": 0.6204666295403409, "objective/train/value_std": 0.0102691650390625, "objective/train/weight_avg": 1.0005900859832764, "objective/train/weighted_lm_loss": 1.1010620594024658, "objective/train/weights_max": 1.172405481338501, "objective/train/weights_min": 0.3681500554084778, "theoretical_loss": 3.3811961501940386, "tokens_seen": 2652635136 }, { "epoch": 0.61, "learning_rate": 0.00019814651368049428, "loss": 0.064, "theoretical_loss": 3.3811961501940386, "tokens_seen": 2652635136 }, { "epoch": 0.61, "learning_rate": 0.00019810639492898982, "loss": 0.0685, "theoretical_loss": 3.381183092343181, "tokens_seen": 2652766208 }, { "epoch": 0.61, "learning_rate": 0.00019806627617748534, "loss": 0.0626, "theoretical_loss": 3.3811700353181307, "tokens_seen": 2652897280 }, { "epoch": 0.61, "learning_rate": 0.0001980261574259809, "loss": 0.0644, "theoretical_loss": 3.3811569791187956, "tokens_seen": 2653028352 }, { "epoch": 0.61, "learning_rate": 0.00019798603867447645, "loss": 0.0624, "theoretical_loss": 3.3811439237450815, "tokens_seen": 2653159424 }, { "epoch": 0.61, "learning_rate": 0.00019794591992297202, "loss": 0.0686, "theoretical_loss": 3.3811308691968964, "tokens_seen": 2653290496 }, { "epoch": 0.61, "learning_rate": 0.00019790580117146756, "loss": 0.0654, "theoretical_loss": 3.381117815474147, "tokens_seen": 2653421568 }, { "epoch": 0.61, "learning_rate": 0.00019786568241996307, "loss": 0.0631, "theoretical_loss": 3.38110476257674, "tokens_seen": 2653552640 }, { "epoch": 0.61, "learning_rate": 0.00019782556366845864, "loss": 0.0708, "theoretical_loss": 3.381091710504583, "tokens_seen": 2653683712 }, { "epoch": 0.61, "learning_rate": 0.00019778544491695418, "loss": 0.0648, "theoretical_loss": 3.3810786592575823, "tokens_seen": 2653814784 }, { "epoch": 0.61, "learning_rate": 0.00019774532616544975, "loss": 0.0657, "theoretical_loss": 3.381065608835646, "tokens_seen": 2653945856 }, { "epoch": 0.61, "learning_rate": 0.0001977052074139453, "loss": 0.0654, "theoretical_loss": 3.38105255923868, "tokens_seen": 2654076928 }, { "epoch": 0.61, "learning_rate": 0.0001976650886624408, "loss": 0.0658, "theoretical_loss": 3.381039510466593, "tokens_seen": 2654208000 }, { "epoch": 0.61, "learning_rate": 0.00019762496991093637, "loss": 0.0669, "theoretical_loss": 3.3810264625192907, "tokens_seen": 2654339072 }, { "epoch": 0.61, "learning_rate": 0.00019758485115943191, "loss": 0.0637, "theoretical_loss": 3.3810134153966804, "tokens_seen": 2654470144 }, { "epoch": 0.61, "learning_rate": 0.00019754473240792748, "loss": 0.0691, "theoretical_loss": 3.38100036909867, "tokens_seen": 2654601216 }, { "epoch": 0.61, "learning_rate": 0.00019750461365642302, "loss": 0.0681, "theoretical_loss": 3.3809873236251664, "tokens_seen": 2654732288 }, { "epoch": 0.61, "learning_rate": 0.00019746449490491854, "loss": 0.0683, "theoretical_loss": 3.3809742789760766, "tokens_seen": 2654863360 }, { "epoch": 0.61, "learning_rate": 0.0001974243761534141, "loss": 0.0654, "theoretical_loss": 3.380961235151308, "tokens_seen": 2654994432 }, { "epoch": 0.61, "learning_rate": 0.00019738425740190965, "loss": 0.0647, "theoretical_loss": 3.3809481921507674, "tokens_seen": 2655125504 }, { "epoch": 0.61, "learning_rate": 0.00019734413865040522, "loss": 0.0639, "theoretical_loss": 3.3809351499743623, "tokens_seen": 2655256576 }, { "epoch": 0.61, "learning_rate": 0.00019730401989890076, "loss": 0.0663, "theoretical_loss": 3.380922108622, "tokens_seen": 2655387648 }, { "epoch": 0.61, "learning_rate": 0.00019726390114739627, "loss": 0.0661, "theoretical_loss": 3.3809090680935876, "tokens_seen": 2655518720 }, { "epoch": 0.61, "learning_rate": 0.00019722378239589184, "loss": 0.0638, "theoretical_loss": 3.380896028389033, "tokens_seen": 2655649792 }, { "epoch": 0.61, "learning_rate": 0.00019718366364438738, "loss": 0.0646, "theoretical_loss": 3.3808829895082426, "tokens_seen": 2655780864 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0004935257602483034, "objective/train/docs_used": 964907, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3989348411560059, "objective/train/original_loss": 1.3989349603652954, "objective/train/theoretical_loss": 3.3808699514511242, "objective/train/tokens_used": 1026436576, "objective/train/value_avg": -0.006824493408203125, "objective/train/value_loss": 0.00018413456564303488, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.447998046875, "objective/train/value_reward_corr": 0.6445774503683082, "objective/train/value_std": 0.01100921630859375, "objective/train/weight_avg": 1.000573992729187, "objective/train/weighted_lm_loss": 1.399327039718628, "objective/train/weights_max": 1.121497392654419, "objective/train/weights_min": 0.37181854248046875, "theoretical_loss": 3.3808699514511242, "tokens_seen": 2655911936 }, { "epoch": 0.61, "learning_rate": 0.00019714354489288295, "loss": 0.0649, "theoretical_loss": 3.3808699514511242, "tokens_seen": 2655911936 }, { "epoch": 0.61, "learning_rate": 0.0001971034261413785, "loss": 0.0675, "theoretical_loss": 3.380856914217585, "tokens_seen": 2656043008 }, { "epoch": 0.61, "learning_rate": 0.00019706330738987403, "loss": 0.062, "theoretical_loss": 3.3808438778075325, "tokens_seen": 2656174080 }, { "epoch": 0.61, "learning_rate": 0.00019702318863836957, "loss": 0.0616, "theoretical_loss": 3.380830842220874, "tokens_seen": 2656305152 }, { "epoch": 0.61, "learning_rate": 0.00019698306988686512, "loss": 0.0625, "theoretical_loss": 3.3808178074575164, "tokens_seen": 2656436224 }, { "epoch": 0.61, "learning_rate": 0.00019694295113536068, "loss": 0.0691, "theoretical_loss": 3.380804773517368, "tokens_seen": 2656567296 }, { "epoch": 0.61, "learning_rate": 0.00019690283238385622, "loss": 0.0665, "theoretical_loss": 3.380791740400335, "tokens_seen": 2656698368 }, { "epoch": 0.61, "learning_rate": 0.00019686271363235177, "loss": 0.0663, "theoretical_loss": 3.380778708106326, "tokens_seen": 2656829440 }, { "epoch": 0.61, "learning_rate": 0.0001968225948808473, "loss": 0.0665, "theoretical_loss": 3.380765676635248, "tokens_seen": 2656960512 }, { "epoch": 0.61, "learning_rate": 0.00019678247612934285, "loss": 0.0625, "theoretical_loss": 3.380752645987008, "tokens_seen": 2657091584 }, { "epoch": 0.61, "learning_rate": 0.00019674235737783842, "loss": 0.0679, "theoretical_loss": 3.380739616161514, "tokens_seen": 2657222656 }, { "epoch": 0.61, "learning_rate": 0.00019670223862633396, "loss": 0.0679, "theoretical_loss": 3.3807265871586734, "tokens_seen": 2657353728 }, { "epoch": 0.61, "learning_rate": 0.0001966621198748295, "loss": 0.0652, "theoretical_loss": 3.3807135589783934, "tokens_seen": 2657484800 }, { "epoch": 0.61, "learning_rate": 0.00019662200112332504, "loss": 0.069, "theoretical_loss": 3.380700531620582, "tokens_seen": 2657615872 }, { "epoch": 0.61, "learning_rate": 0.00019658188237182058, "loss": 0.0695, "theoretical_loss": 3.3806875050851457, "tokens_seen": 2657746944 }, { "epoch": 0.61, "learning_rate": 0.00019654176362031615, "loss": 0.0645, "theoretical_loss": 3.380674479371993, "tokens_seen": 2657878016 }, { "epoch": 0.61, "learning_rate": 0.0001965016448688117, "loss": 0.0633, "theoretical_loss": 3.3806614544810314, "tokens_seen": 2658009088 }, { "epoch": 0.61, "learning_rate": 0.00019646152611730723, "loss": 0.0688, "theoretical_loss": 3.3806484304121684, "tokens_seen": 2658140160 }, { "epoch": 0.61, "learning_rate": 0.00019642140736580277, "loss": 0.0684, "theoretical_loss": 3.380635407165311, "tokens_seen": 2658271232 }, { "epoch": 0.61, "learning_rate": 0.00019638128861429832, "loss": 0.0675, "theoretical_loss": 3.3806223847403674, "tokens_seen": 2658402304 }, { "epoch": 0.61, "learning_rate": 0.00019634116986279388, "loss": 0.0682, "theoretical_loss": 3.3806093631372454, "tokens_seen": 2658533376 }, { "epoch": 0.61, "learning_rate": 0.00019630105111128943, "loss": 0.0649, "theoretical_loss": 3.380596342355852, "tokens_seen": 2658664448 }, { "epoch": 0.61, "learning_rate": 0.00019626093235978497, "loss": 0.0671, "theoretical_loss": 3.3805833223960944, "tokens_seen": 2658795520 }, { "epoch": 0.61, "learning_rate": 0.0001962208136082805, "loss": 0.0628, "theoretical_loss": 3.3805703032578815, "tokens_seen": 2658926592 }, { "epoch": 0.61, "learning_rate": 0.00019618069485677605, "loss": 0.0654, "theoretical_loss": 3.3805572849411205, "tokens_seen": 2659057664 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0008981440332718194, "objective/train/docs_used": 966064, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3902150392532349, "objective/train/original_loss": 1.3902146816253662, "objective/train/theoretical_loss": 3.380544267445719, "objective/train/tokens_used": 1029713376, "objective/train/value_avg": -0.006011962890625, "objective/train/value_loss": 0.0003463196335360408, "objective/train/value_max": -4.83393669128418e-05, "objective/train/value_min": -0.95166015625, "objective/train/value_reward_corr": 0.7616251056106927, "objective/train/value_std": 0.018157958984375, "objective/train/weight_avg": 1.0010457038879395, "objective/train/weighted_lm_loss": 1.3917675018310547, "objective/train/weights_max": 1.769683599472046, "objective/train/weights_min": 0.36875197291374207, "theoretical_loss": 3.380544267445719, "tokens_seen": 2659188736 }, { "epoch": 0.61, "learning_rate": 0.00019614057610527162, "loss": 0.0669, "theoretical_loss": 3.380544267445719, "tokens_seen": 2659188736 }, { "epoch": 0.61, "learning_rate": 0.00019610045735376716, "loss": 0.067, "theoretical_loss": 3.3805312507715843, "tokens_seen": 2659319808 }, { "epoch": 0.61, "learning_rate": 0.0001960603386022627, "loss": 0.0663, "theoretical_loss": 3.380518234918625, "tokens_seen": 2659450880 }, { "epoch": 0.61, "learning_rate": 0.00019602021985075824, "loss": 0.0645, "theoretical_loss": 3.380505219886748, "tokens_seen": 2659581952 }, { "epoch": 0.61, "learning_rate": 0.00019598010109925378, "loss": 0.0636, "theoretical_loss": 3.3804922056758615, "tokens_seen": 2659713024 }, { "epoch": 0.61, "learning_rate": 0.00019593998234774935, "loss": 0.0659, "theoretical_loss": 3.380479192285873, "tokens_seen": 2659844096 }, { "epoch": 0.61, "learning_rate": 0.0001958998635962449, "loss": 0.0674, "theoretical_loss": 3.38046617971669, "tokens_seen": 2659975168 }, { "epoch": 0.61, "learning_rate": 0.00019585974484474043, "loss": 0.0641, "theoretical_loss": 3.3804531679682213, "tokens_seen": 2660106240 }, { "epoch": 0.61, "learning_rate": 0.00019581962609323598, "loss": 0.0667, "theoretical_loss": 3.3804401570403737, "tokens_seen": 2660237312 }, { "epoch": 0.61, "learning_rate": 0.00019577950734173152, "loss": 0.065, "theoretical_loss": 3.3804271469330556, "tokens_seen": 2660368384 }, { "epoch": 0.61, "learning_rate": 0.00019573938859022708, "loss": 0.067, "theoretical_loss": 3.3804141376461745, "tokens_seen": 2660499456 }, { "epoch": 0.61, "learning_rate": 0.00019569926983872263, "loss": 0.0622, "theoretical_loss": 3.3804011291796385, "tokens_seen": 2660630528 }, { "epoch": 0.61, "learning_rate": 0.00019565915108721817, "loss": 0.0646, "theoretical_loss": 3.3803881215333553, "tokens_seen": 2660761600 }, { "epoch": 0.61, "learning_rate": 0.0001956190323357137, "loss": 0.0647, "theoretical_loss": 3.380375114707233, "tokens_seen": 2660892672 }, { "epoch": 0.61, "learning_rate": 0.00019557891358420925, "loss": 0.0661, "theoretical_loss": 3.3803621087011786, "tokens_seen": 2661023744 }, { "epoch": 0.61, "learning_rate": 0.00019553879483270482, "loss": 0.0654, "theoretical_loss": 3.3803491035151016, "tokens_seen": 2661154816 }, { "epoch": 0.61, "learning_rate": 0.00019549867608120036, "loss": 0.0652, "theoretical_loss": 3.3803360991489084, "tokens_seen": 2661285888 }, { "epoch": 0.61, "learning_rate": 0.0001954585573296959, "loss": 0.0686, "theoretical_loss": 3.380323095602508, "tokens_seen": 2661416960 }, { "epoch": 0.61, "learning_rate": 0.00019541843857819144, "loss": 0.0679, "theoretical_loss": 3.3803100928758076, "tokens_seen": 2661548032 }, { "epoch": 0.61, "learning_rate": 0.00019537831982668698, "loss": 0.0625, "theoretical_loss": 3.3802970909687153, "tokens_seen": 2661679104 }, { "epoch": 0.61, "learning_rate": 0.00019533820107518255, "loss": 0.0661, "theoretical_loss": 3.3802840898811395, "tokens_seen": 2661810176 }, { "epoch": 0.61, "learning_rate": 0.0001952980823236781, "loss": 0.068, "theoretical_loss": 3.380271089612988, "tokens_seen": 2661941248 }, { "epoch": 0.61, "learning_rate": 0.00019525796357217363, "loss": 0.0629, "theoretical_loss": 3.380258090164169, "tokens_seen": 2662072320 }, { "epoch": 0.61, "learning_rate": 0.00019521784482066918, "loss": 0.0636, "theoretical_loss": 3.38024509153459, "tokens_seen": 2662203392 }, { "epoch": 0.61, "learning_rate": 0.00019517772606916472, "loss": 0.0662, "theoretical_loss": 3.380232093724159, "tokens_seen": 2662334464 }, { "epoch": 0.61, "objective/train/advantage_avg": 0.0009510324453003705, "objective/train/docs_used": 967352, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4020981788635254, "objective/train/original_loss": 1.4020981788635254, "objective/train/theoretical_loss": 3.3802190967327848, "objective/train/tokens_used": 1032990176, "objective/train/value_avg": -0.007404327392578125, "objective/train/value_loss": 0.0002571267250459641, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.6953125, "objective/train/value_reward_corr": 0.729641188964472, "objective/train/value_std": 0.0177001953125, "objective/train/weight_avg": 1.0010684728622437, "objective/train/weighted_lm_loss": 1.4030134677886963, "objective/train/weights_max": 1.3688416481018066, "objective/train/weights_min": 0.36978310346603394, "theoretical_loss": 3.3802190967327848, "tokens_seen": 2662465536 }, { "epoch": 0.61, "learning_rate": 0.00019513760731766029, "loss": 0.0654, "theoretical_loss": 3.3802190967327848, "tokens_seen": 2662465536 }, { "epoch": 0.61, "learning_rate": 0.00019509748856615583, "loss": 0.0663, "theoretical_loss": 3.380206100560375, "tokens_seen": 2662596608 }, { "epoch": 0.61, "learning_rate": 0.00019505736981465137, "loss": 0.0655, "theoretical_loss": 3.3801931052068377, "tokens_seen": 2662727680 }, { "epoch": 0.61, "learning_rate": 0.0001950172510631469, "loss": 0.0701, "theoretical_loss": 3.380180110672081, "tokens_seen": 2662858752 }, { "epoch": 0.61, "learning_rate": 0.00019497713231164245, "loss": 0.0671, "theoretical_loss": 3.380167116956013, "tokens_seen": 2662989824 }, { "epoch": 0.61, "learning_rate": 0.00019493701356013802, "loss": 0.0669, "theoretical_loss": 3.3801541240585418, "tokens_seen": 2663120896 }, { "epoch": 0.61, "learning_rate": 0.00019489689480863356, "loss": 0.0607, "theoretical_loss": 3.3801411319795758, "tokens_seen": 2663251968 }, { "epoch": 0.61, "learning_rate": 0.0001948567760571291, "loss": 0.0648, "theoretical_loss": 3.380128140719023, "tokens_seen": 2663383040 }, { "epoch": 0.61, "learning_rate": 0.00019481665730562464, "loss": 0.068, "theoretical_loss": 3.3801151502767914, "tokens_seen": 2663514112 }, { "epoch": 0.61, "learning_rate": 0.00019477653855412018, "loss": 0.0614, "theoretical_loss": 3.3801021606527897, "tokens_seen": 2663645184 }, { "epoch": 0.61, "learning_rate": 0.00019473641980261575, "loss": 0.0658, "theoretical_loss": 3.380089171846925, "tokens_seen": 2663776256 }, { "epoch": 0.61, "learning_rate": 0.0001946963010511113, "loss": 0.0657, "theoretical_loss": 3.380076183859107, "tokens_seen": 2663907328 }, { "epoch": 0.61, "learning_rate": 0.00019465618229960683, "loss": 0.0646, "theoretical_loss": 3.3800631966892425, "tokens_seen": 2664038400 }, { "epoch": 0.61, "learning_rate": 0.00019461606354810238, "loss": 0.063, "theoretical_loss": 3.380050210337241, "tokens_seen": 2664169472 }, { "epoch": 0.61, "learning_rate": 0.00019457594479659794, "loss": 0.0674, "theoretical_loss": 3.38003722480301, "tokens_seen": 2664300544 }, { "epoch": 0.61, "learning_rate": 0.00019453582604509349, "loss": 0.0653, "theoretical_loss": 3.380024240086458, "tokens_seen": 2664431616 }, { "epoch": 0.61, "learning_rate": 0.00019449570729358903, "loss": 0.0624, "theoretical_loss": 3.380011256187493, "tokens_seen": 2664562688 }, { "epoch": 0.61, "learning_rate": 0.00019445558854208457, "loss": 0.0667, "theoretical_loss": 3.379998273106024, "tokens_seen": 2664693760 }, { "epoch": 0.62, "learning_rate": 0.0001944154697905801, "loss": 0.0644, "theoretical_loss": 3.379985290841959, "tokens_seen": 2664824832 }, { "epoch": 0.62, "learning_rate": 0.00019437535103907568, "loss": 0.0657, "theoretical_loss": 3.379972309395206, "tokens_seen": 2664955904 }, { "epoch": 0.62, "learning_rate": 0.00019433523228757122, "loss": 0.063, "theoretical_loss": 3.3799593287656733, "tokens_seen": 2665086976 }, { "epoch": 0.62, "learning_rate": 0.00019429511353606676, "loss": 0.0666, "theoretical_loss": 3.3799463489532697, "tokens_seen": 2665218048 }, { "epoch": 0.62, "learning_rate": 0.00019425499478456233, "loss": 0.0714, "theoretical_loss": 3.3799333699579037, "tokens_seen": 2665349120 }, { "epoch": 0.62, "learning_rate": 0.00019421487603305784, "loss": 0.0629, "theoretical_loss": 3.379920391779483, "tokens_seen": 2665480192 }, { "epoch": 0.62, "learning_rate": 0.0001941747572815534, "loss": 0.0663, "theoretical_loss": 3.3799074144179166, "tokens_seen": 2665611264 }, { "epoch": 0.62, "objective/train/advantage_avg": -0.0003150572592858225, "objective/train/docs_used": 968517, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3106569051742554, "objective/train/original_loss": 1.310657024383545, "objective/train/theoretical_loss": 3.379894437873113, "objective/train/tokens_used": 1036266976, "objective/train/value_avg": -0.006114959716796875, "objective/train/value_loss": 0.00015085435006767511, "objective/train/value_max": -4.6133995056152344e-05, "objective/train/value_min": -0.358154296875, "objective/train/value_reward_corr": 0.7377772095007864, "objective/train/value_std": 0.01290130615234375, "objective/train/weight_avg": 0.9997545480728149, "objective/train/weighted_lm_loss": 1.3095524311065674, "objective/train/weights_max": 1.1575963497161865, "objective/train/weights_min": 0.3701203465461731, "theoretical_loss": 3.379894437873113, "tokens_seen": 2665742336 }, { "epoch": 0.62, "learning_rate": 0.00019413463853004895, "loss": 0.0625, "theoretical_loss": 3.379894437873113, "tokens_seen": 2665742336 }, { "epoch": 0.62, "learning_rate": 0.0001940945197785445, "loss": 0.0634, "theoretical_loss": 3.37988146214498, "tokens_seen": 2665873408 }, { "epoch": 0.62, "learning_rate": 0.00019405440102704006, "loss": 0.0675, "theoretical_loss": 3.3798684872334266, "tokens_seen": 2666004480 }, { "epoch": 0.62, "learning_rate": 0.00019401428227553558, "loss": 0.0648, "theoretical_loss": 3.379855513138361, "tokens_seen": 2666135552 }, { "epoch": 0.62, "learning_rate": 0.00019397416352403115, "loss": 0.0659, "theoretical_loss": 3.379842539859692, "tokens_seen": 2666266624 }, { "epoch": 0.62, "learning_rate": 0.00019393404477252669, "loss": 0.0668, "theoretical_loss": 3.379829567397328, "tokens_seen": 2666397696 }, { "epoch": 0.62, "learning_rate": 0.00019389392602102223, "loss": 0.066, "theoretical_loss": 3.379816595751177, "tokens_seen": 2666528768 }, { "epoch": 0.62, "learning_rate": 0.0001938538072695178, "loss": 0.0607, "theoretical_loss": 3.3798036249211485, "tokens_seen": 2666659840 }, { "epoch": 0.62, "learning_rate": 0.0001938136885180133, "loss": 0.0684, "theoretical_loss": 3.3797906549071506, "tokens_seen": 2666790912 }, { "epoch": 0.62, "learning_rate": 0.00019377356976650888, "loss": 0.0681, "theoretical_loss": 3.379777685709091, "tokens_seen": 2666921984 }, { "epoch": 0.62, "learning_rate": 0.00019373345101500442, "loss": 0.0639, "theoretical_loss": 3.3797647173268794, "tokens_seen": 2667053056 }, { "epoch": 0.62, "learning_rate": 0.00019369333226349996, "loss": 0.0657, "theoretical_loss": 3.379751749760424, "tokens_seen": 2667184128 }, { "epoch": 0.62, "learning_rate": 0.00019365321351199553, "loss": 0.0647, "theoretical_loss": 3.3797387830096337, "tokens_seen": 2667315200 }, { "epoch": 0.62, "learning_rate": 0.00019361309476049104, "loss": 0.0675, "theoretical_loss": 3.379725817074416, "tokens_seen": 2667446272 }, { "epoch": 0.62, "learning_rate": 0.0001935729760089866, "loss": 0.0646, "theoretical_loss": 3.379712851954681, "tokens_seen": 2667577344 }, { "epoch": 0.62, "learning_rate": 0.00019353285725748215, "loss": 0.0669, "theoretical_loss": 3.379699887650337, "tokens_seen": 2667708416 }, { "epoch": 0.62, "learning_rate": 0.0001934927385059777, "loss": 0.0622, "theoretical_loss": 3.3796869241612915, "tokens_seen": 2667839488 }, { "epoch": 0.62, "learning_rate": 0.00019345261975447326, "loss": 0.0628, "theoretical_loss": 3.3796739614874545, "tokens_seen": 2667970560 }, { "epoch": 0.62, "learning_rate": 0.00019341250100296878, "loss": 0.0613, "theoretical_loss": 3.379660999628734, "tokens_seen": 2668101632 }, { "epoch": 0.62, "learning_rate": 0.00019337238225146435, "loss": 0.0611, "theoretical_loss": 3.379648038585039, "tokens_seen": 2668232704 }, { "epoch": 0.62, "learning_rate": 0.0001933322634999599, "loss": 0.0653, "theoretical_loss": 3.3796350783562783, "tokens_seen": 2668363776 }, { "epoch": 0.62, "learning_rate": 0.00019329214474845543, "loss": 0.0688, "theoretical_loss": 3.3796221189423603, "tokens_seen": 2668494848 }, { "epoch": 0.62, "learning_rate": 0.000193252025996951, "loss": 0.0605, "theoretical_loss": 3.379609160343194, "tokens_seen": 2668625920 }, { "epoch": 0.62, "learning_rate": 0.0001932119072454465, "loss": 0.0662, "theoretical_loss": 3.3795962025586883, "tokens_seen": 2668756992 }, { "epoch": 0.62, "learning_rate": 0.00019317178849394208, "loss": 0.0641, "theoretical_loss": 3.3795832455887513, "tokens_seen": 2668888064 }, { "epoch": 0.62, "objective/train/advantage_avg": 8.085087756626308e-05, "objective/train/docs_used": 969669, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2883126735687256, "objective/train/original_loss": 1.2883124351501465, "objective/train/theoretical_loss": 3.3795702894332926, "objective/train/tokens_used": 1039543776, "objective/train/value_avg": -0.005352020263671875, "objective/train/value_loss": 6.927490176167339e-05, "objective/train/value_max": -4.3332576751708984e-05, "objective/train/value_min": -0.2384033203125, "objective/train/value_reward_corr": 0.787673952907743, "objective/train/value_std": 0.0103759765625, "objective/train/weight_avg": 1.000114917755127, "objective/train/weighted_lm_loss": 1.288221001625061, "objective/train/weights_max": 1.1297645568847656, "objective/train/weights_min": 0.7916809916496277, "theoretical_loss": 3.3795702894332926, "tokens_seen": 2669019136 }, { "epoch": 0.62, "learning_rate": 0.00019313166974243762, "loss": 0.0636, "theoretical_loss": 3.3795702894332926, "tokens_seen": 2669019136 }, { "epoch": 0.62, "learning_rate": 0.00019309155099093316, "loss": 0.066, "theoretical_loss": 3.3795573340922207, "tokens_seen": 2669150208 }, { "epoch": 0.62, "learning_rate": 0.00019305143223942873, "loss": 0.0667, "theoretical_loss": 3.379544379565444, "tokens_seen": 2669281280 }, { "epoch": 0.62, "learning_rate": 0.00019301131348792424, "loss": 0.0608, "theoretical_loss": 3.379531425852872, "tokens_seen": 2669412352 }, { "epoch": 0.62, "learning_rate": 0.0001929711947364198, "loss": 0.0634, "theoretical_loss": 3.3795184729544134, "tokens_seen": 2669543424 }, { "epoch": 0.62, "learning_rate": 0.00019293107598491535, "loss": 0.0651, "theoretical_loss": 3.379505520869977, "tokens_seen": 2669674496 }, { "epoch": 0.62, "learning_rate": 0.0001928909572334109, "loss": 0.0654, "theoretical_loss": 3.3794925695994715, "tokens_seen": 2669805568 }, { "epoch": 0.62, "learning_rate": 0.00019285083848190646, "loss": 0.0636, "theoretical_loss": 3.379479619142806, "tokens_seen": 2669936640 }, { "epoch": 0.62, "learning_rate": 0.00019281071973040198, "loss": 0.063, "theoretical_loss": 3.379466669499889, "tokens_seen": 2670067712 }, { "epoch": 0.62, "learning_rate": 0.00019277060097889755, "loss": 0.0611, "theoretical_loss": 3.3794537206706297, "tokens_seen": 2670198784 }, { "epoch": 0.62, "learning_rate": 0.0001927304822273931, "loss": 0.0621, "theoretical_loss": 3.3794407726549376, "tokens_seen": 2670329856 }, { "epoch": 0.62, "learning_rate": 0.00019269036347588863, "loss": 0.0602, "theoretical_loss": 3.379427825452721, "tokens_seen": 2670460928 }, { "epoch": 0.62, "learning_rate": 0.0001926502447243842, "loss": 0.0657, "theoretical_loss": 3.379414879063889, "tokens_seen": 2670592000 }, { "epoch": 0.62, "learning_rate": 0.0001926101259728797, "loss": 0.066, "theoretical_loss": 3.3794019334883503, "tokens_seen": 2670723072 }, { "epoch": 0.62, "learning_rate": 0.00019257000722137528, "loss": 0.0629, "theoretical_loss": 3.3793889887260145, "tokens_seen": 2670854144 }, { "epoch": 0.62, "learning_rate": 0.00019252988846987082, "loss": 0.0639, "theoretical_loss": 3.3793760447767904, "tokens_seen": 2670985216 }, { "epoch": 0.62, "learning_rate": 0.00019248976971836636, "loss": 0.0665, "theoretical_loss": 3.3793631016405867, "tokens_seen": 2671116288 }, { "epoch": 0.62, "learning_rate": 0.00019244965096686193, "loss": 0.0653, "theoretical_loss": 3.3793501593173128, "tokens_seen": 2671247360 }, { "epoch": 0.62, "learning_rate": 0.00019240953221535744, "loss": 0.062, "theoretical_loss": 3.379337217806878, "tokens_seen": 2671378432 }, { "epoch": 0.62, "learning_rate": 0.000192369413463853, "loss": 0.0652, "theoretical_loss": 3.3793242771091903, "tokens_seen": 2671509504 }, { "epoch": 0.62, "learning_rate": 0.00019232929471234855, "loss": 0.0628, "theoretical_loss": 3.37931133722416, "tokens_seen": 2671640576 }, { "epoch": 0.62, "learning_rate": 0.0001922891759608441, "loss": 0.0632, "theoretical_loss": 3.3792983981516955, "tokens_seen": 2671771648 }, { "epoch": 0.62, "learning_rate": 0.00019224905720933966, "loss": 0.0664, "theoretical_loss": 3.3792854598917064, "tokens_seen": 2671902720 }, { "epoch": 0.62, "learning_rate": 0.00019220893845783518, "loss": 0.0631, "theoretical_loss": 3.379272522444101, "tokens_seen": 2672033792 }, { "epoch": 0.62, "learning_rate": 0.00019216881970633075, "loss": 0.0658, "theoretical_loss": 3.3792595858087893, "tokens_seen": 2672164864 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.00014021573588252068, "objective/train/docs_used": 970931, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3969523906707764, "objective/train/original_loss": 1.3969523906707764, "objective/train/theoretical_loss": 3.3792466499856806, "objective/train/tokens_used": 1042820576, "objective/train/value_avg": -0.0079193115234375, "objective/train/value_loss": 0.0002478777023497969, "objective/train/value_max": -3.147125244140625e-05, "objective/train/value_min": -0.95361328125, "objective/train/value_reward_corr": 0.7539731849483176, "objective/train/value_std": 0.0177764892578125, "objective/train/weight_avg": 1.0002477169036865, "objective/train/weighted_lm_loss": 1.3972065448760986, "objective/train/weights_max": 1.4282433986663818, "objective/train/weights_min": 0.3710364103317261, "theoretical_loss": 3.3792466499856806, "tokens_seen": 2672295936 }, { "epoch": 0.62, "learning_rate": 0.0001921287009548263, "loss": 0.0662, "theoretical_loss": 3.3792466499856806, "tokens_seen": 2672295936 }, { "epoch": 0.62, "learning_rate": 0.00019208858220332186, "loss": 0.0619, "theoretical_loss": 3.379233714974683, "tokens_seen": 2672427008 }, { "epoch": 0.62, "learning_rate": 0.0001920484634518174, "loss": 0.0653, "theoretical_loss": 3.3792207807757064, "tokens_seen": 2672558080 }, { "epoch": 0.62, "learning_rate": 0.0001920083447003129, "loss": 0.0659, "theoretical_loss": 3.37920784738866, "tokens_seen": 2672689152 }, { "epoch": 0.62, "learning_rate": 0.00019196822594880848, "loss": 0.0669, "theoretical_loss": 3.379194914813453, "tokens_seen": 2672820224 }, { "epoch": 0.62, "learning_rate": 0.00019192810719730402, "loss": 0.0672, "theoretical_loss": 3.3791819830499943, "tokens_seen": 2672951296 }, { "epoch": 0.62, "learning_rate": 0.0001918879884457996, "loss": 0.0674, "theoretical_loss": 3.379169052098194, "tokens_seen": 2673082368 }, { "epoch": 0.62, "learning_rate": 0.00019184786969429513, "loss": 0.0653, "theoretical_loss": 3.3791561219579602, "tokens_seen": 2673213440 }, { "epoch": 0.62, "learning_rate": 0.00019180775094279065, "loss": 0.0657, "theoretical_loss": 3.3791431926292033, "tokens_seen": 2673344512 }, { "epoch": 0.62, "learning_rate": 0.00019176763219128621, "loss": 0.0702, "theoretical_loss": 3.3791302641118315, "tokens_seen": 2673475584 }, { "epoch": 0.62, "learning_rate": 0.00019172751343978176, "loss": 0.0621, "theoretical_loss": 3.3791173364057547, "tokens_seen": 2673606656 }, { "epoch": 0.62, "learning_rate": 0.00019168739468827732, "loss": 0.0588, "theoretical_loss": 3.3791044095108824, "tokens_seen": 2673737728 }, { "epoch": 0.62, "learning_rate": 0.00019164727593677286, "loss": 0.0663, "theoretical_loss": 3.3790914834271235, "tokens_seen": 2673868800 }, { "epoch": 0.62, "learning_rate": 0.00019160715718526838, "loss": 0.0656, "theoretical_loss": 3.379078558154388, "tokens_seen": 2673999872 }, { "epoch": 0.62, "learning_rate": 0.00019156703843376395, "loss": 0.0661, "theoretical_loss": 3.3790656336925844, "tokens_seen": 2674130944 }, { "epoch": 0.62, "learning_rate": 0.0001915269196822595, "loss": 0.0663, "theoretical_loss": 3.3790527100416226, "tokens_seen": 2674262016 }, { "epoch": 0.62, "learning_rate": 0.00019148680093075506, "loss": 0.0666, "theoretical_loss": 3.3790397872014117, "tokens_seen": 2674393088 }, { "epoch": 0.62, "learning_rate": 0.0001914466821792506, "loss": 0.0655, "theoretical_loss": 3.3790268651718613, "tokens_seen": 2674524160 }, { "epoch": 0.62, "learning_rate": 0.0001914065634277461, "loss": 0.0604, "theoretical_loss": 3.379013943952881, "tokens_seen": 2674655232 }, { "epoch": 0.62, "learning_rate": 0.00019136644467624168, "loss": 0.0635, "theoretical_loss": 3.37900102354438, "tokens_seen": 2674786304 }, { "epoch": 0.62, "learning_rate": 0.00019132632592473722, "loss": 0.0634, "theoretical_loss": 3.3789881039462677, "tokens_seen": 2674917376 }, { "epoch": 0.62, "learning_rate": 0.0001912862071732328, "loss": 0.0638, "theoretical_loss": 3.378975185158454, "tokens_seen": 2675048448 }, { "epoch": 0.62, "learning_rate": 0.00019124608842172833, "loss": 0.0621, "theoretical_loss": 3.3789622671808477, "tokens_seen": 2675179520 }, { "epoch": 0.62, "learning_rate": 0.00019120596967022385, "loss": 0.0599, "theoretical_loss": 3.3789493500133587, "tokens_seen": 2675310592 }, { "epoch": 0.62, "learning_rate": 0.00019116585091871941, "loss": 0.0667, "theoretical_loss": 3.3789364336558965, "tokens_seen": 2675441664 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.0002896097139455378, "objective/train/docs_used": 972181, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.186660885810852, "objective/train/original_loss": 1.1866607666015625, "objective/train/theoretical_loss": 3.3789235181083708, "objective/train/tokens_used": 1046097376, "objective/train/value_avg": -0.00577545166015625, "objective/train/value_loss": 0.00032728290534578264, "objective/train/value_max": -2.5272369384765625e-05, "objective/train/value_min": -0.60693359375, "objective/train/value_reward_corr": 0.6638889198872486, "objective/train/value_std": 0.01361083984375, "objective/train/weight_avg": 1.0004241466522217, "objective/train/weighted_lm_loss": 1.1869040727615356, "objective/train/weights_max": 1.6303558349609375, "objective/train/weights_min": 0.22904279828071594, "theoretical_loss": 3.3789235181083708, "tokens_seen": 2675572736 }, { "epoch": 0.62, "learning_rate": 0.00019112573216721496, "loss": 0.0623, "theoretical_loss": 3.3789235181083708, "tokens_seen": 2675572736 }, { "epoch": 0.62, "learning_rate": 0.00019108561341571052, "loss": 0.0614, "theoretical_loss": 3.3789106033706906, "tokens_seen": 2675703808 }, { "epoch": 0.62, "learning_rate": 0.00019104549466420607, "loss": 0.0671, "theoretical_loss": 3.378897689442766, "tokens_seen": 2675834880 }, { "epoch": 0.62, "learning_rate": 0.00019100537591270158, "loss": 0.0643, "theoretical_loss": 3.378884776324506, "tokens_seen": 2675965952 }, { "epoch": 0.62, "learning_rate": 0.00019096525716119715, "loss": 0.0641, "theoretical_loss": 3.378871864015821, "tokens_seen": 2676097024 }, { "epoch": 0.62, "learning_rate": 0.0001909251384096927, "loss": 0.0637, "theoretical_loss": 3.3788589525166195, "tokens_seen": 2676228096 }, { "epoch": 0.62, "learning_rate": 0.00019088501965818826, "loss": 0.065, "theoretical_loss": 3.378846041826812, "tokens_seen": 2676359168 }, { "epoch": 0.62, "learning_rate": 0.0001908449009066838, "loss": 0.0634, "theoretical_loss": 3.378833131946308, "tokens_seen": 2676490240 }, { "epoch": 0.62, "learning_rate": 0.0001908047821551793, "loss": 0.0658, "theoretical_loss": 3.378820222875017, "tokens_seen": 2676621312 }, { "epoch": 0.62, "learning_rate": 0.00019076466340367488, "loss": 0.0658, "theoretical_loss": 3.378807314612848, "tokens_seen": 2676752384 }, { "epoch": 0.62, "learning_rate": 0.00019072454465217042, "loss": 0.0646, "theoretical_loss": 3.378794407159712, "tokens_seen": 2676883456 }, { "epoch": 0.62, "learning_rate": 0.000190684425900666, "loss": 0.0629, "theoretical_loss": 3.378781500515518, "tokens_seen": 2677014528 }, { "epoch": 0.62, "learning_rate": 0.00019064430714916153, "loss": 0.0604, "theoretical_loss": 3.3787685946801753, "tokens_seen": 2677145600 }, { "epoch": 0.62, "learning_rate": 0.00019060418839765705, "loss": 0.066, "theoretical_loss": 3.3787556896535937, "tokens_seen": 2677276672 }, { "epoch": 0.62, "learning_rate": 0.00019056406964615261, "loss": 0.0624, "theoretical_loss": 3.378742785435684, "tokens_seen": 2677407744 }, { "epoch": 0.62, "learning_rate": 0.00019052395089464816, "loss": 0.0664, "theoretical_loss": 3.378729882026355, "tokens_seen": 2677538816 }, { "epoch": 0.62, "learning_rate": 0.00019048383214314372, "loss": 0.0633, "theoretical_loss": 3.3787169794255165, "tokens_seen": 2677669888 }, { "epoch": 0.62, "learning_rate": 0.00019044371339163927, "loss": 0.0591, "theoretical_loss": 3.3787040776330786, "tokens_seen": 2677800960 }, { "epoch": 0.62, "learning_rate": 0.00019040359464013478, "loss": 0.0631, "theoretical_loss": 3.3786911766489505, "tokens_seen": 2677932032 }, { "epoch": 0.62, "learning_rate": 0.00019036347588863035, "loss": 0.0669, "theoretical_loss": 3.3786782764730425, "tokens_seen": 2678063104 }, { "epoch": 0.62, "learning_rate": 0.0001903233571371259, "loss": 0.064, "theoretical_loss": 3.378665377105264, "tokens_seen": 2678194176 }, { "epoch": 0.62, "learning_rate": 0.00019028323838562146, "loss": 0.0665, "theoretical_loss": 3.3786524785455256, "tokens_seen": 2678325248 }, { "epoch": 0.62, "learning_rate": 0.000190243119634117, "loss": 0.063, "theoretical_loss": 3.378639580793737, "tokens_seen": 2678456320 }, { "epoch": 0.62, "learning_rate": 0.00019020300088261251, "loss": 0.0638, "theoretical_loss": 3.378626683849807, "tokens_seen": 2678587392 }, { "epoch": 0.62, "learning_rate": 0.00019016288213110808, "loss": 0.0659, "theoretical_loss": 3.3786137877136464, "tokens_seen": 2678718464 }, { "epoch": 0.62, "objective/train/advantage_avg": 0.0007648494793102145, "objective/train/docs_used": 973380, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2527201175689697, "objective/train/original_loss": 1.2527201175689697, "objective/train/theoretical_loss": 3.378600892385165, "objective/train/tokens_used": 1049374176, "objective/train/value_avg": -0.00516510009765625, "objective/train/value_loss": 6.165447848616168e-05, "objective/train/value_max": -6.502866744995117e-05, "objective/train/value_min": -0.321533203125, "objective/train/value_reward_corr": 0.7437751096086855, "objective/train/value_std": 0.008819580078125, "objective/train/weight_avg": 1.000795602798462, "objective/train/weighted_lm_loss": 1.2538611888885498, "objective/train/weights_max": 1.1400858163833618, "objective/train/weights_min": 0.8976178765296936, "theoretical_loss": 3.378600892385165, "tokens_seen": 2678849536 }, { "epoch": 0.62, "learning_rate": 0.00019012276337960362, "loss": 0.0665, "theoretical_loss": 3.378600892385165, "tokens_seen": 2678849536 }, { "epoch": 0.62, "learning_rate": 0.0001900826446280992, "loss": 0.0635, "theoretical_loss": 3.3785879978642726, "tokens_seen": 2678980608 }, { "epoch": 0.62, "learning_rate": 0.00019004252587659473, "loss": 0.0613, "theoretical_loss": 3.378575104150879, "tokens_seen": 2679111680 }, { "epoch": 0.62, "learning_rate": 0.00019000240712509025, "loss": 0.0662, "theoretical_loss": 3.378562211244894, "tokens_seen": 2679242752 }, { "epoch": 0.62, "learning_rate": 0.00018996228837358582, "loss": 0.0646, "theoretical_loss": 3.378549319146228, "tokens_seen": 2679373824 }, { "epoch": 0.62, "learning_rate": 0.00018992216962208136, "loss": 0.0669, "theoretical_loss": 3.378536427854791, "tokens_seen": 2679504896 }, { "epoch": 0.62, "learning_rate": 0.00018988205087057693, "loss": 0.067, "theoretical_loss": 3.3785235373704925, "tokens_seen": 2679635968 }, { "epoch": 0.62, "learning_rate": 0.00018984193211907247, "loss": 0.0647, "theoretical_loss": 3.3785106476932425, "tokens_seen": 2679767040 }, { "epoch": 0.62, "learning_rate": 0.00018980181336756798, "loss": 0.0657, "theoretical_loss": 3.3784977588229514, "tokens_seen": 2679898112 }, { "epoch": 0.62, "learning_rate": 0.00018976169461606355, "loss": 0.0653, "theoretical_loss": 3.378484870759529, "tokens_seen": 2680029184 }, { "epoch": 0.62, "learning_rate": 0.0001897215758645591, "loss": 0.0668, "theoretical_loss": 3.378471983502885, "tokens_seen": 2680160256 }, { "epoch": 0.62, "learning_rate": 0.00018968145711305466, "loss": 0.0661, "theoretical_loss": 3.3784590970529305, "tokens_seen": 2680291328 }, { "epoch": 0.62, "learning_rate": 0.0001896413383615502, "loss": 0.0628, "theoretical_loss": 3.378446211409574, "tokens_seen": 2680422400 }, { "epoch": 0.62, "learning_rate": 0.00018960121961004571, "loss": 0.0672, "theoretical_loss": 3.3784333265727273, "tokens_seen": 2680553472 }, { "epoch": 0.62, "learning_rate": 0.00018956110085854128, "loss": 0.0626, "theoretical_loss": 3.378420442542299, "tokens_seen": 2680684544 }, { "epoch": 0.62, "learning_rate": 0.00018952098210703682, "loss": 0.0622, "theoretical_loss": 3.3784075593182, "tokens_seen": 2680815616 }, { "epoch": 0.62, "learning_rate": 0.0001894808633555324, "loss": 0.0609, "theoretical_loss": 3.37839467690034, "tokens_seen": 2680946688 }, { "epoch": 0.62, "learning_rate": 0.00018944074460402793, "loss": 0.0643, "theoretical_loss": 3.3783817952886293, "tokens_seen": 2681077760 }, { "epoch": 0.62, "learning_rate": 0.00018940062585252347, "loss": 0.0628, "theoretical_loss": 3.3783689144829783, "tokens_seen": 2681208832 }, { "epoch": 0.63, "learning_rate": 0.00018936050710101902, "loss": 0.0666, "theoretical_loss": 3.3783560344832972, "tokens_seen": 2681339904 }, { "epoch": 0.63, "learning_rate": 0.00018932038834951456, "loss": 0.0652, "theoretical_loss": 3.3783431552894956, "tokens_seen": 2681470976 }, { "epoch": 0.63, "learning_rate": 0.00018928026959801013, "loss": 0.0661, "theoretical_loss": 3.378330276901484, "tokens_seen": 2681602048 }, { "epoch": 0.63, "learning_rate": 0.00018924015084650567, "loss": 0.0663, "theoretical_loss": 3.378317399319173, "tokens_seen": 2681733120 }, { "epoch": 0.63, "learning_rate": 0.0001892000320950012, "loss": 0.0643, "theoretical_loss": 3.3783045225424724, "tokens_seen": 2681864192 }, { "epoch": 0.63, "learning_rate": 0.00018915991334349675, "loss": 0.0682, "theoretical_loss": 3.378291646571292, "tokens_seen": 2681995264 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.00024343299446627498, "objective/train/docs_used": 974516, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3272182941436768, "objective/train/original_loss": 1.3272182941436768, "objective/train/theoretical_loss": 3.378278771405543, "objective/train/tokens_used": 1052650976, "objective/train/value_avg": -0.007663726806640625, "objective/train/value_loss": 0.00023275704006664455, "objective/train/value_max": -2.4318695068359375e-05, "objective/train/value_min": -0.623046875, "objective/train/value_reward_corr": 0.7018084422852744, "objective/train/value_std": 0.013885498046875, "objective/train/weight_avg": 1.000346302986145, "objective/train/weighted_lm_loss": 1.3273406028747559, "objective/train/weights_max": 1.2213431596755981, "objective/train/weights_min": 0.3685617744922638, "theoretical_loss": 3.378278771405543, "tokens_seen": 2682126336 }, { "epoch": 0.63, "learning_rate": 0.0001891197945919923, "loss": 0.0683, "theoretical_loss": 3.378278771405543, "tokens_seen": 2682126336 }, { "epoch": 0.63, "learning_rate": 0.00018907967584048786, "loss": 0.0692, "theoretical_loss": 3.378265897045135, "tokens_seen": 2682257408 }, { "epoch": 0.63, "learning_rate": 0.0001890395570889834, "loss": 0.0642, "theoretical_loss": 3.3782530234899784, "tokens_seen": 2682388480 }, { "epoch": 0.63, "learning_rate": 0.00018899943833747894, "loss": 0.0616, "theoretical_loss": 3.3782401507399835, "tokens_seen": 2682519552 }, { "epoch": 0.63, "learning_rate": 0.00018895931958597448, "loss": 0.0635, "theoretical_loss": 3.3782272787950607, "tokens_seen": 2682650624 }, { "epoch": 0.63, "learning_rate": 0.00018891920083447002, "loss": 0.0657, "theoretical_loss": 3.3782144076551206, "tokens_seen": 2682781696 }, { "epoch": 0.63, "learning_rate": 0.0001888790820829656, "loss": 0.0638, "theoretical_loss": 3.3782015373200727, "tokens_seen": 2682912768 }, { "epoch": 0.63, "learning_rate": 0.00018883896333146113, "loss": 0.0678, "theoretical_loss": 3.3781886677898285, "tokens_seen": 2683043840 }, { "epoch": 0.63, "learning_rate": 0.00018879884457995668, "loss": 0.0645, "theoretical_loss": 3.378175799064297, "tokens_seen": 2683174912 }, { "epoch": 0.63, "learning_rate": 0.00018875872582845222, "loss": 0.0656, "theoretical_loss": 3.3781629311433896, "tokens_seen": 2683305984 }, { "epoch": 0.63, "learning_rate": 0.00018871860707694776, "loss": 0.0669, "theoretical_loss": 3.378150064027017, "tokens_seen": 2683437056 }, { "epoch": 0.63, "learning_rate": 0.00018867848832544333, "loss": 0.0638, "theoretical_loss": 3.378137197715088, "tokens_seen": 2683568128 }, { "epoch": 0.63, "learning_rate": 0.00018863836957393887, "loss": 0.0626, "theoretical_loss": 3.3781243322075145, "tokens_seen": 2683699200 }, { "epoch": 0.63, "learning_rate": 0.0001885982508224344, "loss": 0.0645, "theoretical_loss": 3.3781114675042065, "tokens_seen": 2683830272 }, { "epoch": 0.63, "learning_rate": 0.00018855813207092995, "loss": 0.0666, "theoretical_loss": 3.3780986036050744, "tokens_seen": 2683961344 }, { "epoch": 0.63, "learning_rate": 0.0001885180133194255, "loss": 0.0687, "theoretical_loss": 3.3780857405100284, "tokens_seen": 2684092416 }, { "epoch": 0.63, "learning_rate": 0.00018847789456792106, "loss": 0.0636, "theoretical_loss": 3.3780728782189793, "tokens_seen": 2684223488 }, { "epoch": 0.63, "learning_rate": 0.0001884377758164166, "loss": 0.0674, "theoretical_loss": 3.3780600167318378, "tokens_seen": 2684354560 }, { "epoch": 0.63, "learning_rate": 0.00018839765706491214, "loss": 0.0646, "theoretical_loss": 3.3780471560485137, "tokens_seen": 2684485632 }, { "epoch": 0.63, "learning_rate": 0.00018835753831340768, "loss": 0.0666, "theoretical_loss": 3.3780342961689183, "tokens_seen": 2684616704 }, { "epoch": 0.63, "learning_rate": 0.00018831741956190322, "loss": 0.0691, "theoretical_loss": 3.3780214370929613, "tokens_seen": 2684747776 }, { "epoch": 0.63, "learning_rate": 0.0001882773008103988, "loss": 0.0629, "theoretical_loss": 3.3780085788205545, "tokens_seen": 2684878848 }, { "epoch": 0.63, "learning_rate": 0.00018823718205889433, "loss": 0.0655, "theoretical_loss": 3.377995721351607, "tokens_seen": 2685009920 }, { "epoch": 0.63, "learning_rate": 0.00018819706330738988, "loss": 0.0656, "theoretical_loss": 3.3779828646860306, "tokens_seen": 2685140992 }, { "epoch": 0.63, "learning_rate": 0.00018815694455588542, "loss": 0.064, "theoretical_loss": 3.377970008823735, "tokens_seen": 2685272064 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.00033941451692953706, "objective/train/docs_used": 975809, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.315285086631775, "objective/train/original_loss": 1.3152847290039062, "objective/train/theoretical_loss": 3.377957153764631, "objective/train/tokens_used": 1055927776, "objective/train/value_avg": -0.01020050048828125, "objective/train/value_loss": 0.0005690144607797265, "objective/train/value_max": -4.3332576751708984e-05, "objective/train/value_min": -0.98193359375, "objective/train/value_reward_corr": 0.749427991853407, "objective/train/value_std": 0.0279388427734375, "objective/train/weight_avg": 1.000599980354309, "objective/train/weighted_lm_loss": 1.3158401250839233, "objective/train/weights_max": 2.4378421306610107, "objective/train/weights_min": 0.36930587887763977, "theoretical_loss": 3.377957153764631, "tokens_seen": 2685403136 }, { "epoch": 0.63, "learning_rate": 0.00018811682580438096, "loss": 0.0672, "theoretical_loss": 3.377957153764631, "tokens_seen": 2685403136 }, { "epoch": 0.63, "learning_rate": 0.00018807670705287653, "loss": 0.0655, "theoretical_loss": 3.3779442995086297, "tokens_seen": 2685534208 }, { "epoch": 0.63, "learning_rate": 0.00018803658830137207, "loss": 0.0686, "theoretical_loss": 3.3779314460556416, "tokens_seen": 2685665280 }, { "epoch": 0.63, "learning_rate": 0.0001879964695498676, "loss": 0.0612, "theoretical_loss": 3.377918593405577, "tokens_seen": 2685796352 }, { "epoch": 0.63, "learning_rate": 0.00018795635079836315, "loss": 0.0644, "theoretical_loss": 3.3779057415583464, "tokens_seen": 2685927424 }, { "epoch": 0.63, "learning_rate": 0.0001879162320468587, "loss": 0.0649, "theoretical_loss": 3.377892890513861, "tokens_seen": 2686058496 }, { "epoch": 0.63, "learning_rate": 0.00018787611329535426, "loss": 0.0636, "theoretical_loss": 3.3778800402720313, "tokens_seen": 2686189568 }, { "epoch": 0.63, "learning_rate": 0.0001878359945438498, "loss": 0.066, "theoretical_loss": 3.3778671908327684, "tokens_seen": 2686320640 }, { "epoch": 0.63, "learning_rate": 0.00018779587579234534, "loss": 0.0652, "theoretical_loss": 3.377854342195982, "tokens_seen": 2686451712 }, { "epoch": 0.63, "learning_rate": 0.00018775575704084088, "loss": 0.0654, "theoretical_loss": 3.3778414943615838, "tokens_seen": 2686582784 }, { "epoch": 0.63, "learning_rate": 0.00018771563828933643, "loss": 0.0691, "theoretical_loss": 3.3778286473294843, "tokens_seen": 2686713856 }, { "epoch": 0.63, "learning_rate": 0.000187675519537832, "loss": 0.0652, "theoretical_loss": 3.377815801099594, "tokens_seen": 2686844928 }, { "epoch": 0.63, "learning_rate": 0.00018763540078632754, "loss": 0.0666, "theoretical_loss": 3.377802955671824, "tokens_seen": 2686976000 }, { "epoch": 0.63, "learning_rate": 0.00018759528203482308, "loss": 0.0666, "theoretical_loss": 3.3777901110460844, "tokens_seen": 2687107072 }, { "epoch": 0.63, "learning_rate": 0.00018755516328331862, "loss": 0.0644, "theoretical_loss": 3.377777267222287, "tokens_seen": 2687238144 }, { "epoch": 0.63, "learning_rate": 0.00018751504453181416, "loss": 0.0651, "theoretical_loss": 3.3777644242003424, "tokens_seen": 2687369216 }, { "epoch": 0.63, "learning_rate": 0.00018747492578030973, "loss": 0.0623, "theoretical_loss": 3.377751581980161, "tokens_seen": 2687500288 }, { "epoch": 0.63, "learning_rate": 0.00018743480702880527, "loss": 0.0683, "theoretical_loss": 3.3777387405616537, "tokens_seen": 2687631360 }, { "epoch": 0.63, "learning_rate": 0.0001873946882773008, "loss": 0.0663, "theoretical_loss": 3.3777258999447315, "tokens_seen": 2687762432 }, { "epoch": 0.63, "learning_rate": 0.00018735456952579635, "loss": 0.0654, "theoretical_loss": 3.3777130601293055, "tokens_seen": 2687893504 }, { "epoch": 0.63, "learning_rate": 0.0001873144507742919, "loss": 0.0642, "theoretical_loss": 3.377700221115286, "tokens_seen": 2688024576 }, { "epoch": 0.63, "learning_rate": 0.00018727433202278746, "loss": 0.0681, "theoretical_loss": 3.3776873829025846, "tokens_seen": 2688155648 }, { "epoch": 0.63, "learning_rate": 0.000187234213271283, "loss": 0.0643, "theoretical_loss": 3.3776745454911117, "tokens_seen": 2688286720 }, { "epoch": 0.63, "learning_rate": 0.00018719409451977854, "loss": 0.0661, "theoretical_loss": 3.3776617088807788, "tokens_seen": 2688417792 }, { "epoch": 0.63, "learning_rate": 0.00018715397576827408, "loss": 0.0651, "theoretical_loss": 3.377648873071496, "tokens_seen": 2688548864 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.0005628898506984115, "objective/train/docs_used": 976981, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3696991205215454, "objective/train/original_loss": 1.3696991205215454, "objective/train/theoretical_loss": 3.377636038063175, "objective/train/tokens_used": 1059204576, "objective/train/value_avg": -0.005584716796875, "objective/train/value_loss": 0.0001737498096190393, "objective/train/value_max": -4.649162292480469e-05, "objective/train/value_min": -0.3447265625, "objective/train/value_reward_corr": 0.6800126520918235, "objective/train/value_std": 0.01206207275390625, "objective/train/weight_avg": 1.000638484954834, "objective/train/weighted_lm_loss": 1.3709291219711304, "objective/train/weights_max": 1.20888352394104, "objective/train/weights_min": 0.3681375980377197, "theoretical_loss": 3.377636038063175, "tokens_seen": 2688679936 }, { "epoch": 0.63, "learning_rate": 0.00018711385701676965, "loss": 0.0689, "theoretical_loss": 3.377636038063175, "tokens_seen": 2688679936 }, { "epoch": 0.63, "learning_rate": 0.0001870737382652652, "loss": 0.0625, "theoretical_loss": 3.377623203855727, "tokens_seen": 2688811008 }, { "epoch": 0.63, "learning_rate": 0.00018703361951376074, "loss": 0.0632, "theoretical_loss": 3.377610370449062, "tokens_seen": 2688942080 }, { "epoch": 0.63, "learning_rate": 0.00018699350076225628, "loss": 0.0646, "theoretical_loss": 3.3775975378430916, "tokens_seen": 2689073152 }, { "epoch": 0.63, "learning_rate": 0.00018695338201075182, "loss": 0.0646, "theoretical_loss": 3.3775847060377266, "tokens_seen": 2689204224 }, { "epoch": 0.63, "learning_rate": 0.0001869132632592474, "loss": 0.0665, "theoretical_loss": 3.3775718750328783, "tokens_seen": 2689335296 }, { "epoch": 0.63, "learning_rate": 0.00018687314450774293, "loss": 0.0636, "theoretical_loss": 3.3775590448284576, "tokens_seen": 2689466368 }, { "epoch": 0.63, "learning_rate": 0.00018683302575623847, "loss": 0.0655, "theoretical_loss": 3.3775462154243754, "tokens_seen": 2689597440 }, { "epoch": 0.63, "learning_rate": 0.000186792907004734, "loss": 0.0638, "theoretical_loss": 3.3775333868205433, "tokens_seen": 2689728512 }, { "epoch": 0.63, "learning_rate": 0.00018675278825322955, "loss": 0.0627, "theoretical_loss": 3.3775205590168715, "tokens_seen": 2689859584 }, { "epoch": 0.63, "learning_rate": 0.00018671266950172512, "loss": 0.0637, "theoretical_loss": 3.3775077320132723, "tokens_seen": 2689990656 }, { "epoch": 0.63, "learning_rate": 0.00018667255075022066, "loss": 0.0688, "theoretical_loss": 3.377494905809656, "tokens_seen": 2690121728 }, { "epoch": 0.63, "learning_rate": 0.0001866324319987162, "loss": 0.0656, "theoretical_loss": 3.377482080405934, "tokens_seen": 2690252800 }, { "epoch": 0.63, "learning_rate": 0.00018659231324721174, "loss": 0.066, "theoretical_loss": 3.377469255802017, "tokens_seen": 2690383872 }, { "epoch": 0.63, "learning_rate": 0.00018655219449570729, "loss": 0.0611, "theoretical_loss": 3.3774564319978166, "tokens_seen": 2690514944 }, { "epoch": 0.63, "learning_rate": 0.00018651207574420285, "loss": 0.0603, "theoretical_loss": 3.3774436089932443, "tokens_seen": 2690646016 }, { "epoch": 0.63, "learning_rate": 0.0001864719569926984, "loss": 0.0689, "theoretical_loss": 3.3774307867882105, "tokens_seen": 2690777088 }, { "epoch": 0.63, "learning_rate": 0.00018643183824119394, "loss": 0.0672, "theoretical_loss": 3.3774179653826266, "tokens_seen": 2690908160 }, { "epoch": 0.63, "learning_rate": 0.0001863917194896895, "loss": 0.0637, "theoretical_loss": 3.3774051447764046, "tokens_seen": 2691039232 }, { "epoch": 0.63, "learning_rate": 0.00018635160073818502, "loss": 0.0656, "theoretical_loss": 3.3773923249694544, "tokens_seen": 2691170304 }, { "epoch": 0.63, "learning_rate": 0.0001863114819866806, "loss": 0.069, "theoretical_loss": 3.3773795059616885, "tokens_seen": 2691301376 }, { "epoch": 0.63, "learning_rate": 0.00018627136323517613, "loss": 0.0666, "theoretical_loss": 3.3773666877530175, "tokens_seen": 2691432448 }, { "epoch": 0.63, "learning_rate": 0.00018623124448367167, "loss": 0.0634, "theoretical_loss": 3.377353870343353, "tokens_seen": 2691563520 }, { "epoch": 0.63, "learning_rate": 0.00018619112573216724, "loss": 0.067, "theoretical_loss": 3.377341053732606, "tokens_seen": 2691694592 }, { "epoch": 0.63, "learning_rate": 0.00018615100698066275, "loss": 0.0606, "theoretical_loss": 3.377328237920688, "tokens_seen": 2691825664 }, { "epoch": 0.63, "objective/train/advantage_avg": 0.0014841936063021421, "objective/train/docs_used": 978239, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2203998565673828, "objective/train/original_loss": 1.2203996181488037, "objective/train/theoretical_loss": 3.37731542290751, "objective/train/tokens_used": 1062481376, "objective/train/value_avg": -0.008636474609375, "objective/train/value_loss": 0.00020507659064605832, "objective/train/value_max": -2.8848648071289062e-05, "objective/train/value_min": -0.456298828125, "objective/train/value_reward_corr": 0.696050105618611, "objective/train/value_std": 0.0152740478515625, "objective/train/weight_avg": 1.0015755891799927, "objective/train/weighted_lm_loss": 1.2221832275390625, "objective/train/weights_max": 1.5782219171524048, "objective/train/weights_min": 0.37886282801628113, "theoretical_loss": 3.37731542290751, "tokens_seen": 2691956736 }, { "epoch": 0.63, "learning_rate": 0.00018611088822915832, "loss": 0.0616, "theoretical_loss": 3.37731542290751, "tokens_seen": 2691956736 }, { "epoch": 0.63, "learning_rate": 0.00018607076947765386, "loss": 0.068, "theoretical_loss": 3.3773026086929834, "tokens_seen": 2692087808 }, { "epoch": 0.63, "learning_rate": 0.0001860306507261494, "loss": 0.0662, "theoretical_loss": 3.37728979527702, "tokens_seen": 2692218880 }, { "epoch": 0.63, "learning_rate": 0.00018599053197464497, "loss": 0.0647, "theoretical_loss": 3.377276982659531, "tokens_seen": 2692349952 }, { "epoch": 0.63, "learning_rate": 0.00018595041322314049, "loss": 0.0653, "theoretical_loss": 3.3772641708404274, "tokens_seen": 2692481024 }, { "epoch": 0.63, "learning_rate": 0.00018591029447163605, "loss": 0.0627, "theoretical_loss": 3.377251359819621, "tokens_seen": 2692612096 }, { "epoch": 0.63, "learning_rate": 0.0001858701757201316, "loss": 0.0648, "theoretical_loss": 3.3772385495970227, "tokens_seen": 2692743168 }, { "epoch": 0.63, "learning_rate": 0.00018583005696862714, "loss": 0.0661, "theoretical_loss": 3.3772257401725447, "tokens_seen": 2692874240 }, { "epoch": 0.63, "learning_rate": 0.0001857899382171227, "loss": 0.0662, "theoretical_loss": 3.3772129315460977, "tokens_seen": 2693005312 }, { "epoch": 0.63, "learning_rate": 0.00018574981946561822, "loss": 0.0657, "theoretical_loss": 3.3772001237175933, "tokens_seen": 2693136384 }, { "epoch": 0.63, "learning_rate": 0.0001857097007141138, "loss": 0.0678, "theoretical_loss": 3.3771873166869435, "tokens_seen": 2693267456 }, { "epoch": 0.63, "learning_rate": 0.00018566958196260933, "loss": 0.0642, "theoretical_loss": 3.3771745104540587, "tokens_seen": 2693398528 }, { "epoch": 0.63, "learning_rate": 0.00018562946321110487, "loss": 0.0629, "theoretical_loss": 3.377161705018852, "tokens_seen": 2693529600 }, { "epoch": 0.63, "learning_rate": 0.00018558934445960044, "loss": 0.0651, "theoretical_loss": 3.377148900381233, "tokens_seen": 2693660672 }, { "epoch": 0.63, "learning_rate": 0.00018554922570809595, "loss": 0.0662, "theoretical_loss": 3.377136096541115, "tokens_seen": 2693791744 }, { "epoch": 0.63, "learning_rate": 0.00018550910695659152, "loss": 0.0639, "theoretical_loss": 3.377123293498408, "tokens_seen": 2693922816 }, { "epoch": 0.63, "learning_rate": 0.00018546898820508706, "loss": 0.0691, "theoretical_loss": 3.377110491253024, "tokens_seen": 2694053888 }, { "epoch": 0.63, "learning_rate": 0.0001854288694535826, "loss": 0.0639, "theoretical_loss": 3.3770976898048755, "tokens_seen": 2694184960 }, { "epoch": 0.63, "learning_rate": 0.00018538875070207817, "loss": 0.0648, "theoretical_loss": 3.377084889153873, "tokens_seen": 2694316032 }, { "epoch": 0.63, "learning_rate": 0.00018534863195057369, "loss": 0.0662, "theoretical_loss": 3.377072089299929, "tokens_seen": 2694447104 }, { "epoch": 0.63, "learning_rate": 0.00018530851319906925, "loss": 0.0606, "theoretical_loss": 3.3770592902429537, "tokens_seen": 2694578176 }, { "epoch": 0.63, "learning_rate": 0.0001852683944475648, "loss": 0.0657, "theoretical_loss": 3.37704649198286, "tokens_seen": 2694709248 }, { "epoch": 0.63, "learning_rate": 0.00018522827569606034, "loss": 0.0643, "theoretical_loss": 3.377033694519559, "tokens_seen": 2694840320 }, { "epoch": 0.63, "learning_rate": 0.0001851881569445559, "loss": 0.0619, "theoretical_loss": 3.3770208978529626, "tokens_seen": 2694971392 }, { "epoch": 0.63, "learning_rate": 0.00018514803819305142, "loss": 0.0642, "theoretical_loss": 3.377008101982982, "tokens_seen": 2695102464 }, { "epoch": 0.63, "objective/train/advantage_avg": -0.0001955348125193268, "objective/train/docs_used": 979344, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.440197467803955, "objective/train/original_loss": 1.4401977062225342, "objective/train/theoretical_loss": 3.3769953069095298, "objective/train/tokens_used": 1065758176, "objective/train/value_avg": -0.004871368408203125, "objective/train/value_loss": 0.0001874204317573458, "objective/train/value_max": -5.066394805908203e-05, "objective/train/value_min": -0.364013671875, "objective/train/value_reward_corr": 0.7222013307325299, "objective/train/value_std": 0.01168060302734375, "objective/train/weight_avg": 0.9998871684074402, "objective/train/weighted_lm_loss": 1.4403132200241089, "objective/train/weights_max": 1.1472055912017822, "objective/train/weights_min": 0.40593984723091125, "theoretical_loss": 3.3769953069095298, "tokens_seen": 2695233536 }, { "epoch": 0.63, "learning_rate": 0.000185107919441547, "loss": 0.0657, "theoretical_loss": 3.3769953069095298, "tokens_seen": 2695233536 }, { "epoch": 0.63, "learning_rate": 0.00018506780069004253, "loss": 0.0655, "theoretical_loss": 3.3769825126325164, "tokens_seen": 2695364608 }, { "epoch": 0.63, "learning_rate": 0.00018502768193853807, "loss": 0.0672, "theoretical_loss": 3.3769697191518544, "tokens_seen": 2695495680 }, { "epoch": 0.63, "learning_rate": 0.00018498756318703364, "loss": 0.0629, "theoretical_loss": 3.376956926467455, "tokens_seen": 2695626752 }, { "epoch": 0.63, "learning_rate": 0.00018494744443552915, "loss": 0.066, "theoretical_loss": 3.3769441345792304, "tokens_seen": 2695757824 }, { "epoch": 0.63, "learning_rate": 0.00018490732568402472, "loss": 0.0683, "theoretical_loss": 3.376931343487092, "tokens_seen": 2695888896 }, { "epoch": 0.63, "learning_rate": 0.00018486720693252026, "loss": 0.0639, "theoretical_loss": 3.376918553190952, "tokens_seen": 2696019968 }, { "epoch": 0.63, "learning_rate": 0.0001848270881810158, "loss": 0.0652, "theoretical_loss": 3.3769057636907216, "tokens_seen": 2696151040 }, { "epoch": 0.63, "learning_rate": 0.00018478696942951137, "loss": 0.0642, "theoretical_loss": 3.376892974986313, "tokens_seen": 2696282112 }, { "epoch": 0.63, "learning_rate": 0.0001847468506780069, "loss": 0.0668, "theoretical_loss": 3.3768801870776377, "tokens_seen": 2696413184 }, { "epoch": 0.63, "learning_rate": 0.00018470673192650246, "loss": 0.0629, "theoretical_loss": 3.3768673999646075, "tokens_seen": 2696544256 }, { "epoch": 0.63, "learning_rate": 0.000184666613174998, "loss": 0.0676, "theoretical_loss": 3.3768546136471347, "tokens_seen": 2696675328 }, { "epoch": 0.63, "learning_rate": 0.00018462649442349357, "loss": 0.0634, "theoretical_loss": 3.3768418281251305, "tokens_seen": 2696806400 }, { "epoch": 0.63, "learning_rate": 0.0001845863756719891, "loss": 0.0657, "theoretical_loss": 3.376829043398507, "tokens_seen": 2696937472 }, { "epoch": 0.63, "learning_rate": 0.00018454625692048462, "loss": 0.0645, "theoretical_loss": 3.3768162594671765, "tokens_seen": 2697068544 }, { "epoch": 0.63, "learning_rate": 0.0001845061381689802, "loss": 0.064, "theoretical_loss": 3.37680347633105, "tokens_seen": 2697199616 }, { "epoch": 0.63, "learning_rate": 0.00018446601941747573, "loss": 0.0641, "theoretical_loss": 3.3767906939900403, "tokens_seen": 2697330688 }, { "epoch": 0.63, "learning_rate": 0.0001844259006659713, "loss": 0.065, "theoretical_loss": 3.3767779124440587, "tokens_seen": 2697461760 }, { "epoch": 0.63, "learning_rate": 0.00018438578191446684, "loss": 0.0633, "theoretical_loss": 3.3767651316930176, "tokens_seen": 2697592832 }, { "epoch": 0.63, "learning_rate": 0.00018434566316296235, "loss": 0.0656, "theoretical_loss": 3.3767523517368288, "tokens_seen": 2697723904 }, { "epoch": 0.64, "learning_rate": 0.00018430554441145792, "loss": 0.0642, "theoretical_loss": 3.3767395725754037, "tokens_seen": 2697854976 }, { "epoch": 0.64, "learning_rate": 0.00018426542565995346, "loss": 0.0672, "theoretical_loss": 3.376726794208655, "tokens_seen": 2697986048 }, { "epoch": 0.64, "learning_rate": 0.00018422530690844903, "loss": 0.0659, "theoretical_loss": 3.3767140166364937, "tokens_seen": 2698117120 }, { "epoch": 0.64, "learning_rate": 0.00018418518815694457, "loss": 0.0664, "theoretical_loss": 3.3767012398588334, "tokens_seen": 2698248192 }, { "epoch": 0.64, "learning_rate": 0.0001841450694054401, "loss": 0.0645, "theoretical_loss": 3.3766884638755847, "tokens_seen": 2698379264 }, { "epoch": 0.64, "objective/train/advantage_avg": -0.0006693757604807615, "objective/train/docs_used": 980656, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4677000045776367, "objective/train/original_loss": 1.4676998853683472, "objective/train/theoretical_loss": 3.3766756886866602, "objective/train/tokens_used": 1069034976, "objective/train/value_avg": -0.006984710693359375, "objective/train/value_loss": 0.0003624585224315524, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.9736328125, "objective/train/value_reward_corr": 0.73302792618064, "objective/train/value_std": 0.01947021484375, "objective/train/weight_avg": 0.9994949102401733, "objective/train/weighted_lm_loss": 1.467222809791565, "objective/train/weights_max": 1.8249680995941162, "objective/train/weights_min": 0.3848525285720825, "theoretical_loss": 3.3766756886866602, "tokens_seen": 2698510336 }, { "epoch": 0.64, "learning_rate": 0.00018410495065393566, "loss": 0.0676, "theoretical_loss": 3.3766756886866602, "tokens_seen": 2698510336 }, { "epoch": 0.64, "learning_rate": 0.0001840648319024312, "loss": 0.0645, "theoretical_loss": 3.376662914291972, "tokens_seen": 2698641408 }, { "epoch": 0.64, "learning_rate": 0.00018402471315092677, "loss": 0.0624, "theoretical_loss": 3.3766501406914315, "tokens_seen": 2698772480 }, { "epoch": 0.64, "learning_rate": 0.0001839845943994223, "loss": 0.0638, "theoretical_loss": 3.3766373678849515, "tokens_seen": 2698903552 }, { "epoch": 0.64, "learning_rate": 0.00018394447564791782, "loss": 0.0656, "theoretical_loss": 3.376624595872444, "tokens_seen": 2699034624 }, { "epoch": 0.64, "learning_rate": 0.0001839043568964134, "loss": 0.0626, "theoretical_loss": 3.376611824653821, "tokens_seen": 2699165696 }, { "epoch": 0.64, "learning_rate": 0.00018386423814490893, "loss": 0.067, "theoretical_loss": 3.376599054228994, "tokens_seen": 2699296768 }, { "epoch": 0.64, "learning_rate": 0.0001838241193934045, "loss": 0.0656, "theoretical_loss": 3.376586284597876, "tokens_seen": 2699427840 }, { "epoch": 0.64, "learning_rate": 0.00018378400064190004, "loss": 0.0651, "theoretical_loss": 3.3765735157603785, "tokens_seen": 2699558912 }, { "epoch": 0.64, "learning_rate": 0.00018374388189039555, "loss": 0.0664, "theoretical_loss": 3.3765607477164146, "tokens_seen": 2699689984 }, { "epoch": 0.64, "learning_rate": 0.00018370376313889112, "loss": 0.0619, "theoretical_loss": 3.376547980465895, "tokens_seen": 2699821056 }, { "epoch": 0.64, "learning_rate": 0.00018366364438738666, "loss": 0.0685, "theoretical_loss": 3.376535214008733, "tokens_seen": 2699952128 }, { "epoch": 0.64, "learning_rate": 0.00018362352563588223, "loss": 0.0657, "theoretical_loss": 3.3765224483448404, "tokens_seen": 2700083200 }, { "epoch": 0.64, "learning_rate": 0.00018358340688437777, "loss": 0.064, "theoretical_loss": 3.3765096834741297, "tokens_seen": 2700214272 }, { "epoch": 0.64, "learning_rate": 0.0001835432881328733, "loss": 0.0661, "theoretical_loss": 3.3764969193965126, "tokens_seen": 2700345344 }, { "epoch": 0.64, "learning_rate": 0.00018350316938136886, "loss": 0.0643, "theoretical_loss": 3.3764841561119017, "tokens_seen": 2700476416 }, { "epoch": 0.64, "learning_rate": 0.0001834630506298644, "loss": 0.0683, "theoretical_loss": 3.376471393620209, "tokens_seen": 2700607488 }, { "epoch": 0.64, "learning_rate": 0.00018342293187835997, "loss": 0.0648, "theoretical_loss": 3.376458631921347, "tokens_seen": 2700738560 }, { "epoch": 0.64, "learning_rate": 0.0001833828131268555, "loss": 0.0659, "theoretical_loss": 3.3764458710152283, "tokens_seen": 2700869632 }, { "epoch": 0.64, "learning_rate": 0.00018334269437535102, "loss": 0.0634, "theoretical_loss": 3.3764331109017642, "tokens_seen": 2701000704 }, { "epoch": 0.64, "learning_rate": 0.0001833025756238466, "loss": 0.0634, "theoretical_loss": 3.376420351580868, "tokens_seen": 2701131776 }, { "epoch": 0.64, "learning_rate": 0.00018326245687234213, "loss": 0.0637, "theoretical_loss": 3.376407593052451, "tokens_seen": 2701262848 }, { "epoch": 0.64, "learning_rate": 0.0001832223381208377, "loss": 0.0674, "theoretical_loss": 3.376394835316427, "tokens_seen": 2701393920 }, { "epoch": 0.64, "learning_rate": 0.00018318221936933324, "loss": 0.065, "theoretical_loss": 3.376382078372707, "tokens_seen": 2701524992 }, { "epoch": 0.64, "learning_rate": 0.00018314210061782876, "loss": 0.0666, "theoretical_loss": 3.3763693222212035, "tokens_seen": 2701656064 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.0008406728156842291, "objective/train/docs_used": 981962, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.306033730506897, "objective/train/original_loss": 1.3060336112976074, "objective/train/theoretical_loss": 3.37635656686183, "objective/train/tokens_used": 1072311776, "objective/train/value_avg": -0.00922393798828125, "objective/train/value_loss": 0.00021461714641191065, "objective/train/value_max": -4.035234451293945e-05, "objective/train/value_min": -0.90234375, "objective/train/value_reward_corr": 0.7834830930182887, "objective/train/value_std": 0.0176849365234375, "objective/train/weight_avg": 1.000939965248108, "objective/train/weighted_lm_loss": 1.3067773580551147, "objective/train/weights_max": 1.2366453409194946, "objective/train/weights_min": 0.36976757645606995, "theoretical_loss": 3.37635656686183, "tokens_seen": 2701787136 }, { "epoch": 0.64, "learning_rate": 0.00018310198186632432, "loss": 0.0653, "theoretical_loss": 3.37635656686183, "tokens_seen": 2701787136 }, { "epoch": 0.64, "learning_rate": 0.00018306186311481986, "loss": 0.0646, "theoretical_loss": 3.3763438122944973, "tokens_seen": 2701918208 }, { "epoch": 0.64, "learning_rate": 0.00018302174436331543, "loss": 0.0646, "theoretical_loss": 3.376331058519119, "tokens_seen": 2702049280 }, { "epoch": 0.64, "learning_rate": 0.00018298162561181097, "loss": 0.0659, "theoretical_loss": 3.3763183055356065, "tokens_seen": 2702180352 }, { "epoch": 0.64, "learning_rate": 0.0001829415068603065, "loss": 0.0625, "theoretical_loss": 3.3763055533438733, "tokens_seen": 2702311424 }, { "epoch": 0.64, "learning_rate": 0.00018290138810880206, "loss": 0.0657, "theoretical_loss": 3.3762928019438316, "tokens_seen": 2702442496 }, { "epoch": 0.64, "learning_rate": 0.0001828612693572976, "loss": 0.0628, "theoretical_loss": 3.376280051335393, "tokens_seen": 2702573568 }, { "epoch": 0.64, "learning_rate": 0.00018282115060579317, "loss": 0.065, "theoretical_loss": 3.376267301518471, "tokens_seen": 2702704640 }, { "epoch": 0.64, "learning_rate": 0.0001827810318542887, "loss": 0.0647, "theoretical_loss": 3.376254552492978, "tokens_seen": 2702835712 }, { "epoch": 0.64, "learning_rate": 0.00018274091310278422, "loss": 0.0668, "theoretical_loss": 3.3762418042588256, "tokens_seen": 2702966784 }, { "epoch": 0.64, "learning_rate": 0.0001827007943512798, "loss": 0.0638, "theoretical_loss": 3.376229056815927, "tokens_seen": 2703097856 }, { "epoch": 0.64, "learning_rate": 0.00018266067559977533, "loss": 0.0607, "theoretical_loss": 3.3762163101641947, "tokens_seen": 2703228928 }, { "epoch": 0.64, "learning_rate": 0.0001826205568482709, "loss": 0.0652, "theoretical_loss": 3.376203564303541, "tokens_seen": 2703360000 }, { "epoch": 0.64, "learning_rate": 0.00018258043809676644, "loss": 0.0671, "theoretical_loss": 3.3761908192338788, "tokens_seen": 2703491072 }, { "epoch": 0.64, "learning_rate": 0.00018254031934526196, "loss": 0.0654, "theoretical_loss": 3.3761780749551202, "tokens_seen": 2703622144 }, { "epoch": 0.64, "learning_rate": 0.00018250020059375752, "loss": 0.0626, "theoretical_loss": 3.3761653314671785, "tokens_seen": 2703753216 }, { "epoch": 0.64, "learning_rate": 0.00018246008184225307, "loss": 0.0669, "theoretical_loss": 3.3761525887699655, "tokens_seen": 2703884288 }, { "epoch": 0.64, "learning_rate": 0.00018241996309074863, "loss": 0.0664, "theoretical_loss": 3.376139846863394, "tokens_seen": 2704015360 }, { "epoch": 0.64, "learning_rate": 0.00018237984433924418, "loss": 0.069, "theoretical_loss": 3.3761271057473765, "tokens_seen": 2704146432 }, { "epoch": 0.64, "learning_rate": 0.0001823397255877397, "loss": 0.0586, "theoretical_loss": 3.3761143654218264, "tokens_seen": 2704277504 }, { "epoch": 0.64, "learning_rate": 0.00018229960683623526, "loss": 0.0693, "theoretical_loss": 3.376101625886656, "tokens_seen": 2704408576 }, { "epoch": 0.64, "learning_rate": 0.0001822594880847308, "loss": 0.0648, "theoretical_loss": 3.3760888871417776, "tokens_seen": 2704539648 }, { "epoch": 0.64, "learning_rate": 0.00018221936933322637, "loss": 0.0654, "theoretical_loss": 3.376076149187104, "tokens_seen": 2704670720 }, { "epoch": 0.64, "learning_rate": 0.0001821792505817219, "loss": 0.0684, "theoretical_loss": 3.376063412022548, "tokens_seen": 2704801792 }, { "epoch": 0.64, "learning_rate": 0.00018213913183021742, "loss": 0.0645, "theoretical_loss": 3.376050675648022, "tokens_seen": 2704932864 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.00022510254348162562, "objective/train/docs_used": 983087, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.518373727798462, "objective/train/original_loss": 1.518373727798462, "objective/train/theoretical_loss": 3.3760379400634397, "objective/train/tokens_used": 1075588576, "objective/train/value_avg": -0.00830841064453125, "objective/train/value_loss": 0.00016657948435749859, "objective/train/value_max": -2.586841583251953e-05, "objective/train/value_min": -0.296142578125, "objective/train/value_reward_corr": 0.8085070619632392, "objective/train/value_std": 0.016937255859375, "objective/train/weight_avg": 1.0003033876419067, "objective/train/weighted_lm_loss": 1.5183253288269043, "objective/train/weights_max": 1.2130224704742432, "objective/train/weights_min": 0.36935028433799744, "theoretical_loss": 3.3760379400634397, "tokens_seen": 2705063936 }, { "epoch": 0.64, "learning_rate": 0.000182099013078713, "loss": 0.0692, "theoretical_loss": 3.3760379400634397, "tokens_seen": 2705063936 }, { "epoch": 0.64, "learning_rate": 0.00018205889432720853, "loss": 0.0685, "theoretical_loss": 3.376025205268713, "tokens_seen": 2705195008 }, { "epoch": 0.64, "learning_rate": 0.0001820187755757041, "loss": 0.065, "theoretical_loss": 3.3760124712637545, "tokens_seen": 2705326080 }, { "epoch": 0.64, "learning_rate": 0.00018197865682419964, "loss": 0.0645, "theoretical_loss": 3.375999738048477, "tokens_seen": 2705457152 }, { "epoch": 0.64, "learning_rate": 0.00018193853807269518, "loss": 0.0639, "theoretical_loss": 3.375987005622794, "tokens_seen": 2705588224 }, { "epoch": 0.64, "learning_rate": 0.00018189841932119072, "loss": 0.0674, "theoretical_loss": 3.3759742739866176, "tokens_seen": 2705719296 }, { "epoch": 0.64, "learning_rate": 0.00018185830056968627, "loss": 0.0617, "theoretical_loss": 3.3759615431398613, "tokens_seen": 2705850368 }, { "epoch": 0.64, "learning_rate": 0.00018181818181818183, "loss": 0.0659, "theoretical_loss": 3.3759488130824367, "tokens_seen": 2705981440 }, { "epoch": 0.64, "learning_rate": 0.00018177806306667738, "loss": 0.0644, "theoretical_loss": 3.3759360838142576, "tokens_seen": 2706112512 }, { "epoch": 0.64, "learning_rate": 0.00018173794431517292, "loss": 0.0675, "theoretical_loss": 3.375923355335237, "tokens_seen": 2706243584 }, { "epoch": 0.64, "learning_rate": 0.00018169782556366846, "loss": 0.0647, "theoretical_loss": 3.375910627645287, "tokens_seen": 2706374656 }, { "epoch": 0.64, "learning_rate": 0.000181657706812164, "loss": 0.0632, "theoretical_loss": 3.3758979007443206, "tokens_seen": 2706505728 }, { "epoch": 0.64, "learning_rate": 0.00018161758806065957, "loss": 0.0676, "theoretical_loss": 3.375885174632251, "tokens_seen": 2706636800 }, { "epoch": 0.64, "learning_rate": 0.0001815774693091551, "loss": 0.0671, "theoretical_loss": 3.3758724493089916, "tokens_seen": 2706767872 }, { "epoch": 0.64, "learning_rate": 0.00018153735055765065, "loss": 0.0713, "theoretical_loss": 3.375859724774454, "tokens_seen": 2706898944 }, { "epoch": 0.64, "learning_rate": 0.0001814972318061462, "loss": 0.0675, "theoretical_loss": 3.3758470010285526, "tokens_seen": 2707030016 }, { "epoch": 0.64, "learning_rate": 0.00018145711305464173, "loss": 0.0689, "theoretical_loss": 3.375834278071199, "tokens_seen": 2707161088 }, { "epoch": 0.64, "learning_rate": 0.0001814169943031373, "loss": 0.0695, "theoretical_loss": 3.375821555902307, "tokens_seen": 2707292160 }, { "epoch": 0.64, "learning_rate": 0.00018137687555163284, "loss": 0.0711, "theoretical_loss": 3.375808834521789, "tokens_seen": 2707423232 }, { "epoch": 0.64, "learning_rate": 0.00018133675680012838, "loss": 0.0653, "theoretical_loss": 3.3757961139295585, "tokens_seen": 2707554304 }, { "epoch": 0.64, "learning_rate": 0.00018129663804862393, "loss": 0.0656, "theoretical_loss": 3.375783394125529, "tokens_seen": 2707685376 }, { "epoch": 0.64, "learning_rate": 0.00018125651929711947, "loss": 0.068, "theoretical_loss": 3.375770675109612, "tokens_seen": 2707816448 }, { "epoch": 0.64, "learning_rate": 0.00018121640054561503, "loss": 0.0679, "theoretical_loss": 3.375757956881721, "tokens_seen": 2707947520 }, { "epoch": 0.64, "learning_rate": 0.00018117628179411058, "loss": 0.0713, "theoretical_loss": 3.37574523944177, "tokens_seen": 2708078592 }, { "epoch": 0.64, "learning_rate": 0.00018113616304260612, "loss": 0.0685, "theoretical_loss": 3.3757325227896713, "tokens_seen": 2708209664 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.0017143836012110114, "objective/train/docs_used": 984234, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3384922742843628, "objective/train/original_loss": 1.3384921550750732, "objective/train/theoretical_loss": 3.3757198069253382, "objective/train/tokens_used": 1078865376, "objective/train/value_avg": -0.01042938232421875, "objective/train/value_loss": 0.00035275999107398093, "objective/train/value_max": -3.0219554901123047e-05, "objective/train/value_min": -0.6123046875, "objective/train/value_reward_corr": 0.7218405023463625, "objective/train/value_std": 0.0224761962890625, "objective/train/weight_avg": 1.0018739700317383, "objective/train/weighted_lm_loss": 1.340969443321228, "objective/train/weights_max": 1.4858635663986206, "objective/train/weights_min": 0.3811357617378235, "theoretical_loss": 3.3757198069253382, "tokens_seen": 2708340736 }, { "epoch": 0.64, "learning_rate": 0.00018109604429110166, "loss": 0.0643, "theoretical_loss": 3.3757198069253382, "tokens_seen": 2708340736 }, { "epoch": 0.64, "learning_rate": 0.0001810559255395972, "loss": 0.0682, "theoretical_loss": 3.3757070918486836, "tokens_seen": 2708471808 }, { "epoch": 0.64, "learning_rate": 0.00018101580678809277, "loss": 0.0699, "theoretical_loss": 3.3756943775596207, "tokens_seen": 2708602880 }, { "epoch": 0.64, "learning_rate": 0.0001809756880365883, "loss": 0.0686, "theoretical_loss": 3.3756816640580625, "tokens_seen": 2708733952 }, { "epoch": 0.64, "learning_rate": 0.00018093556928508385, "loss": 0.0718, "theoretical_loss": 3.375668951343922, "tokens_seen": 2708865024 }, { "epoch": 0.64, "learning_rate": 0.0001808954505335794, "loss": 0.0703, "theoretical_loss": 3.375656239417113, "tokens_seen": 2708996096 }, { "epoch": 0.64, "learning_rate": 0.00018085533178207493, "loss": 0.0693, "theoretical_loss": 3.3756435282775477, "tokens_seen": 2709127168 }, { "epoch": 0.64, "learning_rate": 0.0001808152130305705, "loss": 0.0662, "theoretical_loss": 3.37563081792514, "tokens_seen": 2709258240 }, { "epoch": 0.64, "learning_rate": 0.00018077509427906604, "loss": 0.0684, "theoretical_loss": 3.3756181083598027, "tokens_seen": 2709389312 }, { "epoch": 0.64, "learning_rate": 0.00018073497552756158, "loss": 0.0645, "theoretical_loss": 3.375605399581449, "tokens_seen": 2709520384 }, { "epoch": 0.64, "learning_rate": 0.00018069485677605713, "loss": 0.0729, "theoretical_loss": 3.3755926915899925, "tokens_seen": 2709651456 }, { "epoch": 0.64, "learning_rate": 0.00018065473802455267, "loss": 0.0654, "theoretical_loss": 3.375579984385346, "tokens_seen": 2709782528 }, { "epoch": 0.64, "learning_rate": 0.00018061461927304824, "loss": 0.0664, "theoretical_loss": 3.3755672779674226, "tokens_seen": 2709913600 }, { "epoch": 0.64, "learning_rate": 0.00018057450052154378, "loss": 0.0636, "theoretical_loss": 3.375554572336136, "tokens_seen": 2710044672 }, { "epoch": 0.64, "learning_rate": 0.00018053438177003932, "loss": 0.0647, "theoretical_loss": 3.375541867491399, "tokens_seen": 2710175744 }, { "epoch": 0.64, "learning_rate": 0.00018049426301853486, "loss": 0.0688, "theoretical_loss": 3.3755291634331255, "tokens_seen": 2710306816 }, { "epoch": 0.64, "learning_rate": 0.0001804541442670304, "loss": 0.0651, "theoretical_loss": 3.375516460161228, "tokens_seen": 2710437888 }, { "epoch": 0.64, "learning_rate": 0.00018041402551552597, "loss": 0.0665, "theoretical_loss": 3.3755037576756206, "tokens_seen": 2710568960 }, { "epoch": 0.64, "learning_rate": 0.0001803739067640215, "loss": 0.0723, "theoretical_loss": 3.375491055976216, "tokens_seen": 2710700032 }, { "epoch": 0.64, "learning_rate": 0.00018033378801251705, "loss": 0.0671, "theoretical_loss": 3.3754783550629277, "tokens_seen": 2710831104 }, { "epoch": 0.64, "learning_rate": 0.0001802936692610126, "loss": 0.0679, "theoretical_loss": 3.3754656549356694, "tokens_seen": 2710962176 }, { "epoch": 0.64, "learning_rate": 0.00018025355050950813, "loss": 0.0623, "theoretical_loss": 3.375452955594354, "tokens_seen": 2711093248 }, { "epoch": 0.64, "learning_rate": 0.0001802134317580037, "loss": 0.0681, "theoretical_loss": 3.375440257038895, "tokens_seen": 2711224320 }, { "epoch": 0.64, "learning_rate": 0.00018017331300649924, "loss": 0.0675, "theoretical_loss": 3.3754275592692053, "tokens_seen": 2711355392 }, { "epoch": 0.64, "learning_rate": 0.00018013319425499479, "loss": 0.0661, "theoretical_loss": 3.3754148622851994, "tokens_seen": 2711486464 }, { "epoch": 0.64, "objective/train/advantage_avg": 0.000427984690759331, "objective/train/docs_used": 985372, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4555859565734863, "objective/train/original_loss": 1.4555861949920654, "objective/train/theoretical_loss": 3.37540216608679, "objective/train/tokens_used": 1082142176, "objective/train/value_avg": -0.01009368896484375, "objective/train/value_loss": 0.0003736518556252122, "objective/train/value_max": -6.35385513305664e-05, "objective/train/value_min": -0.380615234375, "objective/train/value_reward_corr": 0.6651143441497535, "objective/train/value_std": 0.0164031982421875, "objective/train/weight_avg": 1.000592589378357, "objective/train/weighted_lm_loss": 1.4570707082748413, "objective/train/weights_max": 1.4631845951080322, "objective/train/weights_min": 0.3687315881252289, "theoretical_loss": 3.37540216608679, "tokens_seen": 2711617536 }, { "epoch": 0.64, "learning_rate": 0.00018009307550349033, "loss": 0.0688, "theoretical_loss": 3.37540216608679, "tokens_seen": 2711617536 }, { "epoch": 0.64, "learning_rate": 0.00018005295675198587, "loss": 0.0631, "theoretical_loss": 3.3753894706738903, "tokens_seen": 2711748608 }, { "epoch": 0.64, "learning_rate": 0.00018001283800048144, "loss": 0.065, "theoretical_loss": 3.3753767760464144, "tokens_seen": 2711879680 }, { "epoch": 0.64, "learning_rate": 0.00017997271924897698, "loss": 0.0693, "theoretical_loss": 3.375364082204275, "tokens_seen": 2712010752 }, { "epoch": 0.64, "learning_rate": 0.00017993260049747252, "loss": 0.0673, "theoretical_loss": 3.3753513891473865, "tokens_seen": 2712141824 }, { "epoch": 0.64, "learning_rate": 0.00017989248174596806, "loss": 0.0639, "theoretical_loss": 3.3753386968756613, "tokens_seen": 2712272896 }, { "epoch": 0.64, "learning_rate": 0.0001798523629944636, "loss": 0.0657, "theoretical_loss": 3.375326005389014, "tokens_seen": 2712403968 }, { "epoch": 0.64, "learning_rate": 0.00017981224424295917, "loss": 0.0654, "theoretical_loss": 3.3753133146873573, "tokens_seen": 2712535040 }, { "epoch": 0.64, "learning_rate": 0.0001797721254914547, "loss": 0.07, "theoretical_loss": 3.3753006247706048, "tokens_seen": 2712666112 }, { "epoch": 0.64, "learning_rate": 0.00017973200673995025, "loss": 0.0675, "theoretical_loss": 3.3752879356386702, "tokens_seen": 2712797184 }, { "epoch": 0.64, "learning_rate": 0.0001796918879884458, "loss": 0.0684, "theoretical_loss": 3.3752752472914676, "tokens_seen": 2712928256 }, { "epoch": 0.64, "learning_rate": 0.00017965176923694133, "loss": 0.0634, "theoretical_loss": 3.375262559728909, "tokens_seen": 2713059328 }, { "epoch": 0.64, "learning_rate": 0.0001796116504854369, "loss": 0.0691, "theoretical_loss": 3.37524987295091, "tokens_seen": 2713190400 }, { "epoch": 0.64, "learning_rate": 0.00017957153173393244, "loss": 0.0696, "theoretical_loss": 3.3752371869573823, "tokens_seen": 2713321472 }, { "epoch": 0.64, "learning_rate": 0.00017953141298242799, "loss": 0.0668, "theoretical_loss": 3.375224501748241, "tokens_seen": 2713452544 }, { "epoch": 0.64, "learning_rate": 0.00017949129423092353, "loss": 0.0635, "theoretical_loss": 3.375211817323399, "tokens_seen": 2713583616 }, { "epoch": 0.64, "learning_rate": 0.0001794511754794191, "loss": 0.071, "theoretical_loss": 3.3751991336827696, "tokens_seen": 2713714688 }, { "epoch": 0.64, "learning_rate": 0.00017941105672791464, "loss": 0.0603, "theoretical_loss": 3.3751864508262672, "tokens_seen": 2713845760 }, { "epoch": 0.64, "learning_rate": 0.00017937093797641018, "loss": 0.0674, "theoretical_loss": 3.375173768753805, "tokens_seen": 2713976832 }, { "epoch": 0.64, "learning_rate": 0.00017933081922490572, "loss": 0.0639, "theoretical_loss": 3.3751610874652966, "tokens_seen": 2714107904 }, { "epoch": 0.65, "learning_rate": 0.00017929070047340126, "loss": 0.0705, "theoretical_loss": 3.3751484069606565, "tokens_seen": 2714238976 }, { "epoch": 0.65, "learning_rate": 0.00017925058172189683, "loss": 0.0662, "theoretical_loss": 3.375135727239797, "tokens_seen": 2714370048 }, { "epoch": 0.65, "learning_rate": 0.00017921046297039237, "loss": 0.0666, "theoretical_loss": 3.375123048302633, "tokens_seen": 2714501120 }, { "epoch": 0.65, "learning_rate": 0.0001791703442188879, "loss": 0.0699, "theoretical_loss": 3.375110370149078, "tokens_seen": 2714632192 }, { "epoch": 0.65, "learning_rate": 0.00017913022546738345, "loss": 0.0668, "theoretical_loss": 3.375097692779045, "tokens_seen": 2714763264 }, { "epoch": 0.65, "objective/train/advantage_avg": -0.00015653531590942293, "objective/train/docs_used": 986641, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.236704707145691, "objective/train/original_loss": 1.2367048263549805, "objective/train/theoretical_loss": 3.375085016192448, "objective/train/tokens_used": 1085418976, "objective/train/value_avg": -0.00714874267578125, "objective/train/value_loss": 0.00015720960800535977, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.2135009765625, "objective/train/value_reward_corr": 0.7770704588263253, "objective/train/value_std": 0.0146331787109375, "objective/train/weight_avg": 0.999917209148407, "objective/train/weighted_lm_loss": 1.2361016273498535, "objective/train/weights_max": 1.0987545251846313, "objective/train/weights_min": 0.36823222041130066, "theoretical_loss": 3.375085016192448, "tokens_seen": 2714894336 }, { "epoch": 0.65, "learning_rate": 0.000179090106715879, "loss": 0.0641, "theoretical_loss": 3.375085016192448, "tokens_seen": 2714894336 }, { "epoch": 0.65, "learning_rate": 0.00017904998796437456, "loss": 0.0653, "theoretical_loss": 3.3750723403892016, "tokens_seen": 2715025408 }, { "epoch": 0.65, "learning_rate": 0.0001790098692128701, "loss": 0.0673, "theoretical_loss": 3.375059665369219, "tokens_seen": 2715156480 }, { "epoch": 0.65, "learning_rate": 0.00017896975046136564, "loss": 0.0646, "theoretical_loss": 3.375046991132414, "tokens_seen": 2715287552 }, { "epoch": 0.65, "learning_rate": 0.00017892963170986119, "loss": 0.0654, "theoretical_loss": 3.3750343176787005, "tokens_seen": 2715418624 }, { "epoch": 0.65, "learning_rate": 0.00017888951295835673, "loss": 0.0702, "theoretical_loss": 3.375021645007992, "tokens_seen": 2715549696 }, { "epoch": 0.65, "learning_rate": 0.0001788493942068523, "loss": 0.0648, "theoretical_loss": 3.375008973120203, "tokens_seen": 2715680768 }, { "epoch": 0.65, "learning_rate": 0.00017880927545534784, "loss": 0.0698, "theoretical_loss": 3.3749963020152465, "tokens_seen": 2715811840 }, { "epoch": 0.65, "learning_rate": 0.00017876915670384338, "loss": 0.0643, "theoretical_loss": 3.374983631693037, "tokens_seen": 2715942912 }, { "epoch": 0.65, "learning_rate": 0.00017872903795233892, "loss": 0.068, "theoretical_loss": 3.3749709621534882, "tokens_seen": 2716073984 }, { "epoch": 0.65, "learning_rate": 0.00017868891920083446, "loss": 0.0659, "theoretical_loss": 3.3749582933965137, "tokens_seen": 2716205056 }, { "epoch": 0.65, "learning_rate": 0.00017864880044933003, "loss": 0.0638, "theoretical_loss": 3.374945625422028, "tokens_seen": 2716336128 }, { "epoch": 0.65, "learning_rate": 0.00017860868169782557, "loss": 0.0646, "theoretical_loss": 3.374932958229944, "tokens_seen": 2716467200 }, { "epoch": 0.65, "learning_rate": 0.0001785685629463211, "loss": 0.0679, "theoretical_loss": 3.374920291820177, "tokens_seen": 2716598272 }, { "epoch": 0.65, "learning_rate": 0.00017852844419481668, "loss": 0.064, "theoretical_loss": 3.37490762619264, "tokens_seen": 2716729344 }, { "epoch": 0.65, "learning_rate": 0.0001784883254433122, "loss": 0.064, "theoretical_loss": 3.374894961347247, "tokens_seen": 2716860416 }, { "epoch": 0.65, "learning_rate": 0.00017844820669180776, "loss": 0.07, "theoretical_loss": 3.3748822972839125, "tokens_seen": 2716991488 }, { "epoch": 0.65, "learning_rate": 0.0001784080879403033, "loss": 0.0675, "theoretical_loss": 3.37486963400255, "tokens_seen": 2717122560 }, { "epoch": 0.65, "learning_rate": 0.00017836796918879885, "loss": 0.066, "theoretical_loss": 3.3748569715030734, "tokens_seen": 2717253632 }, { "epoch": 0.65, "learning_rate": 0.00017832785043729441, "loss": 0.0703, "theoretical_loss": 3.3748443097853973, "tokens_seen": 2717384704 }, { "epoch": 0.65, "learning_rate": 0.00017828773168578993, "loss": 0.0661, "theoretical_loss": 3.374831648849435, "tokens_seen": 2717515776 }, { "epoch": 0.65, "learning_rate": 0.0001782476129342855, "loss": 0.0628, "theoretical_loss": 3.374818988695101, "tokens_seen": 2717646848 }, { "epoch": 0.65, "learning_rate": 0.00017820749418278104, "loss": 0.0631, "theoretical_loss": 3.37480632932231, "tokens_seen": 2717777920 }, { "epoch": 0.65, "learning_rate": 0.00017816737543127658, "loss": 0.0647, "theoretical_loss": 3.3747936707309747, "tokens_seen": 2717908992 }, { "epoch": 0.65, "learning_rate": 0.00017812725667977215, "loss": 0.0666, "theoretical_loss": 3.3747810129210096, "tokens_seen": 2718040064 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.0005951900384388864, "objective/train/docs_used": 987833, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2629672288894653, "objective/train/original_loss": 1.2629673480987549, "objective/train/theoretical_loss": 3.3747683558923294, "objective/train/tokens_used": 1088695776, "objective/train/value_avg": -0.01258087158203125, "objective/train/value_loss": 0.0005440949462354183, "objective/train/value_max": -5.02467155456543e-05, "objective/train/value_min": -0.982421875, "objective/train/value_reward_corr": 0.8000666636121812, "objective/train/value_std": 0.033203125, "objective/train/weight_avg": 1.000870704650879, "objective/train/weighted_lm_loss": 1.2636528015136719, "objective/train/weights_max": 2.2518348693847656, "objective/train/weights_min": 0.3746890127658844, "theoretical_loss": 3.3747683558923294, "tokens_seen": 2718171136 }, { "epoch": 0.65, "learning_rate": 0.00017808713792826766, "loss": 0.0673, "theoretical_loss": 3.3747683558923294, "tokens_seen": 2718171136 }, { "epoch": 0.65, "learning_rate": 0.00017804701917676323, "loss": 0.0666, "theoretical_loss": 3.374755699644848, "tokens_seen": 2718302208 }, { "epoch": 0.65, "learning_rate": 0.00017800690042525877, "loss": 0.0649, "theoretical_loss": 3.3747430441784787, "tokens_seen": 2718433280 }, { "epoch": 0.65, "learning_rate": 0.0001779667816737543, "loss": 0.0654, "theoretical_loss": 3.3747303894931364, "tokens_seen": 2718564352 }, { "epoch": 0.65, "learning_rate": 0.00017792666292224988, "loss": 0.0652, "theoretical_loss": 3.3747177355887352, "tokens_seen": 2718695424 }, { "epoch": 0.65, "learning_rate": 0.0001778865441707454, "loss": 0.0639, "theoretical_loss": 3.3747050824651894, "tokens_seen": 2718826496 }, { "epoch": 0.65, "learning_rate": 0.00017784642541924096, "loss": 0.0659, "theoretical_loss": 3.374692430122413, "tokens_seen": 2718957568 }, { "epoch": 0.65, "learning_rate": 0.0001778063066677365, "loss": 0.0633, "theoretical_loss": 3.3746797785603198, "tokens_seen": 2719088640 }, { "epoch": 0.65, "learning_rate": 0.00017776618791623205, "loss": 0.0646, "theoretical_loss": 3.3746671277788245, "tokens_seen": 2719219712 }, { "epoch": 0.65, "learning_rate": 0.00017772606916472761, "loss": 0.0639, "theoretical_loss": 3.374654477777841, "tokens_seen": 2719350784 }, { "epoch": 0.65, "learning_rate": 0.00017768595041322313, "loss": 0.067, "theoretical_loss": 3.3746418285572837, "tokens_seen": 2719481856 }, { "epoch": 0.65, "learning_rate": 0.0001776458316617187, "loss": 0.0641, "theoretical_loss": 3.374629180117067, "tokens_seen": 2719612928 }, { "epoch": 0.65, "learning_rate": 0.00017760571291021424, "loss": 0.069, "theoretical_loss": 3.3746165324571047, "tokens_seen": 2719744000 }, { "epoch": 0.65, "learning_rate": 0.00017756559415870978, "loss": 0.0697, "theoretical_loss": 3.3746038855773115, "tokens_seen": 2719875072 }, { "epoch": 0.65, "learning_rate": 0.00017752547540720535, "loss": 0.0682, "theoretical_loss": 3.374591239477602, "tokens_seen": 2720006144 }, { "epoch": 0.65, "learning_rate": 0.00017748535665570086, "loss": 0.0694, "theoretical_loss": 3.374578594157889, "tokens_seen": 2720137216 }, { "epoch": 0.65, "learning_rate": 0.00017744523790419643, "loss": 0.06, "theoretical_loss": 3.3745659496180886, "tokens_seen": 2720268288 }, { "epoch": 0.65, "learning_rate": 0.00017740511915269197, "loss": 0.0651, "theoretical_loss": 3.374553305858114, "tokens_seen": 2720399360 }, { "epoch": 0.65, "learning_rate": 0.0001773650004011875, "loss": 0.0688, "theoretical_loss": 3.3745406628778802, "tokens_seen": 2720530432 }, { "epoch": 0.65, "learning_rate": 0.00017732488164968308, "loss": 0.0606, "theoretical_loss": 3.374528020677301, "tokens_seen": 2720661504 }, { "epoch": 0.65, "learning_rate": 0.0001772847628981786, "loss": 0.0635, "theoretical_loss": 3.374515379256291, "tokens_seen": 2720792576 }, { "epoch": 0.65, "learning_rate": 0.00017724464414667416, "loss": 0.0684, "theoretical_loss": 3.3745027386147646, "tokens_seen": 2720923648 }, { "epoch": 0.65, "learning_rate": 0.0001772045253951697, "loss": 0.0642, "theoretical_loss": 3.374490098752636, "tokens_seen": 2721054720 }, { "epoch": 0.65, "learning_rate": 0.00017716440664366527, "loss": 0.0638, "theoretical_loss": 3.3744774596698197, "tokens_seen": 2721185792 }, { "epoch": 0.65, "learning_rate": 0.00017712428789216081, "loss": 0.0651, "theoretical_loss": 3.3744648213662307, "tokens_seen": 2721316864 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.0007309599313884974, "objective/train/docs_used": 988960, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3594495058059692, "objective/train/original_loss": 1.3594492673873901, "objective/train/theoretical_loss": 3.3744521838417825, "objective/train/tokens_used": 1091972576, "objective/train/value_avg": -0.00782012939453125, "objective/train/value_loss": 0.00022776176047045738, "objective/train/value_max": -4.4345855712890625e-05, "objective/train/value_min": -0.1796875, "objective/train/value_reward_corr": 0.6558630525898597, "objective/train/value_std": 0.01419830322265625, "objective/train/weight_avg": 1.0008376836776733, "objective/train/weighted_lm_loss": 1.3602958917617798, "objective/train/weights_max": 1.1968432664871216, "objective/train/weights_min": 0.3725340962409973, "theoretical_loss": 3.3744521838417825, "tokens_seen": 2721447936 }, { "epoch": 0.65, "learning_rate": 0.00017708416914065633, "loss": 0.0673, "theoretical_loss": 3.3744521838417825, "tokens_seen": 2721447936 }, { "epoch": 0.65, "learning_rate": 0.0001770440503891519, "loss": 0.0652, "theoretical_loss": 3.37443954709639, "tokens_seen": 2721579008 }, { "epoch": 0.65, "learning_rate": 0.00017700393163764744, "loss": 0.0643, "theoretical_loss": 3.3744269111299676, "tokens_seen": 2721710080 }, { "epoch": 0.65, "learning_rate": 0.000176963812886143, "loss": 0.0653, "theoretical_loss": 3.37441427594243, "tokens_seen": 2721841152 }, { "epoch": 0.65, "learning_rate": 0.00017692369413463855, "loss": 0.0648, "theoretical_loss": 3.3744016415336913, "tokens_seen": 2721972224 }, { "epoch": 0.65, "learning_rate": 0.00017688357538313406, "loss": 0.0647, "theoretical_loss": 3.3743890079036665, "tokens_seen": 2722103296 }, { "epoch": 0.65, "learning_rate": 0.00017684345663162963, "loss": 0.0699, "theoretical_loss": 3.3743763750522695, "tokens_seen": 2722234368 }, { "epoch": 0.65, "learning_rate": 0.00017680333788012517, "loss": 0.0626, "theoretical_loss": 3.374363742979415, "tokens_seen": 2722365440 }, { "epoch": 0.65, "learning_rate": 0.00017676321912862074, "loss": 0.0677, "theoretical_loss": 3.374351111685018, "tokens_seen": 2722496512 }, { "epoch": 0.65, "learning_rate": 0.00017672310037711628, "loss": 0.0614, "theoretical_loss": 3.3743384811689925, "tokens_seen": 2722627584 }, { "epoch": 0.65, "learning_rate": 0.0001766829816256118, "loss": 0.0688, "theoretical_loss": 3.3743258514312533, "tokens_seen": 2722758656 }, { "epoch": 0.65, "learning_rate": 0.00017664286287410736, "loss": 0.0683, "theoretical_loss": 3.3743132224717147, "tokens_seen": 2722889728 }, { "epoch": 0.65, "learning_rate": 0.0001766027441226029, "loss": 0.0648, "theoretical_loss": 3.374300594290292, "tokens_seen": 2723020800 }, { "epoch": 0.65, "learning_rate": 0.00017656262537109847, "loss": 0.0716, "theoretical_loss": 3.374287966886899, "tokens_seen": 2723151872 }, { "epoch": 0.65, "learning_rate": 0.00017652250661959402, "loss": 0.0629, "theoretical_loss": 3.374275340261451, "tokens_seen": 2723282944 }, { "epoch": 0.65, "learning_rate": 0.00017648238786808953, "loss": 0.065, "theoretical_loss": 3.3742627144138617, "tokens_seen": 2723414016 }, { "epoch": 0.65, "learning_rate": 0.0001764422691165851, "loss": 0.0636, "theoretical_loss": 3.374250089344047, "tokens_seen": 2723545088 }, { "epoch": 0.65, "learning_rate": 0.00017640215036508064, "loss": 0.066, "theoretical_loss": 3.3742374650519205, "tokens_seen": 2723676160 }, { "epoch": 0.65, "learning_rate": 0.0001763620316135762, "loss": 0.0656, "theoretical_loss": 3.3742248415373974, "tokens_seen": 2723807232 }, { "epoch": 0.65, "learning_rate": 0.00017632191286207175, "loss": 0.0678, "theoretical_loss": 3.3742122188003925, "tokens_seen": 2723938304 }, { "epoch": 0.65, "learning_rate": 0.00017628179411056726, "loss": 0.066, "theoretical_loss": 3.37419959684082, "tokens_seen": 2724069376 }, { "epoch": 0.65, "learning_rate": 0.00017624167535906283, "loss": 0.0676, "theoretical_loss": 3.3741869756585947, "tokens_seen": 2724200448 }, { "epoch": 0.65, "learning_rate": 0.00017620155660755837, "loss": 0.0662, "theoretical_loss": 3.374174355253632, "tokens_seen": 2724331520 }, { "epoch": 0.65, "learning_rate": 0.00017616143785605394, "loss": 0.0667, "theoretical_loss": 3.374161735625846, "tokens_seen": 2724462592 }, { "epoch": 0.65, "learning_rate": 0.00017612131910454948, "loss": 0.066, "theoretical_loss": 3.3741491167751514, "tokens_seen": 2724593664 }, { "epoch": 0.65, "objective/train/advantage_avg": -0.001426665112376213, "objective/train/docs_used": 990115, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3592063188552856, "objective/train/original_loss": 1.3592063188552856, "objective/train/theoretical_loss": 3.3741364987014633, "objective/train/tokens_used": 1095249376, "objective/train/value_avg": -0.01067352294921875, "objective/train/value_loss": 0.0005507044843398035, "objective/train/value_max": -2.3365020751953125e-05, "objective/train/value_min": -0.480224609375, "objective/train/value_reward_corr": 0.6906787583196335, "objective/train/value_std": 0.0180206298828125, "objective/train/weight_avg": 0.9988157153129578, "objective/train/weighted_lm_loss": 1.3576505184173584, "objective/train/weights_max": 1.5709179639816284, "objective/train/weights_min": 0.38551080226898193, "theoretical_loss": 3.3741364987014633, "tokens_seen": 2724724736 }, { "epoch": 0.65, "learning_rate": 0.000176081200353045, "loss": 0.0621, "theoretical_loss": 3.3741364987014633, "tokens_seen": 2724724736 }, { "epoch": 0.65, "learning_rate": 0.00017604108160154057, "loss": 0.0694, "theoretical_loss": 3.3741238814046963, "tokens_seen": 2724855808 }, { "epoch": 0.65, "learning_rate": 0.0001760009628500361, "loss": 0.0673, "theoretical_loss": 3.3741112648847653, "tokens_seen": 2724986880 }, { "epoch": 0.65, "learning_rate": 0.00017596084409853167, "loss": 0.0656, "theoretical_loss": 3.3740986491415854, "tokens_seen": 2725117952 }, { "epoch": 0.65, "learning_rate": 0.00017592072534702722, "loss": 0.0645, "theoretical_loss": 3.374086034175071, "tokens_seen": 2725249024 }, { "epoch": 0.65, "learning_rate": 0.00017588060659552273, "loss": 0.0677, "theoretical_loss": 3.3740734199851365, "tokens_seen": 2725380096 }, { "epoch": 0.65, "learning_rate": 0.0001758404878440183, "loss": 0.065, "theoretical_loss": 3.3740608065716975, "tokens_seen": 2725511168 }, { "epoch": 0.65, "learning_rate": 0.00017580036909251384, "loss": 0.0665, "theoretical_loss": 3.374048193934669, "tokens_seen": 2725642240 }, { "epoch": 0.65, "learning_rate": 0.0001757602503410094, "loss": 0.0641, "theoretical_loss": 3.374035582073965, "tokens_seen": 2725773312 }, { "epoch": 0.65, "learning_rate": 0.00017572013158950495, "loss": 0.0684, "theoretical_loss": 3.3740229709895013, "tokens_seen": 2725904384 }, { "epoch": 0.65, "learning_rate": 0.00017568001283800046, "loss": 0.0663, "theoretical_loss": 3.374010360681192, "tokens_seen": 2726035456 }, { "epoch": 0.65, "learning_rate": 0.00017563989408649603, "loss": 0.0684, "theoretical_loss": 3.373997751148953, "tokens_seen": 2726166528 }, { "epoch": 0.65, "learning_rate": 0.00017559977533499157, "loss": 0.0643, "theoretical_loss": 3.373985142392698, "tokens_seen": 2726297600 }, { "epoch": 0.65, "learning_rate": 0.00017555965658348714, "loss": 0.0626, "theoretical_loss": 3.373972534412343, "tokens_seen": 2726428672 }, { "epoch": 0.65, "learning_rate": 0.00017551953783198268, "loss": 0.0656, "theoretical_loss": 3.3739599272078022, "tokens_seen": 2726559744 }, { "epoch": 0.65, "learning_rate": 0.0001754794190804782, "loss": 0.0656, "theoretical_loss": 3.373947320778991, "tokens_seen": 2726690816 }, { "epoch": 0.65, "learning_rate": 0.00017543930032897377, "loss": 0.0657, "theoretical_loss": 3.3739347151258245, "tokens_seen": 2726821888 }, { "epoch": 0.65, "learning_rate": 0.0001753991815774693, "loss": 0.0667, "theoretical_loss": 3.373922110248218, "tokens_seen": 2726952960 }, { "epoch": 0.65, "learning_rate": 0.00017535906282596488, "loss": 0.0684, "theoretical_loss": 3.3739095061460853, "tokens_seen": 2727084032 }, { "epoch": 0.65, "learning_rate": 0.00017531894407446042, "loss": 0.0626, "theoretical_loss": 3.373896902819342, "tokens_seen": 2727215104 }, { "epoch": 0.65, "learning_rate": 0.00017527882532295593, "loss": 0.0668, "theoretical_loss": 3.3738843002679038, "tokens_seen": 2727346176 }, { "epoch": 0.65, "learning_rate": 0.0001752387065714515, "loss": 0.0701, "theoretical_loss": 3.373871698491685, "tokens_seen": 2727477248 }, { "epoch": 0.65, "learning_rate": 0.00017519858781994704, "loss": 0.0646, "theoretical_loss": 3.3738590974906004, "tokens_seen": 2727608320 }, { "epoch": 0.65, "learning_rate": 0.0001751584690684426, "loss": 0.066, "theoretical_loss": 3.373846497264566, "tokens_seen": 2727739392 }, { "epoch": 0.65, "learning_rate": 0.00017511835031693815, "loss": 0.0649, "theoretical_loss": 3.3738338978134967, "tokens_seen": 2727870464 }, { "epoch": 0.65, "objective/train/advantage_avg": 0.00012298587535042316, "objective/train/docs_used": 991320, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4016118049621582, "objective/train/original_loss": 1.4016120433807373, "objective/train/theoretical_loss": 3.373821299137307, "objective/train/tokens_used": 1098526176, "objective/train/value_avg": -0.00827789306640625, "objective/train/value_loss": 0.00023342993517871946, "objective/train/value_max": -3.2961368560791016e-05, "objective/train/value_min": -0.98291015625, "objective/train/value_reward_corr": 0.7634174816112813, "objective/train/value_std": 0.01690673828125, "objective/train/weight_avg": 1.0002321004867554, "objective/train/weighted_lm_loss": 1.4016085863113403, "objective/train/weights_max": 1.514060139656067, "objective/train/weights_min": 0.4428814649581909, "theoretical_loss": 3.373821299137307, "tokens_seen": 2728001536 }, { "epoch": 0.65, "learning_rate": 0.00017507823156543366, "loss": 0.07, "theoretical_loss": 3.373821299137307, "tokens_seen": 2728001536 }, { "epoch": 0.65, "learning_rate": 0.00017503811281392923, "loss": 0.0638, "theoretical_loss": 3.3738087012359124, "tokens_seen": 2728132608 }, { "epoch": 0.65, "learning_rate": 0.00017499799406242477, "loss": 0.0663, "theoretical_loss": 3.373796104109228, "tokens_seen": 2728263680 }, { "epoch": 0.65, "learning_rate": 0.00017495787531092034, "loss": 0.0656, "theoretical_loss": 3.3737835077571687, "tokens_seen": 2728394752 }, { "epoch": 0.65, "learning_rate": 0.00017491775655941588, "loss": 0.0661, "theoretical_loss": 3.3737709121796504, "tokens_seen": 2728525824 }, { "epoch": 0.65, "learning_rate": 0.0001748776378079114, "loss": 0.0648, "theoretical_loss": 3.3737583173765873, "tokens_seen": 2728656896 }, { "epoch": 0.65, "learning_rate": 0.00017483751905640697, "loss": 0.0677, "theoretical_loss": 3.3737457233478954, "tokens_seen": 2728787968 }, { "epoch": 0.65, "learning_rate": 0.0001747974003049025, "loss": 0.0679, "theoretical_loss": 3.3737331300934894, "tokens_seen": 2728919040 }, { "epoch": 0.65, "learning_rate": 0.00017475728155339808, "loss": 0.0627, "theoretical_loss": 3.3737205376132846, "tokens_seen": 2729050112 }, { "epoch": 0.65, "learning_rate": 0.00017471716280189362, "loss": 0.0652, "theoretical_loss": 3.3737079459071966, "tokens_seen": 2729181184 }, { "epoch": 0.65, "learning_rate": 0.00017467704405038913, "loss": 0.0665, "theoretical_loss": 3.3736953549751396, "tokens_seen": 2729312256 }, { "epoch": 0.65, "learning_rate": 0.0001746369252988847, "loss": 0.0668, "theoretical_loss": 3.37368276481703, "tokens_seen": 2729443328 }, { "epoch": 0.65, "learning_rate": 0.00017459680654738024, "loss": 0.0668, "theoretical_loss": 3.3736701754327827, "tokens_seen": 2729574400 }, { "epoch": 0.65, "learning_rate": 0.0001745566877958758, "loss": 0.0662, "theoretical_loss": 3.373657586822313, "tokens_seen": 2729705472 }, { "epoch": 0.65, "learning_rate": 0.00017451656904437135, "loss": 0.0649, "theoretical_loss": 3.373644998985536, "tokens_seen": 2729836544 }, { "epoch": 0.65, "learning_rate": 0.0001744764502928669, "loss": 0.0656, "theoretical_loss": 3.373632411922367, "tokens_seen": 2729967616 }, { "epoch": 0.65, "learning_rate": 0.00017443633154136243, "loss": 0.0651, "theoretical_loss": 3.3736198256327214, "tokens_seen": 2730098688 }, { "epoch": 0.65, "learning_rate": 0.00017439621278985797, "loss": 0.0653, "theoretical_loss": 3.373607240116515, "tokens_seen": 2730229760 }, { "epoch": 0.65, "learning_rate": 0.00017435609403835354, "loss": 0.0686, "theoretical_loss": 3.373594655373662, "tokens_seen": 2730360832 }, { "epoch": 0.65, "learning_rate": 0.00017431597528684908, "loss": 0.0636, "theoretical_loss": 3.373582071404079, "tokens_seen": 2730491904 }, { "epoch": 0.65, "learning_rate": 0.00017427585653534463, "loss": 0.0651, "theoretical_loss": 3.3735694882076803, "tokens_seen": 2730622976 }, { "epoch": 0.66, "learning_rate": 0.00017423573778384017, "loss": 0.0679, "theoretical_loss": 3.3735569057843824, "tokens_seen": 2730754048 }, { "epoch": 0.66, "learning_rate": 0.0001741956190323357, "loss": 0.0654, "theoretical_loss": 3.3735443241341, "tokens_seen": 2730885120 }, { "epoch": 0.66, "learning_rate": 0.00017415550028083128, "loss": 0.0651, "theoretical_loss": 3.373531743256748, "tokens_seen": 2731016192 }, { "epoch": 0.66, "learning_rate": 0.00017411538152932682, "loss": 0.0657, "theoretical_loss": 3.3735191631522428, "tokens_seen": 2731147264 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.0006422142614610493, "objective/train/docs_used": 991969, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2505630254745483, "objective/train/original_loss": 1.2505629062652588, "objective/train/theoretical_loss": 3.3735065838205, "objective/train/tokens_used": 1101802976, "objective/train/value_avg": -0.005664825439453125, "objective/train/value_loss": 0.00023955092183314264, "objective/train/value_max": -1.3530254364013672e-05, "objective/train/value_min": -0.350830078125, "objective/train/value_reward_corr": 0.5457567488082887, "objective/train/value_std": 0.00994873046875, "objective/train/weight_avg": 1.0007487535476685, "objective/train/weighted_lm_loss": 1.2514362335205078, "objective/train/weights_max": 1.2788634300231934, "objective/train/weights_min": 0.3683825433254242, "theoretical_loss": 3.3735065838205, "tokens_seen": 2731278336 }, { "epoch": 0.66, "learning_rate": 0.00017407526277782236, "loss": 0.0631, "theoretical_loss": 3.3735065838205, "tokens_seen": 2731278336 }, { "epoch": 0.66, "learning_rate": 0.0001740351440263179, "loss": 0.0628, "theoretical_loss": 3.373494005261434, "tokens_seen": 2731409408 }, { "epoch": 0.66, "learning_rate": 0.00017399502527481344, "loss": 0.0654, "theoretical_loss": 3.3734814274749607, "tokens_seen": 2731540480 }, { "epoch": 0.66, "learning_rate": 0.000173954906523309, "loss": 0.0659, "theoretical_loss": 3.3734688504609958, "tokens_seen": 2731671552 }, { "epoch": 0.66, "learning_rate": 0.00017391478777180455, "loss": 0.0686, "theoretical_loss": 3.3734562742194547, "tokens_seen": 2731802624 }, { "epoch": 0.66, "learning_rate": 0.0001738746690203001, "loss": 0.064, "theoretical_loss": 3.3734436987502527, "tokens_seen": 2731933696 }, { "epoch": 0.66, "learning_rate": 0.00017383455026879563, "loss": 0.0682, "theoretical_loss": 3.373431124053306, "tokens_seen": 2732064768 }, { "epoch": 0.66, "learning_rate": 0.00017379443151729118, "loss": 0.065, "theoretical_loss": 3.373418550128529, "tokens_seen": 2732195840 }, { "epoch": 0.66, "learning_rate": 0.00017375431276578674, "loss": 0.0646, "theoretical_loss": 3.373405976975838, "tokens_seen": 2732326912 }, { "epoch": 0.66, "learning_rate": 0.00017371419401428228, "loss": 0.0636, "theoretical_loss": 3.3733934045951486, "tokens_seen": 2732457984 }, { "epoch": 0.66, "learning_rate": 0.00017367407526277783, "loss": 0.0659, "theoretical_loss": 3.373380832986376, "tokens_seen": 2732589056 }, { "epoch": 0.66, "learning_rate": 0.00017363395651127337, "loss": 0.0659, "theoretical_loss": 3.373368262149436, "tokens_seen": 2732720128 }, { "epoch": 0.66, "learning_rate": 0.0001735938377597689, "loss": 0.0682, "theoretical_loss": 3.3733556920842442, "tokens_seen": 2732851200 }, { "epoch": 0.66, "learning_rate": 0.00017355371900826448, "loss": 0.0654, "theoretical_loss": 3.3733431227907165, "tokens_seen": 2732982272 }, { "epoch": 0.66, "learning_rate": 0.00017351360025676002, "loss": 0.0645, "theoretical_loss": 3.373330554268768, "tokens_seen": 2733113344 }, { "epoch": 0.66, "learning_rate": 0.00017347348150525556, "loss": 0.0638, "theoretical_loss": 3.3733179865183143, "tokens_seen": 2733244416 }, { "epoch": 0.66, "learning_rate": 0.0001734333627537511, "loss": 0.0648, "theoretical_loss": 3.373305419539271, "tokens_seen": 2733375488 }, { "epoch": 0.66, "learning_rate": 0.00017339324400224664, "loss": 0.0617, "theoretical_loss": 3.3732928533315545, "tokens_seen": 2733506560 }, { "epoch": 0.66, "learning_rate": 0.0001733531252507422, "loss": 0.0656, "theoretical_loss": 3.37328028789508, "tokens_seen": 2733637632 }, { "epoch": 0.66, "learning_rate": 0.00017331300649923775, "loss": 0.0671, "theoretical_loss": 3.373267723229763, "tokens_seen": 2733768704 }, { "epoch": 0.66, "learning_rate": 0.0001732728877477333, "loss": 0.0676, "theoretical_loss": 3.37325515933552, "tokens_seen": 2733899776 }, { "epoch": 0.66, "learning_rate": 0.00017323276899622883, "loss": 0.071, "theoretical_loss": 3.3732425962122656, "tokens_seen": 2734030848 }, { "epoch": 0.66, "learning_rate": 0.00017319265024472438, "loss": 0.0648, "theoretical_loss": 3.3732300338599157, "tokens_seen": 2734161920 }, { "epoch": 0.66, "learning_rate": 0.00017315253149321994, "loss": 0.0647, "theoretical_loss": 3.373217472278387, "tokens_seen": 2734292992 }, { "epoch": 0.66, "learning_rate": 0.00017311241274171549, "loss": 0.0659, "theoretical_loss": 3.3732049114675946, "tokens_seen": 2734424064 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.0007974667241796851, "objective/train/docs_used": 993449, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2993959188461304, "objective/train/original_loss": 1.2993959188461304, "objective/train/theoretical_loss": 3.373192351427454, "objective/train/tokens_used": 1105079776, "objective/train/value_avg": -0.00699615478515625, "objective/train/value_loss": 0.0001348337682429701, "objective/train/value_max": -3.594160079956055e-05, "objective/train/value_min": -0.2301025390625, "objective/train/value_reward_corr": 0.758781375167014, "objective/train/value_std": 0.0137939453125, "objective/train/weight_avg": 1.000860571861267, "objective/train/weighted_lm_loss": 1.3002279996871948, "objective/train/weights_max": 1.1801592111587524, "objective/train/weights_min": 0.3873859643936157, "theoretical_loss": 3.373192351427454, "tokens_seen": 2734555136 }, { "epoch": 0.66, "learning_rate": 0.00017307229399021103, "loss": 0.0669, "theoretical_loss": 3.373192351427454, "tokens_seen": 2734555136 }, { "epoch": 0.66, "learning_rate": 0.00017303217523870657, "loss": 0.0624, "theoretical_loss": 3.373179792157882, "tokens_seen": 2734686208 }, { "epoch": 0.66, "learning_rate": 0.0001729920564872021, "loss": 0.0626, "theoretical_loss": 3.373167233658793, "tokens_seen": 2734817280 }, { "epoch": 0.66, "learning_rate": 0.00017295193773569768, "loss": 0.0634, "theoretical_loss": 3.3731546759301034, "tokens_seen": 2734948352 }, { "epoch": 0.66, "learning_rate": 0.00017291181898419322, "loss": 0.0676, "theoretical_loss": 3.3731421189717294, "tokens_seen": 2735079424 }, { "epoch": 0.66, "learning_rate": 0.00017287170023268876, "loss": 0.0645, "theoretical_loss": 3.373129562783587, "tokens_seen": 2735210496 }, { "epoch": 0.66, "learning_rate": 0.0001728315814811843, "loss": 0.0615, "theoretical_loss": 3.373117007365591, "tokens_seen": 2735341568 }, { "epoch": 0.66, "learning_rate": 0.00017279146272967984, "loss": 0.0681, "theoretical_loss": 3.373104452717658, "tokens_seen": 2735472640 }, { "epoch": 0.66, "learning_rate": 0.0001727513439781754, "loss": 0.0641, "theoretical_loss": 3.373091898839704, "tokens_seen": 2735603712 }, { "epoch": 0.66, "learning_rate": 0.00017271122522667095, "loss": 0.0647, "theoretical_loss": 3.3730793457316444, "tokens_seen": 2735734784 }, { "epoch": 0.66, "learning_rate": 0.0001726711064751665, "loss": 0.0666, "theoretical_loss": 3.3730667933933955, "tokens_seen": 2735865856 }, { "epoch": 0.66, "learning_rate": 0.00017263098772366203, "loss": 0.0656, "theoretical_loss": 3.3730542418248732, "tokens_seen": 2735996928 }, { "epoch": 0.66, "learning_rate": 0.00017259086897215758, "loss": 0.0621, "theoretical_loss": 3.373041691025993, "tokens_seen": 2736128000 }, { "epoch": 0.66, "learning_rate": 0.00017255075022065314, "loss": 0.0616, "theoretical_loss": 3.373029140996671, "tokens_seen": 2736259072 }, { "epoch": 0.66, "learning_rate": 0.00017251063146914869, "loss": 0.0685, "theoretical_loss": 3.3730165917368238, "tokens_seen": 2736390144 }, { "epoch": 0.66, "learning_rate": 0.00017247051271764423, "loss": 0.0664, "theoretical_loss": 3.3730040432463664, "tokens_seen": 2736521216 }, { "epoch": 0.66, "learning_rate": 0.00017243039396613977, "loss": 0.0652, "theoretical_loss": 3.3729914955252154, "tokens_seen": 2736652288 }, { "epoch": 0.66, "learning_rate": 0.0001723902752146353, "loss": 0.0683, "theoretical_loss": 3.372978948573287, "tokens_seen": 2736783360 }, { "epoch": 0.66, "learning_rate": 0.00017235015646313088, "loss": 0.0582, "theoretical_loss": 3.372966402390496, "tokens_seen": 2736914432 }, { "epoch": 0.66, "learning_rate": 0.00017231003771162642, "loss": 0.0625, "theoretical_loss": 3.3729538569767596, "tokens_seen": 2737045504 }, { "epoch": 0.66, "learning_rate": 0.00017226991896012196, "loss": 0.0645, "theoretical_loss": 3.372941312331994, "tokens_seen": 2737176576 }, { "epoch": 0.66, "learning_rate": 0.0001722298002086175, "loss": 0.0673, "theoretical_loss": 3.3729287684561142, "tokens_seen": 2737307648 }, { "epoch": 0.66, "learning_rate": 0.00017218968145711304, "loss": 0.066, "theoretical_loss": 3.372916225349037, "tokens_seen": 2737438720 }, { "epoch": 0.66, "learning_rate": 0.0001721495627056086, "loss": 0.065, "theoretical_loss": 3.3729036830106778, "tokens_seen": 2737569792 }, { "epoch": 0.66, "learning_rate": 0.00017210944395410415, "loss": 0.0634, "theoretical_loss": 3.3728911414409537, "tokens_seen": 2737700864 }, { "epoch": 0.66, "objective/train/advantage_avg": -0.0006625280948355794, "objective/train/docs_used": 994721, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.316536545753479, "objective/train/original_loss": 1.3165364265441895, "objective/train/theoretical_loss": 3.3728786006397797, "objective/train/tokens_used": 1108356576, "objective/train/value_avg": -0.007205963134765625, "objective/train/value_loss": 0.0002027202135650441, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.2276611328125, "objective/train/value_reward_corr": 0.5997968877352807, "objective/train/value_std": 0.0114593505859375, "objective/train/weight_avg": 0.9994338154792786, "objective/train/weighted_lm_loss": 1.314007043838501, "objective/train/weights_max": 1.1423146724700928, "objective/train/weights_min": 0.44093945622444153, "theoretical_loss": 3.3728786006397797, "tokens_seen": 2737831936 }, { "epoch": 0.66, "learning_rate": 0.0001720693252025997, "loss": 0.067, "theoretical_loss": 3.3728786006397797, "tokens_seen": 2737831936 }, { "epoch": 0.66, "learning_rate": 0.00017202920645109524, "loss": 0.0618, "theoretical_loss": 3.3728660606070733, "tokens_seen": 2737963008 }, { "epoch": 0.66, "learning_rate": 0.0001719890876995908, "loss": 0.0635, "theoretical_loss": 3.372853521342749, "tokens_seen": 2738094080 }, { "epoch": 0.66, "learning_rate": 0.00017194896894808635, "loss": 0.0649, "theoretical_loss": 3.372840982846724, "tokens_seen": 2738225152 }, { "epoch": 0.66, "learning_rate": 0.00017190885019658189, "loss": 0.0627, "theoretical_loss": 3.3728284451189143, "tokens_seen": 2738356224 }, { "epoch": 0.66, "learning_rate": 0.00017186873144507743, "loss": 0.0629, "theoretical_loss": 3.372815908159236, "tokens_seen": 2738487296 }, { "epoch": 0.66, "learning_rate": 0.00017182861269357297, "loss": 0.0654, "theoretical_loss": 3.3728033719676045, "tokens_seen": 2738618368 }, { "epoch": 0.66, "learning_rate": 0.00017178849394206854, "loss": 0.0631, "theoretical_loss": 3.3727908365439374, "tokens_seen": 2738749440 }, { "epoch": 0.66, "learning_rate": 0.00017174837519056408, "loss": 0.0674, "theoretical_loss": 3.3727783018881503, "tokens_seen": 2738880512 }, { "epoch": 0.66, "learning_rate": 0.00017170825643905962, "loss": 0.0659, "theoretical_loss": 3.372765768000159, "tokens_seen": 2739011584 }, { "epoch": 0.66, "learning_rate": 0.00017166813768755516, "loss": 0.0655, "theoretical_loss": 3.3727532348798803, "tokens_seen": 2739142656 }, { "epoch": 0.66, "learning_rate": 0.0001716280189360507, "loss": 0.0674, "theoretical_loss": 3.37274070252723, "tokens_seen": 2739273728 }, { "epoch": 0.66, "learning_rate": 0.00017158790018454627, "loss": 0.0687, "theoretical_loss": 3.372728170942125, "tokens_seen": 2739404800 }, { "epoch": 0.66, "learning_rate": 0.0001715477814330418, "loss": 0.0633, "theoretical_loss": 3.3727156401244804, "tokens_seen": 2739535872 }, { "epoch": 0.66, "learning_rate": 0.00017150766268153735, "loss": 0.065, "theoretical_loss": 3.3727031100742133, "tokens_seen": 2739666944 }, { "epoch": 0.66, "learning_rate": 0.0001714675439300329, "loss": 0.0632, "theoretical_loss": 3.3726905807912404, "tokens_seen": 2739798016 }, { "epoch": 0.66, "learning_rate": 0.00017142742517852844, "loss": 0.0669, "theoretical_loss": 3.372678052275478, "tokens_seen": 2739929088 }, { "epoch": 0.66, "learning_rate": 0.000171387306427024, "loss": 0.0654, "theoretical_loss": 3.3726655245268407, "tokens_seen": 2740060160 }, { "epoch": 0.66, "learning_rate": 0.00017134718767551955, "loss": 0.0659, "theoretical_loss": 3.372652997545247, "tokens_seen": 2740191232 }, { "epoch": 0.66, "learning_rate": 0.0001713070689240151, "loss": 0.0643, "theoretical_loss": 3.3726404713306115, "tokens_seen": 2740322304 }, { "epoch": 0.66, "learning_rate": 0.00017126695017251063, "loss": 0.0599, "theoretical_loss": 3.3726279458828525, "tokens_seen": 2740453376 }, { "epoch": 0.66, "learning_rate": 0.00017122683142100617, "loss": 0.0662, "theoretical_loss": 3.3726154212018846, "tokens_seen": 2740584448 }, { "epoch": 0.66, "learning_rate": 0.00017118671266950174, "loss": 0.0625, "theoretical_loss": 3.3726028972876247, "tokens_seen": 2740715520 }, { "epoch": 0.66, "learning_rate": 0.00017114659391799728, "loss": 0.0614, "theoretical_loss": 3.37259037413999, "tokens_seen": 2740846592 }, { "epoch": 0.66, "learning_rate": 0.00017110647516649282, "loss": 0.0612, "theoretical_loss": 3.3725778517588956, "tokens_seen": 2740977664 }, { "epoch": 0.66, "objective/train/advantage_avg": 0.0004722050216514617, "objective/train/docs_used": 996102, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.367855191230774, "objective/train/original_loss": 1.367855191230774, "objective/train/theoretical_loss": 3.372565330144259, "objective/train/tokens_used": 1111633376, "objective/train/value_avg": -0.00901031494140625, "objective/train/value_loss": 0.0005755489692091942, "objective/train/value_max": -3.3974647521972656e-05, "objective/train/value_min": -0.82373046875, "objective/train/value_reward_corr": 0.6721836991384951, "objective/train/value_std": 0.0253448486328125, "objective/train/weight_avg": 1.0007519721984863, "objective/train/weighted_lm_loss": 1.3675812482833862, "objective/train/weights_max": 2.0637905597686768, "objective/train/weights_min": 0.38697242736816406, "theoretical_loss": 3.372565330144259, "tokens_seen": 2741108736 }, { "epoch": 0.66, "learning_rate": 0.00017106635641498836, "loss": 0.0644, "theoretical_loss": 3.372565330144259, "tokens_seen": 2741108736 }, { "epoch": 0.66, "learning_rate": 0.0001710262376634839, "loss": 0.0646, "theoretical_loss": 3.372552809295996, "tokens_seen": 2741239808 }, { "epoch": 0.66, "learning_rate": 0.00017098611891197947, "loss": 0.0682, "theoretical_loss": 3.3725402892140233, "tokens_seen": 2741370880 }, { "epoch": 0.66, "learning_rate": 0.000170946000160475, "loss": 0.0672, "theoretical_loss": 3.372527769898258, "tokens_seen": 2741501952 }, { "epoch": 0.66, "learning_rate": 0.00017090588140897055, "loss": 0.0618, "theoretical_loss": 3.372515251348615, "tokens_seen": 2741633024 }, { "epoch": 0.66, "learning_rate": 0.0001708657626574661, "loss": 0.0654, "theoretical_loss": 3.372502733565012, "tokens_seen": 2741764096 }, { "epoch": 0.66, "learning_rate": 0.00017082564390596164, "loss": 0.0656, "theoretical_loss": 3.372490216547366, "tokens_seen": 2741895168 }, { "epoch": 0.66, "learning_rate": 0.0001707855251544572, "loss": 0.0659, "theoretical_loss": 3.372477700295592, "tokens_seen": 2742026240 }, { "epoch": 0.66, "learning_rate": 0.00017074540640295275, "loss": 0.0626, "theoretical_loss": 3.3724651848096077, "tokens_seen": 2742157312 }, { "epoch": 0.66, "learning_rate": 0.0001707052876514483, "loss": 0.0635, "theoretical_loss": 3.372452670089329, "tokens_seen": 2742288384 }, { "epoch": 0.66, "learning_rate": 0.00017066516889994383, "loss": 0.065, "theoretical_loss": 3.372440156134673, "tokens_seen": 2742419456 }, { "epoch": 0.66, "learning_rate": 0.00017062505014843937, "loss": 0.0618, "theoretical_loss": 3.3724276429455555, "tokens_seen": 2742550528 }, { "epoch": 0.66, "learning_rate": 0.00017058493139693494, "loss": 0.0659, "theoretical_loss": 3.372415130521894, "tokens_seen": 2742681600 }, { "epoch": 0.66, "learning_rate": 0.00017054481264543048, "loss": 0.0634, "theoretical_loss": 3.3724026188636045, "tokens_seen": 2742812672 }, { "epoch": 0.66, "learning_rate": 0.00017050469389392602, "loss": 0.0661, "theoretical_loss": 3.372390107970604, "tokens_seen": 2742943744 }, { "epoch": 0.66, "learning_rate": 0.0001704645751424216, "loss": 0.0632, "theoretical_loss": 3.3723775978428083, "tokens_seen": 2743074816 }, { "epoch": 0.66, "learning_rate": 0.0001704244563909171, "loss": 0.0629, "theoretical_loss": 3.372365088480135, "tokens_seen": 2743205888 }, { "epoch": 0.66, "learning_rate": 0.00017038433763941267, "loss": 0.0643, "theoretical_loss": 3.3723525798825005, "tokens_seen": 2743336960 }, { "epoch": 0.66, "learning_rate": 0.0001703442188879082, "loss": 0.0625, "theoretical_loss": 3.372340072049821, "tokens_seen": 2743468032 }, { "epoch": 0.66, "learning_rate": 0.00017030410013640375, "loss": 0.0621, "theoretical_loss": 3.372327564982014, "tokens_seen": 2743599104 }, { "epoch": 0.66, "learning_rate": 0.00017026398138489932, "loss": 0.0654, "theoretical_loss": 3.3723150586789954, "tokens_seen": 2743730176 }, { "epoch": 0.66, "learning_rate": 0.00017022386263339484, "loss": 0.0662, "theoretical_loss": 3.372302553140682, "tokens_seen": 2743861248 }, { "epoch": 0.66, "learning_rate": 0.0001701837438818904, "loss": 0.0601, "theoretical_loss": 3.372290048366991, "tokens_seen": 2743992320 }, { "epoch": 0.66, "learning_rate": 0.00017014362513038595, "loss": 0.0663, "theoretical_loss": 3.3722775443578388, "tokens_seen": 2744123392 }, { "epoch": 0.66, "learning_rate": 0.0001701035063788815, "loss": 0.0655, "theoretical_loss": 3.3722650411131423, "tokens_seen": 2744254464 }, { "epoch": 0.66, "objective/train/advantage_avg": -0.00010380984895164147, "objective/train/docs_used": 997363, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.201129674911499, "objective/train/original_loss": 1.201129674911499, "objective/train/theoretical_loss": 3.372252538632818, "objective/train/tokens_used": 1114910176, "objective/train/value_avg": -0.006717681884765625, "objective/train/value_loss": 0.00016768566274549812, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.203125, "objective/train/value_reward_corr": 0.7716698799095428, "objective/train/value_std": 0.0126495361328125, "objective/train/weight_avg": 0.9999772310256958, "objective/train/weighted_lm_loss": 1.2009657621383667, "objective/train/weights_max": 1.1522581577301025, "objective/train/weights_min": 0.6318094730377197, "theoretical_loss": 3.372252538632818, "tokens_seen": 2744385536 }, { "epoch": 0.66, "learning_rate": 0.00017006338762737706, "loss": 0.0643, "theoretical_loss": 3.372252538632818, "tokens_seen": 2744385536 }, { "epoch": 0.66, "learning_rate": 0.00017002326887587257, "loss": 0.0674, "theoretical_loss": 3.372240036916783, "tokens_seen": 2744516608 }, { "epoch": 0.66, "learning_rate": 0.00016998315012436814, "loss": 0.0609, "theoretical_loss": 3.372227535964954, "tokens_seen": 2744647680 }, { "epoch": 0.66, "learning_rate": 0.00016994303137286368, "loss": 0.0631, "theoretical_loss": 3.3722150357772476, "tokens_seen": 2744778752 }, { "epoch": 0.66, "learning_rate": 0.00016990291262135922, "loss": 0.0644, "theoretical_loss": 3.372202536353581, "tokens_seen": 2744909824 }, { "epoch": 0.66, "learning_rate": 0.0001698627938698548, "loss": 0.0709, "theoretical_loss": 3.37219003769387, "tokens_seen": 2745040896 }, { "epoch": 0.66, "learning_rate": 0.0001698226751183503, "loss": 0.0645, "theoretical_loss": 3.3721775397980323, "tokens_seen": 2745171968 }, { "epoch": 0.66, "learning_rate": 0.00016978255636684587, "loss": 0.0682, "theoretical_loss": 3.372165042665985, "tokens_seen": 2745303040 }, { "epoch": 0.66, "learning_rate": 0.00016974243761534141, "loss": 0.0633, "theoretical_loss": 3.3721525462976443, "tokens_seen": 2745434112 }, { "epoch": 0.66, "learning_rate": 0.00016970231886383696, "loss": 0.0679, "theoretical_loss": 3.3721400506929275, "tokens_seen": 2745565184 }, { "epoch": 0.66, "learning_rate": 0.00016966220011233252, "loss": 0.0624, "theoretical_loss": 3.372127555851751, "tokens_seen": 2745696256 }, { "epoch": 0.66, "learning_rate": 0.00016962208136082804, "loss": 0.0627, "theoretical_loss": 3.3721150617740325, "tokens_seen": 2745827328 }, { "epoch": 0.66, "learning_rate": 0.0001695819626093236, "loss": 0.0667, "theoretical_loss": 3.372102568459688, "tokens_seen": 2745958400 }, { "epoch": 0.66, "learning_rate": 0.00016954184385781915, "loss": 0.0689, "theoretical_loss": 3.372090075908635, "tokens_seen": 2746089472 }, { "epoch": 0.66, "learning_rate": 0.00016950172510631472, "loss": 0.0649, "theoretical_loss": 3.372077584120791, "tokens_seen": 2746220544 }, { "epoch": 0.66, "learning_rate": 0.00016946160635481026, "loss": 0.0603, "theoretical_loss": 3.3720650930960714, "tokens_seen": 2746351616 }, { "epoch": 0.66, "learning_rate": 0.00016942148760330577, "loss": 0.0632, "theoretical_loss": 3.372052602834394, "tokens_seen": 2746482688 }, { "epoch": 0.66, "learning_rate": 0.00016938136885180134, "loss": 0.0676, "theoretical_loss": 3.372040113335676, "tokens_seen": 2746613760 }, { "epoch": 0.66, "learning_rate": 0.00016934125010029688, "loss": 0.0653, "theoretical_loss": 3.372027624599834, "tokens_seen": 2746744832 }, { "epoch": 0.66, "learning_rate": 0.00016930113134879245, "loss": 0.0653, "theoretical_loss": 3.3720151366267856, "tokens_seen": 2746875904 }, { "epoch": 0.66, "learning_rate": 0.000169261012597288, "loss": 0.0612, "theoretical_loss": 3.372002649416447, "tokens_seen": 2747006976 }, { "epoch": 0.66, "learning_rate": 0.0001692208938457835, "loss": 0.0667, "theoretical_loss": 3.3719901629687357, "tokens_seen": 2747138048 }, { "epoch": 0.67, "learning_rate": 0.00016918077509427907, "loss": 0.065, "theoretical_loss": 3.3719776772835686, "tokens_seen": 2747269120 }, { "epoch": 0.67, "learning_rate": 0.00016914065634277461, "loss": 0.0625, "theoretical_loss": 3.3719651923608627, "tokens_seen": 2747400192 }, { "epoch": 0.67, "learning_rate": 0.00016910053759127018, "loss": 0.0628, "theoretical_loss": 3.3719527082005354, "tokens_seen": 2747531264 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.00045022222911939025, "objective/train/docs_used": 998534, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3035837411880493, "objective/train/original_loss": 1.3035836219787598, "objective/train/theoretical_loss": 3.3719402248025037, "objective/train/tokens_used": 1118186976, "objective/train/value_avg": -0.00669097900390625, "objective/train/value_loss": 0.00013474792649503797, "objective/train/value_max": -1.5795230865478516e-05, "objective/train/value_min": -0.240234375, "objective/train/value_reward_corr": 0.691443259640616, "objective/train/value_std": 0.01141357421875, "objective/train/weight_avg": 1.000512719154358, "objective/train/weighted_lm_loss": 1.3032158613204956, "objective/train/weights_max": 1.1363937854766846, "objective/train/weights_min": 0.39622175693511963, "theoretical_loss": 3.3719402248025037, "tokens_seen": 2747662336 }, { "epoch": 0.67, "learning_rate": 0.00016906041883976572, "loss": 0.0657, "theoretical_loss": 3.3719402248025037, "tokens_seen": 2747662336 }, { "epoch": 0.67, "learning_rate": 0.00016902030008826124, "loss": 0.0593, "theoretical_loss": 3.371927742166684, "tokens_seen": 2747793408 }, { "epoch": 0.67, "learning_rate": 0.0001689801813367568, "loss": 0.0624, "theoretical_loss": 3.3719152602929947, "tokens_seen": 2747924480 }, { "epoch": 0.67, "learning_rate": 0.00016894006258525235, "loss": 0.0647, "theoretical_loss": 3.371902779181352, "tokens_seen": 2748055552 }, { "epoch": 0.67, "learning_rate": 0.00016889994383374792, "loss": 0.0663, "theoretical_loss": 3.371890298831673, "tokens_seen": 2748186624 }, { "epoch": 0.67, "learning_rate": 0.00016885982508224346, "loss": 0.0657, "theoretical_loss": 3.3718778192438745, "tokens_seen": 2748317696 }, { "epoch": 0.67, "learning_rate": 0.00016881970633073897, "loss": 0.0647, "theoretical_loss": 3.371865340417875, "tokens_seen": 2748448768 }, { "epoch": 0.67, "learning_rate": 0.00016877958757923454, "loss": 0.0677, "theoretical_loss": 3.371852862353591, "tokens_seen": 2748579840 }, { "epoch": 0.67, "learning_rate": 0.00016873946882773008, "loss": 0.0624, "theoretical_loss": 3.3718403850509384, "tokens_seen": 2748710912 }, { "epoch": 0.67, "learning_rate": 0.00016869935007622565, "loss": 0.0598, "theoretical_loss": 3.371827908509837, "tokens_seen": 2748841984 }, { "epoch": 0.67, "learning_rate": 0.0001686592313247212, "loss": 0.0657, "theoretical_loss": 3.3718154327302017, "tokens_seen": 2748973056 }, { "epoch": 0.67, "learning_rate": 0.0001686191125732167, "loss": 0.0631, "theoretical_loss": 3.371802957711951, "tokens_seen": 2749104128 }, { "epoch": 0.67, "learning_rate": 0.00016857899382171227, "loss": 0.0689, "theoretical_loss": 3.371790483455001, "tokens_seen": 2749235200 }, { "epoch": 0.67, "learning_rate": 0.00016853887507020781, "loss": 0.0666, "theoretical_loss": 3.3717780099592707, "tokens_seen": 2749366272 }, { "epoch": 0.67, "learning_rate": 0.00016849875631870338, "loss": 0.0629, "theoretical_loss": 3.3717655372246758, "tokens_seen": 2749497344 }, { "epoch": 0.67, "learning_rate": 0.00016845863756719892, "loss": 0.0671, "theoretical_loss": 3.371753065251134, "tokens_seen": 2749628416 }, { "epoch": 0.67, "learning_rate": 0.00016841851881569444, "loss": 0.068, "theoretical_loss": 3.3717405940385627, "tokens_seen": 2749759488 }, { "epoch": 0.67, "learning_rate": 0.00016837840006419, "loss": 0.0642, "theoretical_loss": 3.3717281235868795, "tokens_seen": 2749890560 }, { "epoch": 0.67, "learning_rate": 0.00016833828131268555, "loss": 0.0669, "theoretical_loss": 3.3717156538960014, "tokens_seen": 2750021632 }, { "epoch": 0.67, "learning_rate": 0.00016829816256118112, "loss": 0.0663, "theoretical_loss": 3.3717031849658454, "tokens_seen": 2750152704 }, { "epoch": 0.67, "learning_rate": 0.00016825804380967666, "loss": 0.0668, "theoretical_loss": 3.3716907167963295, "tokens_seen": 2750283776 }, { "epoch": 0.67, "learning_rate": 0.00016821792505817217, "loss": 0.0663, "theoretical_loss": 3.3716782493873705, "tokens_seen": 2750414848 }, { "epoch": 0.67, "learning_rate": 0.00016817780630666774, "loss": 0.063, "theoretical_loss": 3.3716657827388863, "tokens_seen": 2750545920 }, { "epoch": 0.67, "learning_rate": 0.00016813768755516328, "loss": 0.062, "theoretical_loss": 3.371653316850794, "tokens_seen": 2750676992 }, { "epoch": 0.67, "learning_rate": 0.00016809756880365885, "loss": 0.0658, "theoretical_loss": 3.3716408517230105, "tokens_seen": 2750808064 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.00018578708113636822, "objective/train/docs_used": 999260, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3761273622512817, "objective/train/original_loss": 1.3761272430419922, "objective/train/theoretical_loss": 3.371628387355454, "objective/train/tokens_used": 1121463776, "objective/train/value_avg": -0.004131317138671875, "objective/train/value_loss": 5.9940324717899784e-05, "objective/train/value_max": -2.4497509002685547e-05, "objective/train/value_min": -0.2012939453125, "objective/train/value_reward_corr": 0.6108162359119751, "objective/train/value_std": 0.006206512451171875, "objective/train/weight_avg": 1.0002154111862183, "objective/train/weighted_lm_loss": 1.3765864372253418, "objective/train/weights_max": 1.0865014791488647, "objective/train/weights_min": 0.8230646848678589, "theoretical_loss": 3.371628387355454, "tokens_seen": 2750939136 }, { "epoch": 0.67, "learning_rate": 0.0001680574500521544, "loss": 0.0628, "theoretical_loss": 3.371628387355454, "tokens_seen": 2750939136 }, { "epoch": 0.67, "learning_rate": 0.0001680173313006499, "loss": 0.0642, "theoretical_loss": 3.3716159237480414, "tokens_seen": 2751070208 }, { "epoch": 0.67, "learning_rate": 0.00016797721254914547, "loss": 0.068, "theoretical_loss": 3.3716034609006904, "tokens_seen": 2751201280 }, { "epoch": 0.67, "learning_rate": 0.00016793709379764102, "loss": 0.0608, "theoretical_loss": 3.371590998813318, "tokens_seen": 2751332352 }, { "epoch": 0.67, "learning_rate": 0.00016789697504613658, "loss": 0.0627, "theoretical_loss": 3.3715785374858425, "tokens_seen": 2751463424 }, { "epoch": 0.67, "learning_rate": 0.00016785685629463213, "loss": 0.0679, "theoretical_loss": 3.3715660769181808, "tokens_seen": 2751594496 }, { "epoch": 0.67, "learning_rate": 0.00016781673754312764, "loss": 0.0643, "theoretical_loss": 3.3715536171102505, "tokens_seen": 2751725568 }, { "epoch": 0.67, "learning_rate": 0.0001677766187916232, "loss": 0.0626, "theoretical_loss": 3.371541158061969, "tokens_seen": 2751856640 }, { "epoch": 0.67, "learning_rate": 0.00016773650004011875, "loss": 0.0673, "theoretical_loss": 3.3715286997732536, "tokens_seen": 2751987712 }, { "epoch": 0.67, "learning_rate": 0.00016769638128861432, "loss": 0.0648, "theoretical_loss": 3.3715162422440224, "tokens_seen": 2752118784 }, { "epoch": 0.67, "learning_rate": 0.00016765626253710986, "loss": 0.0612, "theoretical_loss": 3.3715037854741925, "tokens_seen": 2752249856 }, { "epoch": 0.67, "learning_rate": 0.00016761614378560537, "loss": 0.0667, "theoretical_loss": 3.3714913294636815, "tokens_seen": 2752380928 }, { "epoch": 0.67, "learning_rate": 0.00016757602503410094, "loss": 0.0662, "theoretical_loss": 3.371478874212407, "tokens_seen": 2752512000 }, { "epoch": 0.67, "learning_rate": 0.00016753590628259648, "loss": 0.066, "theoretical_loss": 3.371466419720287, "tokens_seen": 2752643072 }, { "epoch": 0.67, "learning_rate": 0.00016749578753109205, "loss": 0.0667, "theoretical_loss": 3.371453965987238, "tokens_seen": 2752774144 }, { "epoch": 0.67, "learning_rate": 0.0001674556687795876, "loss": 0.0654, "theoretical_loss": 3.3714415130131785, "tokens_seen": 2752905216 }, { "epoch": 0.67, "learning_rate": 0.0001674155500280831, "loss": 0.0632, "theoretical_loss": 3.371429060798026, "tokens_seen": 2753036288 }, { "epoch": 0.67, "learning_rate": 0.00016737543127657867, "loss": 0.067, "theoretical_loss": 3.371416609341698, "tokens_seen": 2753167360 }, { "epoch": 0.67, "learning_rate": 0.00016733531252507422, "loss": 0.0643, "theoretical_loss": 3.371404158644112, "tokens_seen": 2753298432 }, { "epoch": 0.67, "learning_rate": 0.00016729519377356978, "loss": 0.0657, "theoretical_loss": 3.371391708705186, "tokens_seen": 2753429504 }, { "epoch": 0.67, "learning_rate": 0.00016725507502206533, "loss": 0.0622, "theoretical_loss": 3.3713792595248373, "tokens_seen": 2753560576 }, { "epoch": 0.67, "learning_rate": 0.00016721495627056084, "loss": 0.0641, "theoretical_loss": 3.3713668111029835, "tokens_seen": 2753691648 }, { "epoch": 0.67, "learning_rate": 0.0001671748375190564, "loss": 0.0611, "theoretical_loss": 3.3713543634395426, "tokens_seen": 2753822720 }, { "epoch": 0.67, "learning_rate": 0.00016713471876755195, "loss": 0.0662, "theoretical_loss": 3.3713419165344325, "tokens_seen": 2753953792 }, { "epoch": 0.67, "learning_rate": 0.00016709460001604752, "loss": 0.073, "theoretical_loss": 3.3713294703875705, "tokens_seen": 2754084864 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.0006281702080741525, "objective/train/docs_used": 1000409, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2972497940063477, "objective/train/original_loss": 1.2972497940063477, "objective/train/theoretical_loss": 3.3713170249988744, "objective/train/tokens_used": 1124740576, "objective/train/value_avg": -0.007061004638671875, "objective/train/value_loss": 0.0001499951904406771, "objective/train/value_max": -4.00543212890625e-05, "objective/train/value_min": -0.4638671875, "objective/train/value_reward_corr": 0.8197074908901109, "objective/train/value_std": 0.017425537109375, "objective/train/weight_avg": 1.0006979703903198, "objective/train/weighted_lm_loss": 1.2979732751846313, "objective/train/weights_max": 1.329933762550354, "objective/train/weights_min": 0.4382029175758362, "theoretical_loss": 3.3713170249988744, "tokens_seen": 2754215936 }, { "epoch": 0.67, "learning_rate": 0.00016705448126454306, "loss": 0.0675, "theoretical_loss": 3.3713170249988744, "tokens_seen": 2754215936 }, { "epoch": 0.67, "learning_rate": 0.00016701436251303857, "loss": 0.0658, "theoretical_loss": 3.371304580368262, "tokens_seen": 2754347008 }, { "epoch": 0.67, "learning_rate": 0.00016697424376153414, "loss": 0.063, "theoretical_loss": 3.3712921364956507, "tokens_seen": 2754478080 }, { "epoch": 0.67, "learning_rate": 0.00016693412501002968, "loss": 0.0657, "theoretical_loss": 3.3712796933809592, "tokens_seen": 2754609152 }, { "epoch": 0.67, "learning_rate": 0.00016689400625852525, "loss": 0.0622, "theoretical_loss": 3.371267251024104, "tokens_seen": 2754740224 }, { "epoch": 0.67, "learning_rate": 0.0001668538875070208, "loss": 0.0662, "theoretical_loss": 3.3712548094250043, "tokens_seen": 2754871296 }, { "epoch": 0.67, "learning_rate": 0.00016681376875551633, "loss": 0.0633, "theoretical_loss": 3.3712423685835766, "tokens_seen": 2755002368 }, { "epoch": 0.67, "learning_rate": 0.00016677365000401188, "loss": 0.0662, "theoretical_loss": 3.37122992849974, "tokens_seen": 2755133440 }, { "epoch": 0.67, "learning_rate": 0.00016673353125250742, "loss": 0.0656, "theoretical_loss": 3.371217489173411, "tokens_seen": 2755264512 }, { "epoch": 0.67, "learning_rate": 0.00016669341250100298, "loss": 0.0639, "theoretical_loss": 3.371205050604508, "tokens_seen": 2755395584 }, { "epoch": 0.67, "learning_rate": 0.00016665329374949853, "loss": 0.0614, "theoretical_loss": 3.3711926127929486, "tokens_seen": 2755526656 }, { "epoch": 0.67, "learning_rate": 0.00016661317499799407, "loss": 0.063, "theoretical_loss": 3.371180175738652, "tokens_seen": 2755657728 }, { "epoch": 0.67, "learning_rate": 0.0001665730562464896, "loss": 0.0622, "theoretical_loss": 3.371167739441534, "tokens_seen": 2755788800 }, { "epoch": 0.67, "learning_rate": 0.00016653293749498515, "loss": 0.0643, "theoretical_loss": 3.371155303901514, "tokens_seen": 2755919872 }, { "epoch": 0.67, "learning_rate": 0.00016649281874348072, "loss": 0.0658, "theoretical_loss": 3.3711428691185095, "tokens_seen": 2756050944 }, { "epoch": 0.67, "learning_rate": 0.00016645269999197626, "loss": 0.068, "theoretical_loss": 3.3711304350924385, "tokens_seen": 2756182016 }, { "epoch": 0.67, "learning_rate": 0.0001664125812404718, "loss": 0.0666, "theoretical_loss": 3.371118001823218, "tokens_seen": 2756313088 }, { "epoch": 0.67, "learning_rate": 0.00016637246248896734, "loss": 0.0604, "theoretical_loss": 3.371105569310768, "tokens_seen": 2756444160 }, { "epoch": 0.67, "learning_rate": 0.00016633234373746288, "loss": 0.0659, "theoretical_loss": 3.371093137555004, "tokens_seen": 2756575232 }, { "epoch": 0.67, "learning_rate": 0.00016629222498595845, "loss": 0.0647, "theoretical_loss": 3.371080706555846, "tokens_seen": 2756706304 }, { "epoch": 0.67, "learning_rate": 0.000166252106234454, "loss": 0.0665, "theoretical_loss": 3.3710682763132107, "tokens_seen": 2756837376 }, { "epoch": 0.67, "learning_rate": 0.00016621198748294953, "loss": 0.0687, "theoretical_loss": 3.3710558468270166, "tokens_seen": 2756968448 }, { "epoch": 0.67, "learning_rate": 0.00016617186873144508, "loss": 0.0664, "theoretical_loss": 3.371043418097182, "tokens_seen": 2757099520 }, { "epoch": 0.67, "learning_rate": 0.00016613174997994062, "loss": 0.067, "theoretical_loss": 3.371030990123624, "tokens_seen": 2757230592 }, { "epoch": 0.67, "learning_rate": 0.00016609163122843619, "loss": 0.0682, "theoretical_loss": 3.3710185629062615, "tokens_seen": 2757361664 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.0002099705598084256, "objective/train/docs_used": 1001599, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3605930805206299, "objective/train/original_loss": 1.3605929613113403, "objective/train/theoretical_loss": 3.371006136445012, "objective/train/tokens_used": 1128017376, "objective/train/value_avg": -0.00815582275390625, "objective/train/value_loss": 9.077903814613819e-05, "objective/train/value_max": -2.777576446533203e-05, "objective/train/value_min": -0.232177734375, "objective/train/value_reward_corr": 0.7610554481920724, "objective/train/value_std": 0.0121002197265625, "objective/train/weight_avg": 1.0002551078796387, "objective/train/weighted_lm_loss": 1.3603637218475342, "objective/train/weights_max": 1.2613439559936523, "objective/train/weights_min": 0.8238751292228699, "theoretical_loss": 3.371006136445012, "tokens_seen": 2757492736 }, { "epoch": 0.67, "learning_rate": 0.00016605151247693173, "loss": 0.0689, "theoretical_loss": 3.371006136445012, "tokens_seen": 2757492736 }, { "epoch": 0.67, "learning_rate": 0.00016601139372542727, "loss": 0.0648, "theoretical_loss": 3.3709937107397945, "tokens_seen": 2757623808 }, { "epoch": 0.67, "learning_rate": 0.0001659712749739228, "loss": 0.0679, "theoretical_loss": 3.3709812857905255, "tokens_seen": 2757754880 }, { "epoch": 0.67, "learning_rate": 0.00016593115622241835, "loss": 0.0645, "theoretical_loss": 3.3709688615971247, "tokens_seen": 2757885952 }, { "epoch": 0.67, "learning_rate": 0.00016589103747091392, "loss": 0.0642, "theoretical_loss": 3.3709564381595087, "tokens_seen": 2758017024 }, { "epoch": 0.67, "learning_rate": 0.00016585091871940946, "loss": 0.0652, "theoretical_loss": 3.3709440154775967, "tokens_seen": 2758148096 }, { "epoch": 0.67, "learning_rate": 0.000165810799967905, "loss": 0.0683, "theoretical_loss": 3.370931593551307, "tokens_seen": 2758279168 }, { "epoch": 0.67, "learning_rate": 0.00016577068121640054, "loss": 0.0646, "theoretical_loss": 3.3709191723805567, "tokens_seen": 2758410240 }, { "epoch": 0.67, "learning_rate": 0.00016573056246489608, "loss": 0.0679, "theoretical_loss": 3.3709067519652645, "tokens_seen": 2758541312 }, { "epoch": 0.67, "learning_rate": 0.00016569044371339165, "loss": 0.067, "theoretical_loss": 3.3708943323053484, "tokens_seen": 2758672384 }, { "epoch": 0.67, "learning_rate": 0.0001656503249618872, "loss": 0.0643, "theoretical_loss": 3.370881913400727, "tokens_seen": 2758803456 }, { "epoch": 0.67, "learning_rate": 0.00016561020621038274, "loss": 0.0632, "theoretical_loss": 3.370869495251318, "tokens_seen": 2758934528 }, { "epoch": 0.67, "learning_rate": 0.00016557008745887828, "loss": 0.0626, "theoretical_loss": 3.37085707785704, "tokens_seen": 2759065600 }, { "epoch": 0.67, "learning_rate": 0.00016552996870737382, "loss": 0.0693, "theoretical_loss": 3.3708446612178107, "tokens_seen": 2759196672 }, { "epoch": 0.67, "learning_rate": 0.00016548984995586939, "loss": 0.0682, "theoretical_loss": 3.3708322453335486, "tokens_seen": 2759327744 }, { "epoch": 0.67, "learning_rate": 0.00016544973120436493, "loss": 0.0674, "theoretical_loss": 3.370819830204172, "tokens_seen": 2759458816 }, { "epoch": 0.67, "learning_rate": 0.00016540961245286047, "loss": 0.0661, "theoretical_loss": 3.3708074158295993, "tokens_seen": 2759589888 }, { "epoch": 0.67, "learning_rate": 0.000165369493701356, "loss": 0.0686, "theoretical_loss": 3.370795002209749, "tokens_seen": 2759720960 }, { "epoch": 0.67, "learning_rate": 0.00016532937494985155, "loss": 0.0628, "theoretical_loss": 3.3707825893445382, "tokens_seen": 2759852032 }, { "epoch": 0.67, "learning_rate": 0.00016528925619834712, "loss": 0.0644, "theoretical_loss": 3.3707701772338865, "tokens_seen": 2759983104 }, { "epoch": 0.67, "learning_rate": 0.00016524913744684266, "loss": 0.0667, "theoretical_loss": 3.3707577658777113, "tokens_seen": 2760114176 }, { "epoch": 0.67, "learning_rate": 0.0001652090186953382, "loss": 0.0659, "theoretical_loss": 3.370745355275931, "tokens_seen": 2760245248 }, { "epoch": 0.67, "learning_rate": 0.00016516889994383374, "loss": 0.0632, "theoretical_loss": 3.3707329454284647, "tokens_seen": 2760376320 }, { "epoch": 0.67, "learning_rate": 0.00016512878119232928, "loss": 0.0657, "theoretical_loss": 3.37072053633523, "tokens_seen": 2760507392 }, { "epoch": 0.67, "learning_rate": 0.00016508866244082485, "loss": 0.0637, "theoretical_loss": 3.3707081279961453, "tokens_seen": 2760638464 }, { "epoch": 0.67, "objective/train/advantage_avg": 0.0001519679935881868, "objective/train/docs_used": 1002769, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3162009716033936, "objective/train/original_loss": 1.3162009716033936, "objective/train/theoretical_loss": 3.3706957204111294, "objective/train/tokens_used": 1131294176, "objective/train/value_avg": -0.006664276123046875, "objective/train/value_loss": 0.00010854294669115916, "objective/train/value_max": -7.086992263793945e-05, "objective/train/value_min": -0.57666015625, "objective/train/value_reward_corr": 0.7832580481182725, "objective/train/value_std": 0.0128021240234375, "objective/train/weight_avg": 1.0002044439315796, "objective/train/weighted_lm_loss": 1.3163353204727173, "objective/train/weights_max": 1.220185399055481, "objective/train/weights_min": 0.5190214514732361, "theoretical_loss": 3.3706957204111294, "tokens_seen": 2760769536 }, { "epoch": 0.67, "learning_rate": 0.0001650485436893204, "loss": 0.0628, "theoretical_loss": 3.3706957204111294, "tokens_seen": 2760769536 }, { "epoch": 0.67, "learning_rate": 0.00016500842493781594, "loss": 0.0643, "theoretical_loss": 3.3706833135801, "tokens_seen": 2760900608 }, { "epoch": 0.67, "learning_rate": 0.00016496830618631148, "loss": 0.0623, "theoretical_loss": 3.3706709075029764, "tokens_seen": 2761031680 }, { "epoch": 0.67, "learning_rate": 0.00016492818743480702, "loss": 0.0664, "theoretical_loss": 3.370658502179676, "tokens_seen": 2761162752 }, { "epoch": 0.67, "learning_rate": 0.0001648880686833026, "loss": 0.0661, "theoretical_loss": 3.3706460976101185, "tokens_seen": 2761293824 }, { "epoch": 0.67, "learning_rate": 0.00016484794993179813, "loss": 0.067, "theoretical_loss": 3.370633693794221, "tokens_seen": 2761424896 }, { "epoch": 0.67, "learning_rate": 0.00016480783118029367, "loss": 0.0637, "theoretical_loss": 3.3706212907319024, "tokens_seen": 2761555968 }, { "epoch": 0.67, "learning_rate": 0.0001647677124287892, "loss": 0.0661, "theoretical_loss": 3.3706088884230816, "tokens_seen": 2761687040 }, { "epoch": 0.67, "learning_rate": 0.00016472759367728475, "loss": 0.0645, "theoretical_loss": 3.3705964868676763, "tokens_seen": 2761818112 }, { "epoch": 0.67, "learning_rate": 0.00016468747492578032, "loss": 0.0641, "theoretical_loss": 3.3705840860656053, "tokens_seen": 2761949184 }, { "epoch": 0.67, "learning_rate": 0.00016464735617427586, "loss": 0.065, "theoretical_loss": 3.370571686016788, "tokens_seen": 2762080256 }, { "epoch": 0.67, "learning_rate": 0.0001646072374227714, "loss": 0.0646, "theoretical_loss": 3.370559286721141, "tokens_seen": 2762211328 }, { "epoch": 0.67, "learning_rate": 0.00016456711867126694, "loss": 0.0631, "theoretical_loss": 3.370546888178585, "tokens_seen": 2762342400 }, { "epoch": 0.67, "learning_rate": 0.0001645269999197625, "loss": 0.0633, "theoretical_loss": 3.3705344903890366, "tokens_seen": 2762473472 }, { "epoch": 0.67, "learning_rate": 0.00016448688116825805, "loss": 0.0668, "theoretical_loss": 3.3705220933524154, "tokens_seen": 2762604544 }, { "epoch": 0.67, "learning_rate": 0.0001644467624167536, "loss": 0.0613, "theoretical_loss": 3.3705096970686395, "tokens_seen": 2762735616 }, { "epoch": 0.67, "learning_rate": 0.00016440664366524914, "loss": 0.0636, "theoretical_loss": 3.370497301537628, "tokens_seen": 2762866688 }, { "epoch": 0.67, "learning_rate": 0.00016436652491374468, "loss": 0.0622, "theoretical_loss": 3.370484906759299, "tokens_seen": 2762997760 }, { "epoch": 0.67, "learning_rate": 0.00016432640616224025, "loss": 0.065, "theoretical_loss": 3.3704725127335715, "tokens_seen": 2763128832 }, { "epoch": 0.67, "learning_rate": 0.0001642862874107358, "loss": 0.0643, "theoretical_loss": 3.370460119460364, "tokens_seen": 2763259904 }, { "epoch": 0.67, "learning_rate": 0.00016424616865923133, "loss": 0.0654, "theoretical_loss": 3.3704477269395943, "tokens_seen": 2763390976 }, { "epoch": 0.67, "learning_rate": 0.00016420604990772687, "loss": 0.069, "theoretical_loss": 3.3704353351711824, "tokens_seen": 2763522048 }, { "epoch": 0.67, "learning_rate": 0.0001641659311562224, "loss": 0.0641, "theoretical_loss": 3.370422944155046, "tokens_seen": 2763653120 }, { "epoch": 0.68, "learning_rate": 0.00016412581240471798, "loss": 0.0642, "theoretical_loss": 3.3704105538911038, "tokens_seen": 2763784192 }, { "epoch": 0.68, "learning_rate": 0.00016408569365321352, "loss": 0.0638, "theoretical_loss": 3.370398164379275, "tokens_seen": 2763915264 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.0005824301042594016, "objective/train/docs_used": 1003856, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2406164407730103, "objective/train/original_loss": 1.2406164407730103, "objective/train/theoretical_loss": 3.3703857756194777, "objective/train/tokens_used": 1134570976, "objective/train/value_avg": -0.008087158203125, "objective/train/value_loss": 0.00018010505300480872, "objective/train/value_max": -3.618001937866211e-05, "objective/train/value_min": -0.29931640625, "objective/train/value_reward_corr": 0.6956076384199376, "objective/train/value_std": 0.014129638671875, "objective/train/weight_avg": 1.0006639957427979, "objective/train/weighted_lm_loss": 1.2408028841018677, "objective/train/weights_max": 1.1831839084625244, "objective/train/weights_min": 0.3698268234729767, "theoretical_loss": 3.3703857756194777, "tokens_seen": 2764046336 }, { "epoch": 0.68, "learning_rate": 0.00016404557490170906, "loss": 0.0625, "theoretical_loss": 3.3703857756194777, "tokens_seen": 2764046336 }, { "epoch": 0.68, "learning_rate": 0.0001640054561502046, "loss": 0.0645, "theoretical_loss": 3.370373387611631, "tokens_seen": 2764177408 }, { "epoch": 0.68, "learning_rate": 0.00016396533739870014, "loss": 0.0614, "theoretical_loss": 3.3703610003556537, "tokens_seen": 2764308480 }, { "epoch": 0.68, "learning_rate": 0.0001639252186471957, "loss": 0.0672, "theoretical_loss": 3.370348613851464, "tokens_seen": 2764439552 }, { "epoch": 0.68, "learning_rate": 0.00016388509989569125, "loss": 0.0601, "theoretical_loss": 3.370336228098981, "tokens_seen": 2764570624 }, { "epoch": 0.68, "learning_rate": 0.0001638449811441868, "loss": 0.0648, "theoretical_loss": 3.3703238430981233, "tokens_seen": 2764701696 }, { "epoch": 0.68, "learning_rate": 0.00016380486239268234, "loss": 0.0634, "theoretical_loss": 3.3703114588488097, "tokens_seen": 2764832768 }, { "epoch": 0.68, "learning_rate": 0.00016376474364117788, "loss": 0.0663, "theoretical_loss": 3.3702990753509594, "tokens_seen": 2764963840 }, { "epoch": 0.68, "learning_rate": 0.00016372462488967345, "loss": 0.063, "theoretical_loss": 3.3702866926044903, "tokens_seen": 2765094912 }, { "epoch": 0.68, "learning_rate": 0.000163684506138169, "loss": 0.0646, "theoretical_loss": 3.3702743106093216, "tokens_seen": 2765225984 }, { "epoch": 0.68, "learning_rate": 0.00016364438738666453, "loss": 0.0614, "theoretical_loss": 3.3702619293653724, "tokens_seen": 2765357056 }, { "epoch": 0.68, "learning_rate": 0.00016360426863516007, "loss": 0.0642, "theoretical_loss": 3.370249548872561, "tokens_seen": 2765488128 }, { "epoch": 0.68, "learning_rate": 0.0001635641498836556, "loss": 0.067, "theoretical_loss": 3.370237169130807, "tokens_seen": 2765619200 }, { "epoch": 0.68, "learning_rate": 0.00016352403113215118, "loss": 0.064, "theoretical_loss": 3.3702247901400284, "tokens_seen": 2765750272 }, { "epoch": 0.68, "learning_rate": 0.00016348391238064672, "loss": 0.0636, "theoretical_loss": 3.3702124119001446, "tokens_seen": 2765881344 }, { "epoch": 0.68, "learning_rate": 0.00016344379362914226, "loss": 0.0637, "theoretical_loss": 3.3702000344110745, "tokens_seen": 2766012416 }, { "epoch": 0.68, "learning_rate": 0.0001634036748776378, "loss": 0.0612, "theoretical_loss": 3.3701876576727368, "tokens_seen": 2766143488 }, { "epoch": 0.68, "learning_rate": 0.00016336355612613335, "loss": 0.0653, "theoretical_loss": 3.37017528168505, "tokens_seen": 2766274560 }, { "epoch": 0.68, "learning_rate": 0.0001633234373746289, "loss": 0.0627, "theoretical_loss": 3.3701629064479333, "tokens_seen": 2766405632 }, { "epoch": 0.68, "learning_rate": 0.00016328331862312445, "loss": 0.0688, "theoretical_loss": 3.370150531961306, "tokens_seen": 2766536704 }, { "epoch": 0.68, "learning_rate": 0.00016324319987162, "loss": 0.0692, "theoretical_loss": 3.370138158225087, "tokens_seen": 2766667776 }, { "epoch": 0.68, "learning_rate": 0.00016320308112011554, "loss": 0.0618, "theoretical_loss": 3.3701257852391944, "tokens_seen": 2766798848 }, { "epoch": 0.68, "learning_rate": 0.00016316296236861108, "loss": 0.0675, "theoretical_loss": 3.370113413003548, "tokens_seen": 2766929920 }, { "epoch": 0.68, "learning_rate": 0.00016312284361710665, "loss": 0.0651, "theoretical_loss": 3.3701010415180668, "tokens_seen": 2767060992 }, { "epoch": 0.68, "learning_rate": 0.0001630827248656022, "loss": 0.0662, "theoretical_loss": 3.3700886707826694, "tokens_seen": 2767192064 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.001512386603280902, "objective/train/docs_used": 1005007, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3032426834106445, "objective/train/original_loss": 1.3032426834106445, "objective/train/theoretical_loss": 3.3700763007972747, "objective/train/tokens_used": 1137847776, "objective/train/value_avg": -0.00946807861328125, "objective/train/value_loss": 0.000440017698565498, "objective/train/value_max": -6.604194641113281e-05, "objective/train/value_min": -0.8876953125, "objective/train/value_reward_corr": 0.6925791716494596, "objective/train/value_std": 0.0222625732421875, "objective/train/weight_avg": 1.0017192363739014, "objective/train/weighted_lm_loss": 1.304863691329956, "objective/train/weights_max": 2.1810035705566406, "objective/train/weights_min": 0.3684816360473633, "theoretical_loss": 3.3700763007972747, "tokens_seen": 2767323136 }, { "epoch": 0.68, "learning_rate": 0.00016304260611409773, "loss": 0.0676, "theoretical_loss": 3.3700763007972747, "tokens_seen": 2767323136 }, { "epoch": 0.68, "learning_rate": 0.00016300248736259327, "loss": 0.0644, "theoretical_loss": 3.370063931561802, "tokens_seen": 2767454208 }, { "epoch": 0.68, "learning_rate": 0.0001629623686110888, "loss": 0.064, "theoretical_loss": 3.3700515630761703, "tokens_seen": 2767585280 }, { "epoch": 0.68, "learning_rate": 0.00016292224985958438, "loss": 0.0635, "theoretical_loss": 3.3700391953402984, "tokens_seen": 2767716352 }, { "epoch": 0.68, "learning_rate": 0.00016288213110807992, "loss": 0.0642, "theoretical_loss": 3.3700268283541055, "tokens_seen": 2767847424 }, { "epoch": 0.68, "learning_rate": 0.00016284201235657546, "loss": 0.0658, "theoretical_loss": 3.3700144621175108, "tokens_seen": 2767978496 }, { "epoch": 0.68, "learning_rate": 0.000162801893605071, "loss": 0.0659, "theoretical_loss": 3.3700020966304334, "tokens_seen": 2768109568 }, { "epoch": 0.68, "learning_rate": 0.00016276177485356655, "loss": 0.0613, "theoretical_loss": 3.369989731892792, "tokens_seen": 2768240640 }, { "epoch": 0.68, "learning_rate": 0.00016272165610206211, "loss": 0.0638, "theoretical_loss": 3.3699773679045064, "tokens_seen": 2768371712 }, { "epoch": 0.68, "learning_rate": 0.00016268153735055766, "loss": 0.0641, "theoretical_loss": 3.3699650046654948, "tokens_seen": 2768502784 }, { "epoch": 0.68, "learning_rate": 0.0001626414185990532, "loss": 0.0644, "theoretical_loss": 3.3699526421756767, "tokens_seen": 2768633856 }, { "epoch": 0.68, "learning_rate": 0.00016260129984754877, "loss": 0.0654, "theoretical_loss": 3.3699402804349714, "tokens_seen": 2768764928 }, { "epoch": 0.68, "learning_rate": 0.00016256118109604428, "loss": 0.0655, "theoretical_loss": 3.369927919443298, "tokens_seen": 2768896000 }, { "epoch": 0.68, "learning_rate": 0.00016252106234453985, "loss": 0.0654, "theoretical_loss": 3.3699155592005754, "tokens_seen": 2769027072 }, { "epoch": 0.68, "learning_rate": 0.0001624809435930354, "loss": 0.0653, "theoretical_loss": 3.3699031997067235, "tokens_seen": 2769158144 }, { "epoch": 0.68, "learning_rate": 0.00016244082484153093, "loss": 0.0638, "theoretical_loss": 3.36989084096166, "tokens_seen": 2769289216 }, { "epoch": 0.68, "learning_rate": 0.0001624007060900265, "loss": 0.066, "theoretical_loss": 3.369878482965306, "tokens_seen": 2769420288 }, { "epoch": 0.68, "learning_rate": 0.000162360587338522, "loss": 0.0639, "theoretical_loss": 3.3698661257175795, "tokens_seen": 2769551360 }, { "epoch": 0.68, "learning_rate": 0.00016232046858701758, "loss": 0.064, "theoretical_loss": 3.3698537692184, "tokens_seen": 2769682432 }, { "epoch": 0.68, "learning_rate": 0.00016228034983551312, "loss": 0.0632, "theoretical_loss": 3.369841413467687, "tokens_seen": 2769813504 }, { "epoch": 0.68, "learning_rate": 0.00016224023108400866, "loss": 0.0679, "theoretical_loss": 3.369829058465359, "tokens_seen": 2769944576 }, { "epoch": 0.68, "learning_rate": 0.00016220011233250423, "loss": 0.0645, "theoretical_loss": 3.3698167042113356, "tokens_seen": 2770075648 }, { "epoch": 0.68, "learning_rate": 0.00016215999358099975, "loss": 0.0627, "theoretical_loss": 3.369804350705537, "tokens_seen": 2770206720 }, { "epoch": 0.68, "learning_rate": 0.00016211987482949531, "loss": 0.0642, "theoretical_loss": 3.369791997947881, "tokens_seen": 2770337792 }, { "epoch": 0.68, "learning_rate": 0.00016207975607799086, "loss": 0.0652, "theoretical_loss": 3.3697796459382876, "tokens_seen": 2770468864 }, { "epoch": 0.68, "objective/train/advantage_avg": 0.00016182383114937693, "objective/train/docs_used": 1006223, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.27201247215271, "objective/train/original_loss": 1.27201247215271, "objective/train/theoretical_loss": 3.3697672946766764, "objective/train/tokens_used": 1141124576, "objective/train/value_avg": -0.0052032470703125, "objective/train/value_loss": 7.446388917742297e-05, "objective/train/value_max": -3.790855407714844e-05, "objective/train/value_min": -0.2235107421875, "objective/train/value_reward_corr": 0.7316084237888743, "objective/train/value_std": 0.0095062255859375, "objective/train/weight_avg": 1.0001986026763916, "objective/train/weighted_lm_loss": 1.2719734907150269, "objective/train/weights_max": 1.1296266317367554, "objective/train/weights_min": 0.8191763758659363, "theoretical_loss": 3.3697672946766764, "tokens_seen": 2770599936 }, { "epoch": 0.68, "learning_rate": 0.00016203963732648642, "loss": 0.065, "theoretical_loss": 3.3697672946766764, "tokens_seen": 2770599936 }, { "epoch": 0.68, "learning_rate": 0.00016199951857498197, "loss": 0.0646, "theoretical_loss": 3.3697549441629664, "tokens_seen": 2770731008 }, { "epoch": 0.68, "learning_rate": 0.00016195939982347748, "loss": 0.0674, "theoretical_loss": 3.369742594397077, "tokens_seen": 2770862080 }, { "epoch": 0.68, "learning_rate": 0.00016191928107197305, "loss": 0.0634, "theoretical_loss": 3.3697302453789275, "tokens_seen": 2770993152 }, { "epoch": 0.68, "learning_rate": 0.0001618791623204686, "loss": 0.0665, "theoretical_loss": 3.369717897108437, "tokens_seen": 2771124224 }, { "epoch": 0.68, "learning_rate": 0.00016183904356896416, "loss": 0.065, "theoretical_loss": 3.3697055495855253, "tokens_seen": 2771255296 }, { "epoch": 0.68, "learning_rate": 0.0001617989248174597, "loss": 0.0656, "theoretical_loss": 3.3696932028101116, "tokens_seen": 2771386368 }, { "epoch": 0.68, "learning_rate": 0.0001617588060659552, "loss": 0.0653, "theoretical_loss": 3.3696808567821157, "tokens_seen": 2771517440 }, { "epoch": 0.68, "learning_rate": 0.00016171868731445078, "loss": 0.0651, "theoretical_loss": 3.369668511501456, "tokens_seen": 2771648512 }, { "epoch": 0.68, "learning_rate": 0.00016167856856294632, "loss": 0.0663, "theoretical_loss": 3.369656166968053, "tokens_seen": 2771779584 }, { "epoch": 0.68, "learning_rate": 0.0001616384498114419, "loss": 0.0661, "theoretical_loss": 3.3696438231818258, "tokens_seen": 2771910656 }, { "epoch": 0.68, "learning_rate": 0.00016159833105993743, "loss": 0.0619, "theoretical_loss": 3.3696314801426936, "tokens_seen": 2772041728 }, { "epoch": 0.68, "learning_rate": 0.00016155821230843295, "loss": 0.0677, "theoretical_loss": 3.369619137850576, "tokens_seen": 2772172800 }, { "epoch": 0.68, "learning_rate": 0.00016151809355692852, "loss": 0.0613, "theoretical_loss": 3.3696067963053924, "tokens_seen": 2772303872 }, { "epoch": 0.68, "learning_rate": 0.00016147797480542406, "loss": 0.0668, "theoretical_loss": 3.3695944555070625, "tokens_seen": 2772434944 }, { "epoch": 0.68, "learning_rate": 0.00016143785605391962, "loss": 0.064, "theoretical_loss": 3.369582115455506, "tokens_seen": 2772566016 }, { "epoch": 0.68, "learning_rate": 0.00016139773730241517, "loss": 0.0658, "theoretical_loss": 3.3695697761506413, "tokens_seen": 2772697088 }, { "epoch": 0.68, "learning_rate": 0.00016135761855091068, "loss": 0.0652, "theoretical_loss": 3.3695574375923893, "tokens_seen": 2772828160 }, { "epoch": 0.68, "learning_rate": 0.00016131749979940625, "loss": 0.0637, "theoretical_loss": 3.3695450997806686, "tokens_seen": 2772959232 }, { "epoch": 0.68, "learning_rate": 0.0001612773810479018, "loss": 0.0667, "theoretical_loss": 3.3695327627153993, "tokens_seen": 2773090304 }, { "epoch": 0.68, "learning_rate": 0.00016123726229639736, "loss": 0.064, "theoretical_loss": 3.369520426396501, "tokens_seen": 2773221376 }, { "epoch": 0.68, "learning_rate": 0.0001611971435448929, "loss": 0.0641, "theoretical_loss": 3.369508090823892, "tokens_seen": 2773352448 }, { "epoch": 0.68, "learning_rate": 0.00016115702479338841, "loss": 0.0673, "theoretical_loss": 3.3694957559974936, "tokens_seen": 2773483520 }, { "epoch": 0.68, "learning_rate": 0.00016111690604188398, "loss": 0.0678, "theoretical_loss": 3.369483421917225, "tokens_seen": 2773614592 }, { "epoch": 0.68, "learning_rate": 0.00016107678729037952, "loss": 0.0682, "theoretical_loss": 3.3694710885830053, "tokens_seen": 2773745664 }, { "epoch": 0.68, "objective/train/advantage_avg": -0.00022119656205177307, "objective/train/docs_used": 1007399, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.221588373184204, "objective/train/original_loss": 1.221588373184204, "objective/train/theoretical_loss": 3.369458755994754, "objective/train/tokens_used": 1144401376, "objective/train/value_avg": -0.00899505615234375, "objective/train/value_loss": 0.00016004023200366646, "objective/train/value_max": -4.398822784423828e-05, "objective/train/value_min": -0.7587890625, "objective/train/value_reward_corr": 0.7456196356822933, "objective/train/value_std": 0.015228271484375, "objective/train/weight_avg": 0.9998599290847778, "objective/train/weighted_lm_loss": 1.2207692861557007, "objective/train/weights_max": 2.135688543319702, "objective/train/weights_min": 0.7178144454956055, "theoretical_loss": 3.369458755994754, "tokens_seen": 2773876736 }, { "epoch": 0.68, "learning_rate": 0.0001610366685388751, "loss": 0.0647, "theoretical_loss": 3.369458755994754, "tokens_seen": 2773876736 }, { "epoch": 0.68, "learning_rate": 0.00016099654978737063, "loss": 0.0638, "theoretical_loss": 3.3694464241523914, "tokens_seen": 2774007808 }, { "epoch": 0.68, "learning_rate": 0.00016095643103586615, "loss": 0.064, "theoretical_loss": 3.369434093055837, "tokens_seen": 2774138880 }, { "epoch": 0.68, "learning_rate": 0.00016091631228436172, "loss": 0.0639, "theoretical_loss": 3.36942176270501, "tokens_seen": 2774269952 }, { "epoch": 0.68, "learning_rate": 0.00016087619353285726, "loss": 0.0675, "theoretical_loss": 3.36940943309983, "tokens_seen": 2774401024 }, { "epoch": 0.68, "learning_rate": 0.00016083607478135283, "loss": 0.0624, "theoretical_loss": 3.3693971042402175, "tokens_seen": 2774532096 }, { "epoch": 0.68, "learning_rate": 0.00016079595602984837, "loss": 0.064, "theoretical_loss": 3.3693847761260916, "tokens_seen": 2774663168 }, { "epoch": 0.68, "learning_rate": 0.00016075583727834388, "loss": 0.0663, "theoretical_loss": 3.369372448757372, "tokens_seen": 2774794240 }, { "epoch": 0.68, "learning_rate": 0.00016071571852683945, "loss": 0.0614, "theoretical_loss": 3.369360122133979, "tokens_seen": 2774925312 }, { "epoch": 0.68, "learning_rate": 0.000160675599775335, "loss": 0.062, "theoretical_loss": 3.3693477962558314, "tokens_seen": 2775056384 }, { "epoch": 0.68, "learning_rate": 0.00016063548102383056, "loss": 0.0622, "theoretical_loss": 3.36933547112285, "tokens_seen": 2775187456 }, { "epoch": 0.68, "learning_rate": 0.0001605953622723261, "loss": 0.0635, "theoretical_loss": 3.3693231467349536, "tokens_seen": 2775318528 }, { "epoch": 0.68, "learning_rate": 0.00016055524352082161, "loss": 0.0626, "theoretical_loss": 3.3693108230920625, "tokens_seen": 2775449600 }, { "epoch": 0.68, "learning_rate": 0.00016051512476931718, "loss": 0.0633, "theoretical_loss": 3.3692985001940965, "tokens_seen": 2775580672 }, { "epoch": 0.68, "learning_rate": 0.00016047500601781272, "loss": 0.0632, "theoretical_loss": 3.3692861780409746, "tokens_seen": 2775711744 }, { "epoch": 0.68, "learning_rate": 0.0001604348872663083, "loss": 0.064, "theoretical_loss": 3.369273856632618, "tokens_seen": 2775842816 }, { "epoch": 0.68, "learning_rate": 0.00016039476851480383, "loss": 0.0629, "theoretical_loss": 3.3692615359689455, "tokens_seen": 2775973888 }, { "epoch": 0.68, "learning_rate": 0.00016035464976329935, "loss": 0.0652, "theoretical_loss": 3.3692492160498775, "tokens_seen": 2776104960 }, { "epoch": 0.68, "learning_rate": 0.00016031453101179492, "loss": 0.0642, "theoretical_loss": 3.3692368968753335, "tokens_seen": 2776236032 }, { "epoch": 0.68, "learning_rate": 0.00016027441226029046, "loss": 0.0615, "theoretical_loss": 3.369224578445233, "tokens_seen": 2776367104 }, { "epoch": 0.68, "learning_rate": 0.00016023429350878603, "loss": 0.0641, "theoretical_loss": 3.3692122607594968, "tokens_seen": 2776498176 }, { "epoch": 0.68, "learning_rate": 0.00016019417475728157, "loss": 0.0605, "theoretical_loss": 3.369199943818044, "tokens_seen": 2776629248 }, { "epoch": 0.68, "learning_rate": 0.00016015405600577708, "loss": 0.0624, "theoretical_loss": 3.369187627620795, "tokens_seen": 2776760320 }, { "epoch": 0.68, "learning_rate": 0.00016011393725427265, "loss": 0.0668, "theoretical_loss": 3.3691753121676693, "tokens_seen": 2776891392 }, { "epoch": 0.68, "learning_rate": 0.0001600738185027682, "loss": 0.063, "theoretical_loss": 3.369162997458587, "tokens_seen": 2777022464 }, { "epoch": 0.68, "objective/train/advantage_avg": -0.0001963702670764178, "objective/train/docs_used": 1008708, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4306163787841797, "objective/train/original_loss": 1.4306162595748901, "objective/train/theoretical_loss": 3.3691506834934684, "objective/train/tokens_used": 1147678176, "objective/train/value_avg": -0.01285552978515625, "objective/train/value_loss": 0.0007117040222510695, "objective/train/value_max": -1.5437602996826172e-05, "objective/train/value_min": -0.994140625, "objective/train/value_reward_corr": 0.725471245127362, "objective/train/value_std": 0.0271453857421875, "objective/train/weight_avg": 1.0001131296157837, "objective/train/weighted_lm_loss": 1.4284851551055908, "objective/train/weights_max": 1.5516058206558228, "objective/train/weights_min": 0.23250064253807068, "theoretical_loss": 3.3691506834934684, "tokens_seen": 2777153536 }, { "epoch": 0.68, "learning_rate": 0.00016003369975126376, "loss": 0.0718, "theoretical_loss": 3.3691506834934684, "tokens_seen": 2777153536 }, { "epoch": 0.68, "learning_rate": 0.0001599935809997593, "loss": 0.0636, "theoretical_loss": 3.369138370272233, "tokens_seen": 2777284608 }, { "epoch": 0.68, "learning_rate": 0.00015995346224825481, "loss": 0.0596, "theoretical_loss": 3.3691260577948006, "tokens_seen": 2777415680 }, { "epoch": 0.68, "learning_rate": 0.00015991334349675038, "loss": 0.0634, "theoretical_loss": 3.3691137460610916, "tokens_seen": 2777546752 }, { "epoch": 0.68, "learning_rate": 0.00015987322474524592, "loss": 0.0643, "theoretical_loss": 3.3691014350710256, "tokens_seen": 2777677824 }, { "epoch": 0.68, "learning_rate": 0.0001598331059937415, "loss": 0.065, "theoretical_loss": 3.369089124824523, "tokens_seen": 2777808896 }, { "epoch": 0.68, "learning_rate": 0.00015979298724223703, "loss": 0.0645, "theoretical_loss": 3.3690768153215034, "tokens_seen": 2777939968 }, { "epoch": 0.68, "learning_rate": 0.00015975286849073255, "loss": 0.0688, "theoretical_loss": 3.3690645065618874, "tokens_seen": 2778071040 }, { "epoch": 0.68, "learning_rate": 0.00015971274973922812, "loss": 0.0649, "theoretical_loss": 3.3690521985455946, "tokens_seen": 2778202112 }, { "epoch": 0.68, "learning_rate": 0.00015967263098772366, "loss": 0.0635, "theoretical_loss": 3.3690398912725446, "tokens_seen": 2778333184 }, { "epoch": 0.68, "learning_rate": 0.00015963251223621923, "loss": 0.0637, "theoretical_loss": 3.3690275847426587, "tokens_seen": 2778464256 }, { "epoch": 0.68, "learning_rate": 0.00015959239348471477, "loss": 0.0645, "theoretical_loss": 3.369015278955856, "tokens_seen": 2778595328 }, { "epoch": 0.68, "learning_rate": 0.00015955227473321028, "loss": 0.0666, "theoretical_loss": 3.3690029739120564, "tokens_seen": 2778726400 }, { "epoch": 0.68, "learning_rate": 0.00015951215598170585, "loss": 0.0669, "theoretical_loss": 3.368990669611181, "tokens_seen": 2778857472 }, { "epoch": 0.68, "learning_rate": 0.0001594720372302014, "loss": 0.0659, "theoretical_loss": 3.368978366053149, "tokens_seen": 2778988544 }, { "epoch": 0.68, "learning_rate": 0.00015943191847869696, "loss": 0.062, "theoretical_loss": 3.3689660632378806, "tokens_seen": 2779119616 }, { "epoch": 0.68, "learning_rate": 0.0001593917997271925, "loss": 0.064, "theoretical_loss": 3.3689537611652964, "tokens_seen": 2779250688 }, { "epoch": 0.68, "learning_rate": 0.00015935168097568804, "loss": 0.0639, "theoretical_loss": 3.3689414598353165, "tokens_seen": 2779381760 }, { "epoch": 0.68, "learning_rate": 0.00015931156222418358, "loss": 0.065, "theoretical_loss": 3.3689291592478607, "tokens_seen": 2779512832 }, { "epoch": 0.68, "learning_rate": 0.00015927144347267913, "loss": 0.0636, "theoretical_loss": 3.3689168594028494, "tokens_seen": 2779643904 }, { "epoch": 0.68, "learning_rate": 0.0001592313247211747, "loss": 0.0651, "theoretical_loss": 3.3689045603002024, "tokens_seen": 2779774976 }, { "epoch": 0.68, "learning_rate": 0.00015919120596967023, "loss": 0.0662, "theoretical_loss": 3.3688922619398407, "tokens_seen": 2779906048 }, { "epoch": 0.68, "learning_rate": 0.00015915108721816578, "loss": 0.0675, "theoretical_loss": 3.3688799643216836, "tokens_seen": 2780037120 }, { "epoch": 0.68, "learning_rate": 0.00015911096846666132, "loss": 0.0632, "theoretical_loss": 3.368867667445652, "tokens_seen": 2780168192 }, { "epoch": 0.69, "learning_rate": 0.00015907084971515686, "loss": 0.0631, "theoretical_loss": 3.368855371311666, "tokens_seen": 2780299264 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.0002878143859561533, "objective/train/docs_used": 1010001, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.236162781715393, "objective/train/original_loss": 1.2361629009246826, "objective/train/theoretical_loss": 3.3688430759196453, "objective/train/tokens_used": 1150954976, "objective/train/value_avg": -0.007083892822265625, "objective/train/value_loss": 0.00011704228381859139, "objective/train/value_max": -3.170967102050781e-05, "objective/train/value_min": -0.2384033203125, "objective/train/value_reward_corr": 0.7836025865370666, "objective/train/value_std": 0.0139007568359375, "objective/train/weight_avg": 1.0003418922424316, "objective/train/weighted_lm_loss": 1.2362388372421265, "objective/train/weights_max": 1.1216298341751099, "objective/train/weights_min": 0.3684542179107666, "theoretical_loss": 3.3688430759196453, "tokens_seen": 2780430336 }, { "epoch": 0.69, "learning_rate": 0.00015903073096365243, "loss": 0.0661, "theoretical_loss": 3.3688430759196453, "tokens_seen": 2780430336 }, { "epoch": 0.69, "learning_rate": 0.00015899061221214797, "loss": 0.0644, "theoretical_loss": 3.368830781269511, "tokens_seen": 2780561408 }, { "epoch": 0.69, "learning_rate": 0.0001589504934606435, "loss": 0.0624, "theoretical_loss": 3.3688184873611826, "tokens_seen": 2780692480 }, { "epoch": 0.69, "learning_rate": 0.00015891037470913905, "loss": 0.0638, "theoretical_loss": 3.3688061941945806, "tokens_seen": 2780823552 }, { "epoch": 0.69, "learning_rate": 0.0001588702559576346, "loss": 0.0679, "theoretical_loss": 3.368793901769626, "tokens_seen": 2780954624 }, { "epoch": 0.69, "learning_rate": 0.00015883013720613016, "loss": 0.0671, "theoretical_loss": 3.3687816100862378, "tokens_seen": 2781085696 }, { "epoch": 0.69, "learning_rate": 0.0001587900184546257, "loss": 0.0677, "theoretical_loss": 3.3687693191443375, "tokens_seen": 2781216768 }, { "epoch": 0.69, "learning_rate": 0.00015874989970312124, "loss": 0.0661, "theoretical_loss": 3.368757028943845, "tokens_seen": 2781347840 }, { "epoch": 0.69, "learning_rate": 0.00015870978095161678, "loss": 0.0611, "theoretical_loss": 3.3687447394846806, "tokens_seen": 2781478912 }, { "epoch": 0.69, "learning_rate": 0.00015866966220011233, "loss": 0.0684, "theoretical_loss": 3.3687324507667644, "tokens_seen": 2781609984 }, { "epoch": 0.69, "learning_rate": 0.0001586295434486079, "loss": 0.0672, "theoretical_loss": 3.3687201627900167, "tokens_seen": 2781741056 }, { "epoch": 0.69, "learning_rate": 0.00015858942469710344, "loss": 0.0661, "theoretical_loss": 3.368707875554359, "tokens_seen": 2781872128 }, { "epoch": 0.69, "learning_rate": 0.00015854930594559898, "loss": 0.0645, "theoretical_loss": 3.36869558905971, "tokens_seen": 2782003200 }, { "epoch": 0.69, "learning_rate": 0.00015850918719409452, "loss": 0.0629, "theoretical_loss": 3.3686833033059917, "tokens_seen": 2782134272 }, { "epoch": 0.69, "learning_rate": 0.00015846906844259006, "loss": 0.0675, "theoretical_loss": 3.3686710182931234, "tokens_seen": 2782265344 }, { "epoch": 0.69, "learning_rate": 0.00015842894969108563, "loss": 0.0683, "theoretical_loss": 3.368658734021026, "tokens_seen": 2782396416 }, { "epoch": 0.69, "learning_rate": 0.00015838883093958117, "loss": 0.0691, "theoretical_loss": 3.3686464504896203, "tokens_seen": 2782527488 }, { "epoch": 0.69, "learning_rate": 0.0001583487121880767, "loss": 0.0675, "theoretical_loss": 3.3686341676988256, "tokens_seen": 2782658560 }, { "epoch": 0.69, "learning_rate": 0.00015830859343657225, "loss": 0.0619, "theoretical_loss": 3.3686218856485635, "tokens_seen": 2782789632 }, { "epoch": 0.69, "learning_rate": 0.0001582684746850678, "loss": 0.0683, "theoretical_loss": 3.3686096043387543, "tokens_seen": 2782920704 }, { "epoch": 0.69, "learning_rate": 0.00015822835593356336, "loss": 0.0606, "theoretical_loss": 3.368597323769318, "tokens_seen": 2783051776 }, { "epoch": 0.69, "learning_rate": 0.0001581882371820589, "loss": 0.0636, "theoretical_loss": 3.3685850439401754, "tokens_seen": 2783182848 }, { "epoch": 0.69, "learning_rate": 0.00015814811843055444, "loss": 0.0624, "theoretical_loss": 3.3685727648512467, "tokens_seen": 2783313920 }, { "epoch": 0.69, "learning_rate": 0.00015810799967904999, "loss": 0.0662, "theoretical_loss": 3.368560486502453, "tokens_seen": 2783444992 }, { "epoch": 0.69, "learning_rate": 0.00015806788092754553, "loss": 0.0661, "theoretical_loss": 3.3685482088937144, "tokens_seen": 2783576064 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.0002845351991709322, "objective/train/docs_used": 1011173, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3215218782424927, "objective/train/original_loss": 1.3215217590332031, "objective/train/theoretical_loss": 3.3685359320249515, "objective/train/tokens_used": 1154231776, "objective/train/value_avg": -0.007007598876953125, "objective/train/value_loss": 0.0003132631827611476, "objective/train/value_max": -4.2319297790527344e-05, "objective/train/value_min": -0.775390625, "objective/train/value_reward_corr": 0.8133928882345187, "objective/train/value_std": 0.0205535888671875, "objective/train/weight_avg": 1.0004253387451172, "objective/train/weighted_lm_loss": 1.3217054605484009, "objective/train/weights_max": 1.4043852090835571, "objective/train/weights_min": 0.4547698199748993, "theoretical_loss": 3.3685359320249515, "tokens_seen": 2783707136 }, { "epoch": 0.69, "learning_rate": 0.0001580277621760411, "loss": 0.0684, "theoretical_loss": 3.3685359320249515, "tokens_seen": 2783707136 }, { "epoch": 0.69, "learning_rate": 0.00015798764342453664, "loss": 0.0622, "theoretical_loss": 3.3685236558960847, "tokens_seen": 2783838208 }, { "epoch": 0.69, "learning_rate": 0.00015794752467303218, "loss": 0.067, "theoretical_loss": 3.3685113805070355, "tokens_seen": 2783969280 }, { "epoch": 0.69, "learning_rate": 0.00015790740592152772, "loss": 0.0659, "theoretical_loss": 3.3684991058577234, "tokens_seen": 2784100352 }, { "epoch": 0.69, "learning_rate": 0.00015786728717002326, "loss": 0.0651, "theoretical_loss": 3.3684868319480694, "tokens_seen": 2784231424 }, { "epoch": 0.69, "learning_rate": 0.00015782716841851883, "loss": 0.0655, "theoretical_loss": 3.368474558777994, "tokens_seen": 2784362496 }, { "epoch": 0.69, "learning_rate": 0.00015778704966701437, "loss": 0.0626, "theoretical_loss": 3.3684622863474183, "tokens_seen": 2784493568 }, { "epoch": 0.69, "learning_rate": 0.0001577469309155099, "loss": 0.0692, "theoretical_loss": 3.368450014656262, "tokens_seen": 2784624640 }, { "epoch": 0.69, "learning_rate": 0.00015770681216400545, "loss": 0.0662, "theoretical_loss": 3.368437743704447, "tokens_seen": 2784755712 }, { "epoch": 0.69, "learning_rate": 0.000157666693412501, "loss": 0.0681, "theoretical_loss": 3.368425473491893, "tokens_seen": 2784886784 }, { "epoch": 0.69, "learning_rate": 0.00015762657466099656, "loss": 0.066, "theoretical_loss": 3.3684132040185206, "tokens_seen": 2785017856 }, { "epoch": 0.69, "learning_rate": 0.0001575864559094921, "loss": 0.064, "theoretical_loss": 3.368400935284251, "tokens_seen": 2785148928 }, { "epoch": 0.69, "learning_rate": 0.00015754633715798764, "loss": 0.0617, "theoretical_loss": 3.3683886672890053, "tokens_seen": 2785280000 }, { "epoch": 0.69, "learning_rate": 0.00015750621840648319, "loss": 0.0623, "theoretical_loss": 3.3683764000327034, "tokens_seen": 2785411072 }, { "epoch": 0.69, "learning_rate": 0.00015746609965497873, "loss": 0.0657, "theoretical_loss": 3.368364133515266, "tokens_seen": 2785542144 }, { "epoch": 0.69, "learning_rate": 0.0001574259809034743, "loss": 0.0647, "theoretical_loss": 3.368351867736614, "tokens_seen": 2785673216 }, { "epoch": 0.69, "learning_rate": 0.00015738586215196984, "loss": 0.0671, "theoretical_loss": 3.3683396026966688, "tokens_seen": 2785804288 }, { "epoch": 0.69, "learning_rate": 0.00015734574340046538, "loss": 0.0708, "theoretical_loss": 3.3683273383953503, "tokens_seen": 2785935360 }, { "epoch": 0.69, "learning_rate": 0.00015730562464896092, "loss": 0.0673, "theoretical_loss": 3.368315074832579, "tokens_seen": 2786066432 }, { "epoch": 0.69, "learning_rate": 0.00015726550589745646, "loss": 0.0666, "theoretical_loss": 3.368302812008277, "tokens_seen": 2786197504 }, { "epoch": 0.69, "learning_rate": 0.00015722538714595203, "loss": 0.0665, "theoretical_loss": 3.3682905499223645, "tokens_seen": 2786328576 }, { "epoch": 0.69, "learning_rate": 0.00015718526839444757, "loss": 0.0649, "theoretical_loss": 3.3682782885747615, "tokens_seen": 2786459648 }, { "epoch": 0.69, "learning_rate": 0.0001571451496429431, "loss": 0.0663, "theoretical_loss": 3.36826602796539, "tokens_seen": 2786590720 }, { "epoch": 0.69, "learning_rate": 0.00015710503089143865, "loss": 0.0658, "theoretical_loss": 3.36825376809417, "tokens_seen": 2786721792 }, { "epoch": 0.69, "learning_rate": 0.0001570649121399342, "loss": 0.0679, "theoretical_loss": 3.368241508961023, "tokens_seen": 2786852864 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.0005582149024121463, "objective/train/docs_used": 1012303, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2183548212051392, "objective/train/original_loss": 1.2183548212051392, "objective/train/theoretical_loss": 3.3682292505658693, "objective/train/tokens_used": 1157508576, "objective/train/value_avg": -0.00937652587890625, "objective/train/value_loss": 0.00028731347993016243, "objective/train/value_max": -2.568960189819336e-05, "objective/train/value_min": -0.5458984375, "objective/train/value_reward_corr": 0.7509548163451076, "objective/train/value_std": 0.0192108154296875, "objective/train/weight_avg": 1.0006815195083618, "objective/train/weighted_lm_loss": 1.2184762954711914, "objective/train/weights_max": 1.6676503419876099, "objective/train/weights_min": 0.2449558675289154, "theoretical_loss": 3.3682292505658693, "tokens_seen": 2786983936 }, { "epoch": 0.69, "learning_rate": 0.00015702479338842976, "loss": 0.0651, "theoretical_loss": 3.3682292505658693, "tokens_seen": 2786983936 }, { "epoch": 0.69, "learning_rate": 0.0001569846746369253, "loss": 0.0655, "theoretical_loss": 3.36821699290863, "tokens_seen": 2787115008 }, { "epoch": 0.69, "learning_rate": 0.00015694455588542084, "loss": 0.0621, "theoretical_loss": 3.368204735989226, "tokens_seen": 2787246080 }, { "epoch": 0.69, "learning_rate": 0.00015690443713391639, "loss": 0.0633, "theoretical_loss": 3.368192479807578, "tokens_seen": 2787377152 }, { "epoch": 0.69, "learning_rate": 0.00015686431838241195, "loss": 0.0654, "theoretical_loss": 3.368180224363607, "tokens_seen": 2787508224 }, { "epoch": 0.69, "learning_rate": 0.0001568241996309075, "loss": 0.0634, "theoretical_loss": 3.368167969657234, "tokens_seen": 2787639296 }, { "epoch": 0.69, "learning_rate": 0.00015678408087940304, "loss": 0.0631, "theoretical_loss": 3.3681557156883803, "tokens_seen": 2787770368 }, { "epoch": 0.69, "learning_rate": 0.00015674396212789858, "loss": 0.063, "theoretical_loss": 3.368143462456966, "tokens_seen": 2787901440 }, { "epoch": 0.69, "learning_rate": 0.00015670384337639412, "loss": 0.0669, "theoretical_loss": 3.368131209962913, "tokens_seen": 2788032512 }, { "epoch": 0.69, "learning_rate": 0.0001566637246248897, "loss": 0.0654, "theoretical_loss": 3.3681189582061415, "tokens_seen": 2788163584 }, { "epoch": 0.69, "learning_rate": 0.00015662360587338523, "loss": 0.0674, "theoretical_loss": 3.3681067071865725, "tokens_seen": 2788294656 }, { "epoch": 0.69, "learning_rate": 0.00015658348712188077, "loss": 0.0639, "theoretical_loss": 3.3680944569041276, "tokens_seen": 2788425728 }, { "epoch": 0.69, "learning_rate": 0.0001565433683703763, "loss": 0.0664, "theoretical_loss": 3.3680822073587273, "tokens_seen": 2788556800 }, { "epoch": 0.69, "learning_rate": 0.00015650324961887185, "loss": 0.063, "theoretical_loss": 3.3680699585502927, "tokens_seen": 2788687872 }, { "epoch": 0.69, "learning_rate": 0.00015646313086736742, "loss": 0.0654, "theoretical_loss": 3.368057710478745, "tokens_seen": 2788818944 }, { "epoch": 0.69, "learning_rate": 0.00015642301211586296, "loss": 0.0632, "theoretical_loss": 3.3680454631440053, "tokens_seen": 2788950016 }, { "epoch": 0.69, "learning_rate": 0.0001563828933643585, "loss": 0.0658, "theoretical_loss": 3.368033216545994, "tokens_seen": 2789081088 }, { "epoch": 0.69, "learning_rate": 0.00015634277461285405, "loss": 0.0673, "theoretical_loss": 3.368020970684633, "tokens_seen": 2789212160 }, { "epoch": 0.69, "learning_rate": 0.0001563026558613496, "loss": 0.0685, "theoretical_loss": 3.368008725559843, "tokens_seen": 2789343232 }, { "epoch": 0.69, "learning_rate": 0.00015626253710984516, "loss": 0.0671, "theoretical_loss": 3.367996481171545, "tokens_seen": 2789474304 }, { "epoch": 0.69, "learning_rate": 0.0001562224183583407, "loss": 0.064, "theoretical_loss": 3.36798423751966, "tokens_seen": 2789605376 }, { "epoch": 0.69, "learning_rate": 0.00015618229960683624, "loss": 0.0643, "theoretical_loss": 3.3679719946041096, "tokens_seen": 2789736448 }, { "epoch": 0.69, "learning_rate": 0.00015614218085533178, "loss": 0.0621, "theoretical_loss": 3.3679597524248144, "tokens_seen": 2789867520 }, { "epoch": 0.69, "learning_rate": 0.00015610206210382732, "loss": 0.0633, "theoretical_loss": 3.367947510981696, "tokens_seen": 2789998592 }, { "epoch": 0.69, "learning_rate": 0.0001560619433523229, "loss": 0.0724, "theoretical_loss": 3.367935270274675, "tokens_seen": 2790129664 }, { "epoch": 0.69, "objective/train/advantage_avg": -0.00021696147450711578, "objective/train/docs_used": 1013591, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3348642587661743, "objective/train/original_loss": 1.3348641395568848, "objective/train/theoretical_loss": 3.3679230303036736, "objective/train/tokens_used": 1160785376, "objective/train/value_avg": -0.006633758544921875, "objective/train/value_loss": 0.0002043350541498512, "objective/train/value_max": -4.13060188293457e-05, "objective/train/value_min": -0.223388671875, "objective/train/value_reward_corr": 0.621393300536431, "objective/train/value_std": 0.01126861572265625, "objective/train/weight_avg": 0.9998730421066284, "objective/train/weighted_lm_loss": 1.3348489999771118, "objective/train/weights_max": 1.1245360374450684, "objective/train/weights_min": 0.374957799911499, "theoretical_loss": 3.3679230303036736, "tokens_seen": 2790260736 }, { "epoch": 0.69, "learning_rate": 0.00015602182460081843, "loss": 0.0683, "theoretical_loss": 3.3679230303036736, "tokens_seen": 2790260736 }, { "epoch": 0.69, "learning_rate": 0.00015598170584931397, "loss": 0.0715, "theoretical_loss": 3.3679107910686117, "tokens_seen": 2790391808 }, { "epoch": 0.69, "learning_rate": 0.0001559415870978095, "loss": 0.0633, "theoretical_loss": 3.367898552569411, "tokens_seen": 2790522880 }, { "epoch": 0.69, "learning_rate": 0.00015590146834630505, "loss": 0.0683, "theoretical_loss": 3.3678863148059928, "tokens_seen": 2790653952 }, { "epoch": 0.69, "learning_rate": 0.00015586134959480062, "loss": 0.0667, "theoretical_loss": 3.3678740777782785, "tokens_seen": 2790785024 }, { "epoch": 0.69, "learning_rate": 0.00015582123084329616, "loss": 0.07, "theoretical_loss": 3.3678618414861887, "tokens_seen": 2790916096 }, { "epoch": 0.69, "learning_rate": 0.0001557811120917917, "loss": 0.0657, "theoretical_loss": 3.367849605929645, "tokens_seen": 2791047168 }, { "epoch": 0.69, "learning_rate": 0.00015574099334028725, "loss": 0.0644, "theoretical_loss": 3.367837371108569, "tokens_seen": 2791178240 }, { "epoch": 0.69, "learning_rate": 0.0001557008745887828, "loss": 0.0667, "theoretical_loss": 3.3678251370228813, "tokens_seen": 2791309312 }, { "epoch": 0.69, "learning_rate": 0.00015566075583727836, "loss": 0.0672, "theoretical_loss": 3.3678129036725037, "tokens_seen": 2791440384 }, { "epoch": 0.69, "learning_rate": 0.0001556206370857739, "loss": 0.0648, "theoretical_loss": 3.367800671057357, "tokens_seen": 2791571456 }, { "epoch": 0.69, "learning_rate": 0.00015558051833426944, "loss": 0.0684, "theoretical_loss": 3.367788439177363, "tokens_seen": 2791702528 }, { "epoch": 0.69, "learning_rate": 0.00015554039958276498, "loss": 0.0658, "theoretical_loss": 3.3677762080324425, "tokens_seen": 2791833600 }, { "epoch": 0.69, "learning_rate": 0.00015550028083126052, "loss": 0.0677, "theoretical_loss": 3.3677639776225172, "tokens_seen": 2791964672 }, { "epoch": 0.69, "learning_rate": 0.0001554601620797561, "loss": 0.0653, "theoretical_loss": 3.3677517479475085, "tokens_seen": 2792095744 }, { "epoch": 0.69, "learning_rate": 0.00015542004332825163, "loss": 0.0664, "theoretical_loss": 3.3677395190073374, "tokens_seen": 2792226816 }, { "epoch": 0.69, "learning_rate": 0.00015537992457674717, "loss": 0.0637, "theoretical_loss": 3.367727290801925, "tokens_seen": 2792357888 }, { "epoch": 0.69, "learning_rate": 0.0001553398058252427, "loss": 0.063, "theoretical_loss": 3.367715063331194, "tokens_seen": 2792488960 }, { "epoch": 0.69, "learning_rate": 0.00015529968707373825, "loss": 0.0636, "theoretical_loss": 3.367702836595064, "tokens_seen": 2792620032 }, { "epoch": 0.69, "learning_rate": 0.00015525956832223382, "loss": 0.0666, "theoretical_loss": 3.3676906105934576, "tokens_seen": 2792751104 }, { "epoch": 0.69, "learning_rate": 0.00015521944957072936, "loss": 0.0651, "theoretical_loss": 3.367678385326296, "tokens_seen": 2792882176 }, { "epoch": 0.69, "learning_rate": 0.0001551793308192249, "loss": 0.0657, "theoretical_loss": 3.3676661607935, "tokens_seen": 2793013248 }, { "epoch": 0.69, "learning_rate": 0.00015513921206772045, "loss": 0.0631, "theoretical_loss": 3.3676539369949916, "tokens_seen": 2793144320 }, { "epoch": 0.69, "learning_rate": 0.000155099093316216, "loss": 0.0673, "theoretical_loss": 3.3676417139306922, "tokens_seen": 2793275392 }, { "epoch": 0.69, "learning_rate": 0.00015505897456471156, "loss": 0.0637, "theoretical_loss": 3.367629491600524, "tokens_seen": 2793406464 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.0005050148465670645, "objective/train/docs_used": 1014727, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1839812994003296, "objective/train/original_loss": 1.1839812994003296, "objective/train/theoretical_loss": 3.3676172700044065, "objective/train/tokens_used": 1164062176, "objective/train/value_avg": -0.00690460205078125, "objective/train/value_loss": 0.00015838661056477576, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.56298828125, "objective/train/value_reward_corr": 0.7408055298606501, "objective/train/value_std": 0.0140228271484375, "objective/train/weight_avg": 1.000579833984375, "objective/train/weighted_lm_loss": 1.1843218803405762, "objective/train/weights_max": 1.6992744207382202, "objective/train/weights_min": 0.3734019696712494, "theoretical_loss": 3.3676172700044065, "tokens_seen": 2793537536 }, { "epoch": 0.69, "learning_rate": 0.0001550188558132071, "loss": 0.0644, "theoretical_loss": 3.3676172700044065, "tokens_seen": 2793537536 }, { "epoch": 0.69, "learning_rate": 0.00015497873706170264, "loss": 0.0673, "theoretical_loss": 3.3676050491422624, "tokens_seen": 2793668608 }, { "epoch": 0.69, "learning_rate": 0.00015493861831019818, "loss": 0.0626, "theoretical_loss": 3.3675928290140136, "tokens_seen": 2793799680 }, { "epoch": 0.69, "learning_rate": 0.00015489849955869372, "loss": 0.0671, "theoretical_loss": 3.367580609619581, "tokens_seen": 2793930752 }, { "epoch": 0.69, "learning_rate": 0.0001548583808071893, "loss": 0.0654, "theoretical_loss": 3.367568390958886, "tokens_seen": 2794061824 }, { "epoch": 0.69, "learning_rate": 0.00015481826205568483, "loss": 0.0657, "theoretical_loss": 3.3675561730318506, "tokens_seen": 2794192896 }, { "epoch": 0.69, "learning_rate": 0.00015477814330418037, "loss": 0.065, "theoretical_loss": 3.367543955838396, "tokens_seen": 2794323968 }, { "epoch": 0.69, "learning_rate": 0.00015473802455267594, "loss": 0.065, "theoretical_loss": 3.3675317393784434, "tokens_seen": 2794455040 }, { "epoch": 0.69, "learning_rate": 0.00015469790580117145, "loss": 0.0624, "theoretical_loss": 3.3675195236519153, "tokens_seen": 2794586112 }, { "epoch": 0.69, "learning_rate": 0.00015465778704966702, "loss": 0.0647, "theoretical_loss": 3.3675073086587326, "tokens_seen": 2794717184 }, { "epoch": 0.69, "learning_rate": 0.00015461766829816256, "loss": 0.0637, "theoretical_loss": 3.367495094398817, "tokens_seen": 2794848256 }, { "epoch": 0.69, "learning_rate": 0.00015457754954665813, "loss": 0.0671, "theoretical_loss": 3.3674828808720902, "tokens_seen": 2794979328 }, { "epoch": 0.69, "learning_rate": 0.00015453743079515367, "loss": 0.067, "theoretical_loss": 3.367470668078474, "tokens_seen": 2795110400 }, { "epoch": 0.69, "learning_rate": 0.0001544973120436492, "loss": 0.063, "theoretical_loss": 3.3674584560178893, "tokens_seen": 2795241472 }, { "epoch": 0.69, "learning_rate": 0.00015445719329214476, "loss": 0.0648, "theoretical_loss": 3.3674462446902584, "tokens_seen": 2795372544 }, { "epoch": 0.69, "learning_rate": 0.0001544170745406403, "loss": 0.0694, "theoretical_loss": 3.3674340340955027, "tokens_seen": 2795503616 }, { "epoch": 0.69, "learning_rate": 0.00015437695578913587, "loss": 0.067, "theoretical_loss": 3.367421824233544, "tokens_seen": 2795634688 }, { "epoch": 0.69, "learning_rate": 0.0001543368370376314, "loss": 0.065, "theoretical_loss": 3.367409615104304, "tokens_seen": 2795765760 }, { "epoch": 0.69, "learning_rate": 0.00015429671828612692, "loss": 0.0672, "theoretical_loss": 3.367397406707704, "tokens_seen": 2795896832 }, { "epoch": 0.69, "learning_rate": 0.0001542565995346225, "loss": 0.0633, "theoretical_loss": 3.3673851990436656, "tokens_seen": 2796027904 }, { "epoch": 0.69, "learning_rate": 0.00015421648078311803, "loss": 0.0644, "theoretical_loss": 3.367372992112111, "tokens_seen": 2796158976 }, { "epoch": 0.69, "learning_rate": 0.0001541763620316136, "loss": 0.0667, "theoretical_loss": 3.3673607859129624, "tokens_seen": 2796290048 }, { "epoch": 0.69, "learning_rate": 0.00015413624328010914, "loss": 0.0674, "theoretical_loss": 3.3673485804461407, "tokens_seen": 2796421120 }, { "epoch": 0.69, "learning_rate": 0.00015409612452860466, "loss": 0.0668, "theoretical_loss": 3.3673363757115675, "tokens_seen": 2796552192 }, { "epoch": 0.69, "learning_rate": 0.00015405600577710022, "loss": 0.0633, "theoretical_loss": 3.367324171709165, "tokens_seen": 2796683264 }, { "epoch": 0.69, "objective/train/advantage_avg": 0.0006816575187258422, "objective/train/docs_used": 1015946, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3809250593185425, "objective/train/original_loss": 1.380924940109253, "objective/train/theoretical_loss": 3.367311968438855, "objective/train/tokens_used": 1167338976, "objective/train/value_avg": -0.0098876953125, "objective/train/value_loss": 0.000385892519261688, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.64013671875, "objective/train/value_reward_corr": 0.6652364218554537, "objective/train/value_std": 0.01849365234375, "objective/train/weight_avg": 1.0008504390716553, "objective/train/weighted_lm_loss": 1.3817356824874878, "objective/train/weights_max": 1.41559898853302, "objective/train/weights_min": 0.3683558702468872, "theoretical_loss": 3.367311968438855, "tokens_seen": 2796814336 }, { "epoch": 0.7, "learning_rate": 0.00015401588702559577, "loss": 0.0678, "theoretical_loss": 3.367311968438855, "tokens_seen": 2796814336 }, { "epoch": 0.7, "learning_rate": 0.00015397576827409133, "loss": 0.0679, "theoretical_loss": 3.367299765900559, "tokens_seen": 2796945408 }, { "epoch": 0.7, "learning_rate": 0.00015393564952258687, "loss": 0.0635, "theoretical_loss": 3.367287564094199, "tokens_seen": 2797076480 }, { "epoch": 0.7, "learning_rate": 0.0001538955307710824, "loss": 0.0606, "theoretical_loss": 3.3672753630196963, "tokens_seen": 2797207552 }, { "epoch": 0.7, "learning_rate": 0.00015385541201957796, "loss": 0.068, "theoretical_loss": 3.3672631626769736, "tokens_seen": 2797338624 }, { "epoch": 0.7, "learning_rate": 0.0001538152932680735, "loss": 0.0648, "theoretical_loss": 3.367250963065952, "tokens_seen": 2797469696 }, { "epoch": 0.7, "learning_rate": 0.00015377517451656907, "loss": 0.0653, "theoretical_loss": 3.367238764186554, "tokens_seen": 2797600768 }, { "epoch": 0.7, "learning_rate": 0.0001537350557650646, "loss": 0.0636, "theoretical_loss": 3.367226566038701, "tokens_seen": 2797731840 }, { "epoch": 0.7, "learning_rate": 0.00015369493701356012, "loss": 0.0701, "theoretical_loss": 3.3672143686223146, "tokens_seen": 2797862912 }, { "epoch": 0.7, "learning_rate": 0.0001536548182620557, "loss": 0.0656, "theoretical_loss": 3.367202171937317, "tokens_seen": 2797993984 }, { "epoch": 0.7, "learning_rate": 0.00015361469951055123, "loss": 0.0635, "theoretical_loss": 3.36718997598363, "tokens_seen": 2798125056 }, { "epoch": 0.7, "learning_rate": 0.0001535745807590468, "loss": 0.0643, "theoretical_loss": 3.3671777807611756, "tokens_seen": 2798256128 }, { "epoch": 0.7, "learning_rate": 0.00015353446200754234, "loss": 0.0683, "theoretical_loss": 3.367165586269876, "tokens_seen": 2798387200 }, { "epoch": 0.7, "learning_rate": 0.00015349434325603786, "loss": 0.067, "theoretical_loss": 3.3671533925096524, "tokens_seen": 2798518272 }, { "epoch": 0.7, "learning_rate": 0.00015345422450453342, "loss": 0.0644, "theoretical_loss": 3.3671411994804274, "tokens_seen": 2798649344 }, { "epoch": 0.7, "learning_rate": 0.00015341410575302897, "loss": 0.0647, "theoretical_loss": 3.3671290071821227, "tokens_seen": 2798780416 }, { "epoch": 0.7, "learning_rate": 0.00015337398700152453, "loss": 0.0632, "theoretical_loss": 3.36711681561466, "tokens_seen": 2798911488 }, { "epoch": 0.7, "learning_rate": 0.00015333386825002008, "loss": 0.0686, "theoretical_loss": 3.3671046247779617, "tokens_seen": 2799042560 }, { "epoch": 0.7, "learning_rate": 0.0001532937494985156, "loss": 0.064, "theoretical_loss": 3.3670924346719495, "tokens_seen": 2799173632 }, { "epoch": 0.7, "learning_rate": 0.00015325363074701116, "loss": 0.0613, "theoretical_loss": 3.3670802452965454, "tokens_seen": 2799304704 }, { "epoch": 0.7, "learning_rate": 0.0001532135119955067, "loss": 0.0647, "theoretical_loss": 3.3670680566516715, "tokens_seen": 2799435776 }, { "epoch": 0.7, "learning_rate": 0.00015317339324400227, "loss": 0.0648, "theoretical_loss": 3.36705586873725, "tokens_seen": 2799566848 }, { "epoch": 0.7, "learning_rate": 0.0001531332744924978, "loss": 0.0701, "theoretical_loss": 3.367043681553202, "tokens_seen": 2799697920 }, { "epoch": 0.7, "learning_rate": 0.00015309315574099332, "loss": 0.0638, "theoretical_loss": 3.367031495099451, "tokens_seen": 2799828992 }, { "epoch": 0.7, "learning_rate": 0.0001530530369894889, "loss": 0.0651, "theoretical_loss": 3.367019309375918, "tokens_seen": 2799960064 }, { "epoch": 0.7, "objective/train/advantage_avg": -3.3102835004683584e-05, "objective/train/docs_used": 1017141, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1848413944244385, "objective/train/original_loss": 1.1848413944244385, "objective/train/theoretical_loss": 3.3670071243825257, "objective/train/tokens_used": 1170615776, "objective/train/value_avg": -0.007030487060546875, "objective/train/value_loss": 0.00010426461813040078, "objective/train/value_max": -5.519390106201172e-05, "objective/train/value_min": -0.345458984375, "objective/train/value_reward_corr": 0.8010847227836342, "objective/train/value_std": 0.01309967041015625, "objective/train/weight_avg": 1.0000180006027222, "objective/train/weighted_lm_loss": 1.1848127841949463, "objective/train/weights_max": 1.3105658292770386, "objective/train/weights_min": 0.7167502045631409, "theoretical_loss": 3.3670071243825257, "tokens_seen": 2800091136 }, { "epoch": 0.7, "learning_rate": 0.00015301291823798443, "loss": 0.0643, "theoretical_loss": 3.3670071243825257, "tokens_seen": 2800091136 }, { "epoch": 0.7, "learning_rate": 0.00015297279948648, "loss": 0.0668, "theoretical_loss": 3.3669949401191954, "tokens_seen": 2800222208 }, { "epoch": 0.7, "learning_rate": 0.00015293268073497554, "loss": 0.0663, "theoretical_loss": 3.36698275658585, "tokens_seen": 2800353280 }, { "epoch": 0.7, "learning_rate": 0.00015289256198347106, "loss": 0.0647, "theoretical_loss": 3.366970573782411, "tokens_seen": 2800484352 }, { "epoch": 0.7, "learning_rate": 0.00015285244323196662, "loss": 0.0689, "theoretical_loss": 3.3669583917088013, "tokens_seen": 2800615424 }, { "epoch": 0.7, "learning_rate": 0.00015281232448046217, "loss": 0.0662, "theoretical_loss": 3.366946210364942, "tokens_seen": 2800746496 }, { "epoch": 0.7, "learning_rate": 0.00015277220572895773, "loss": 0.0658, "theoretical_loss": 3.3669340297507557, "tokens_seen": 2800877568 }, { "epoch": 0.7, "learning_rate": 0.00015273208697745328, "loss": 0.0645, "theoretical_loss": 3.3669218498661646, "tokens_seen": 2801008640 }, { "epoch": 0.7, "learning_rate": 0.0001526919682259488, "loss": 0.065, "theoretical_loss": 3.3669096707110913, "tokens_seen": 2801139712 }, { "epoch": 0.7, "learning_rate": 0.00015265184947444436, "loss": 0.0644, "theoretical_loss": 3.3668974922854567, "tokens_seen": 2801270784 }, { "epoch": 0.7, "learning_rate": 0.0001526117307229399, "loss": 0.0673, "theoretical_loss": 3.3668853145891844, "tokens_seen": 2801401856 }, { "epoch": 0.7, "learning_rate": 0.00015257161197143547, "loss": 0.0642, "theoretical_loss": 3.366873137622196, "tokens_seen": 2801532928 }, { "epoch": 0.7, "learning_rate": 0.000152531493219931, "loss": 0.0643, "theoretical_loss": 3.3668609613844134, "tokens_seen": 2801664000 }, { "epoch": 0.7, "learning_rate": 0.00015249137446842652, "loss": 0.0672, "theoretical_loss": 3.366848785875759, "tokens_seen": 2801795072 }, { "epoch": 0.7, "learning_rate": 0.0001524512557169221, "loss": 0.0646, "theoretical_loss": 3.3668366110961556, "tokens_seen": 2801926144 }, { "epoch": 0.7, "learning_rate": 0.00015241113696541763, "loss": 0.0674, "theoretical_loss": 3.3668244370455245, "tokens_seen": 2802057216 }, { "epoch": 0.7, "learning_rate": 0.0001523710182139132, "loss": 0.0658, "theoretical_loss": 3.366812263723789, "tokens_seen": 2802188288 }, { "epoch": 0.7, "learning_rate": 0.00015233089946240874, "loss": 0.0679, "theoretical_loss": 3.3668000911308704, "tokens_seen": 2802319360 }, { "epoch": 0.7, "learning_rate": 0.00015229078071090426, "loss": 0.066, "theoretical_loss": 3.3667879192666916, "tokens_seen": 2802450432 }, { "epoch": 0.7, "learning_rate": 0.00015225066195939983, "loss": 0.0637, "theoretical_loss": 3.3667757481311744, "tokens_seen": 2802581504 }, { "epoch": 0.7, "learning_rate": 0.00015221054320789537, "loss": 0.0652, "theoretical_loss": 3.3667635777242415, "tokens_seen": 2802712576 }, { "epoch": 0.7, "learning_rate": 0.00015217042445639094, "loss": 0.0642, "theoretical_loss": 3.3667514080458156, "tokens_seen": 2802843648 }, { "epoch": 0.7, "learning_rate": 0.00015213030570488648, "loss": 0.0627, "theoretical_loss": 3.3667392390958177, "tokens_seen": 2802974720 }, { "epoch": 0.7, "learning_rate": 0.000152090186953382, "loss": 0.0628, "theoretical_loss": 3.3667270708741714, "tokens_seen": 2803105792 }, { "epoch": 0.7, "learning_rate": 0.00015205006820187756, "loss": 0.0682, "theoretical_loss": 3.3667149033807986, "tokens_seen": 2803236864 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.0008467425359413028, "objective/train/docs_used": 1018418, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4795119762420654, "objective/train/original_loss": 1.4795119762420654, "objective/train/theoretical_loss": 3.366702736615622, "objective/train/tokens_used": 1173892576, "objective/train/value_avg": -0.006237030029296875, "objective/train/value_loss": 0.00010896077583311126, "objective/train/value_max": -1.7940998077392578e-05, "objective/train/value_min": -0.22900390625, "objective/train/value_reward_corr": 0.7640097233826086, "objective/train/value_std": 0.01177978515625, "objective/train/weight_avg": 1.000900149345398, "objective/train/weighted_lm_loss": 1.4812376499176025, "objective/train/weights_max": 1.1334158182144165, "objective/train/weights_min": 0.7607443928718567, "theoretical_loss": 3.366702736615622, "tokens_seen": 2803367936 }, { "epoch": 0.7, "learning_rate": 0.0001520099494503731, "loss": 0.0667, "theoretical_loss": 3.366702736615622, "tokens_seen": 2803367936 }, { "epoch": 0.7, "learning_rate": 0.00015196983069886867, "loss": 0.0638, "theoretical_loss": 3.3666905705785632, "tokens_seen": 2803499008 }, { "epoch": 0.7, "learning_rate": 0.0001519297119473642, "loss": 0.0634, "theoretical_loss": 3.3666784052695453, "tokens_seen": 2803630080 }, { "epoch": 0.7, "learning_rate": 0.00015188959319585975, "loss": 0.0638, "theoretical_loss": 3.3666662406884904, "tokens_seen": 2803761152 }, { "epoch": 0.7, "learning_rate": 0.0001518494744443553, "loss": 0.0642, "theoretical_loss": 3.366654076835321, "tokens_seen": 2803892224 }, { "epoch": 0.7, "learning_rate": 0.00015180935569285083, "loss": 0.0651, "theoretical_loss": 3.3666419137099592, "tokens_seen": 2804023296 }, { "epoch": 0.7, "learning_rate": 0.0001517692369413464, "loss": 0.0653, "theoretical_loss": 3.366629751312328, "tokens_seen": 2804154368 }, { "epoch": 0.7, "learning_rate": 0.00015172911818984194, "loss": 0.0645, "theoretical_loss": 3.36661758964235, "tokens_seen": 2804285440 }, { "epoch": 0.7, "learning_rate": 0.00015168899943833748, "loss": 0.0676, "theoretical_loss": 3.3666054286999465, "tokens_seen": 2804416512 }, { "epoch": 0.7, "learning_rate": 0.00015164888068683303, "loss": 0.0691, "theoretical_loss": 3.366593268485041, "tokens_seen": 2804547584 }, { "epoch": 0.7, "learning_rate": 0.00015160876193532857, "loss": 0.0684, "theoretical_loss": 3.3665811089975555, "tokens_seen": 2804678656 }, { "epoch": 0.7, "learning_rate": 0.00015156864318382414, "loss": 0.0627, "theoretical_loss": 3.3665689502374128, "tokens_seen": 2804809728 }, { "epoch": 0.7, "learning_rate": 0.00015152852443231968, "loss": 0.0647, "theoretical_loss": 3.366556792204535, "tokens_seen": 2804940800 }, { "epoch": 0.7, "learning_rate": 0.00015148840568081522, "loss": 0.0662, "theoretical_loss": 3.3665446348988453, "tokens_seen": 2805071872 }, { "epoch": 0.7, "learning_rate": 0.00015144828692931076, "loss": 0.0655, "theoretical_loss": 3.3665324783202655, "tokens_seen": 2805202944 }, { "epoch": 0.7, "learning_rate": 0.0001514081681778063, "loss": 0.0625, "theoretical_loss": 3.366520322468719, "tokens_seen": 2805334016 }, { "epoch": 0.7, "learning_rate": 0.00015136804942630187, "loss": 0.0663, "theoretical_loss": 3.3665081673441275, "tokens_seen": 2805465088 }, { "epoch": 0.7, "learning_rate": 0.0001513279306747974, "loss": 0.0671, "theoretical_loss": 3.3664960129464134, "tokens_seen": 2805596160 }, { "epoch": 0.7, "learning_rate": 0.00015128781192329295, "loss": 0.0639, "theoretical_loss": 3.3664838592755006, "tokens_seen": 2805727232 }, { "epoch": 0.7, "learning_rate": 0.0001512476931717885, "loss": 0.0613, "theoretical_loss": 3.3664717063313105, "tokens_seen": 2805858304 }, { "epoch": 0.7, "learning_rate": 0.00015120757442028403, "loss": 0.0688, "theoretical_loss": 3.366459554113766, "tokens_seen": 2805989376 }, { "epoch": 0.7, "learning_rate": 0.0001511674556687796, "loss": 0.0659, "theoretical_loss": 3.3664474026227897, "tokens_seen": 2806120448 }, { "epoch": 0.7, "learning_rate": 0.00015112733691727514, "loss": 0.0619, "theoretical_loss": 3.3664352518583045, "tokens_seen": 2806251520 }, { "epoch": 0.7, "learning_rate": 0.00015108721816577069, "loss": 0.066, "theoretical_loss": 3.3664231018202324, "tokens_seen": 2806382592 }, { "epoch": 0.7, "learning_rate": 0.00015104709941426623, "loss": 0.0665, "theoretical_loss": 3.366410952508497, "tokens_seen": 2806513664 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.00015437317779287696, "objective/train/docs_used": 1019695, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2268017530441284, "objective/train/original_loss": 1.226801872253418, "objective/train/theoretical_loss": 3.36639880392302, "objective/train/tokens_used": 1177169376, "objective/train/value_avg": -0.004962921142578125, "objective/train/value_loss": 0.00016624602722004056, "objective/train/value_max": -3.451108932495117e-05, "objective/train/value_min": -0.251953125, "objective/train/value_reward_corr": 0.5856851033378696, "objective/train/value_std": 0.008819580078125, "objective/train/weight_avg": 1.000227451324463, "objective/train/weighted_lm_loss": 1.2270727157592773, "objective/train/weights_max": 1.088475227355957, "objective/train/weights_min": 0.37148112058639526, "theoretical_loss": 3.36639880392302, "tokens_seen": 2806644736 }, { "epoch": 0.7, "learning_rate": 0.00015100698066276177, "loss": 0.0657, "theoretical_loss": 3.36639880392302, "tokens_seen": 2806644736 }, { "epoch": 0.7, "learning_rate": 0.00015096686191125734, "loss": 0.0693, "theoretical_loss": 3.366386656063725, "tokens_seen": 2806775808 }, { "epoch": 0.7, "learning_rate": 0.00015092674315975288, "loss": 0.0681, "theoretical_loss": 3.3663745089305337, "tokens_seen": 2806906880 }, { "epoch": 0.7, "learning_rate": 0.00015088662440824842, "loss": 0.0679, "theoretical_loss": 3.3663623625233696, "tokens_seen": 2807037952 }, { "epoch": 0.7, "learning_rate": 0.00015084650565674396, "loss": 0.0601, "theoretical_loss": 3.366350216842155, "tokens_seen": 2807169024 }, { "epoch": 0.7, "learning_rate": 0.0001508063869052395, "loss": 0.0672, "theoretical_loss": 3.3663380718868128, "tokens_seen": 2807300096 }, { "epoch": 0.7, "learning_rate": 0.00015076626815373507, "loss": 0.0646, "theoretical_loss": 3.3663259276572655, "tokens_seen": 2807431168 }, { "epoch": 0.7, "learning_rate": 0.0001507261494022306, "loss": 0.0665, "theoretical_loss": 3.3663137841534363, "tokens_seen": 2807562240 }, { "epoch": 0.7, "learning_rate": 0.00015068603065072615, "loss": 0.0665, "theoretical_loss": 3.366301641375247, "tokens_seen": 2807693312 }, { "epoch": 0.7, "learning_rate": 0.0001506459118992217, "loss": 0.0645, "theoretical_loss": 3.3662894993226216, "tokens_seen": 2807824384 }, { "epoch": 0.7, "learning_rate": 0.00015060579314771723, "loss": 0.0687, "theoretical_loss": 3.3662773579954823, "tokens_seen": 2807955456 }, { "epoch": 0.7, "learning_rate": 0.0001505656743962128, "loss": 0.0692, "theoretical_loss": 3.3662652173937517, "tokens_seen": 2808086528 }, { "epoch": 0.7, "learning_rate": 0.00015052555564470834, "loss": 0.0666, "theoretical_loss": 3.366253077517353, "tokens_seen": 2808217600 }, { "epoch": 0.7, "learning_rate": 0.00015048543689320389, "loss": 0.0654, "theoretical_loss": 3.366240938366208, "tokens_seen": 2808348672 }, { "epoch": 0.7, "learning_rate": 0.00015044531814169943, "loss": 0.0626, "theoretical_loss": 3.366228799940241, "tokens_seen": 2808479744 }, { "epoch": 0.7, "learning_rate": 0.00015040519939019497, "loss": 0.0658, "theoretical_loss": 3.366216662239374, "tokens_seen": 2808610816 }, { "epoch": 0.7, "learning_rate": 0.00015036508063869054, "loss": 0.0652, "theoretical_loss": 3.36620452526353, "tokens_seen": 2808741888 }, { "epoch": 0.7, "learning_rate": 0.00015032496188718608, "loss": 0.0618, "theoretical_loss": 3.3661923890126326, "tokens_seen": 2808872960 }, { "epoch": 0.7, "learning_rate": 0.00015028484313568162, "loss": 0.0648, "theoretical_loss": 3.366180253486603, "tokens_seen": 2809004032 }, { "epoch": 0.7, "learning_rate": 0.00015024472438417716, "loss": 0.0646, "theoretical_loss": 3.366168118685365, "tokens_seen": 2809135104 }, { "epoch": 0.7, "learning_rate": 0.0001502046056326727, "loss": 0.0645, "theoretical_loss": 3.366155984608842, "tokens_seen": 2809266176 }, { "epoch": 0.7, "learning_rate": 0.00015016448688116827, "loss": 0.064, "theoretical_loss": 3.3661438512569566, "tokens_seen": 2809397248 }, { "epoch": 0.7, "learning_rate": 0.0001501243681296638, "loss": 0.0676, "theoretical_loss": 3.366131718629631, "tokens_seen": 2809528320 }, { "epoch": 0.7, "learning_rate": 0.00015008424937815935, "loss": 0.0661, "theoretical_loss": 3.3661195867267892, "tokens_seen": 2809659392 }, { "epoch": 0.7, "learning_rate": 0.0001500441306266549, "loss": 0.0647, "theoretical_loss": 3.3661074555483532, "tokens_seen": 2809790464 }, { "epoch": 0.7, "objective/train/advantage_avg": 0.0011828274000436068, "objective/train/docs_used": 1020904, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2213294506072998, "objective/train/original_loss": 1.2213294506072998, "objective/train/theoretical_loss": 3.3660953250942467, "objective/train/tokens_used": 1180446176, "objective/train/value_avg": -0.0085906982421875, "objective/train/value_loss": 0.00013128841237630695, "objective/train/value_max": -1.5795230865478516e-05, "objective/train/value_min": -0.650390625, "objective/train/value_reward_corr": 0.7653955344565246, "objective/train/value_std": 0.016357421875, "objective/train/weight_avg": 1.0012481212615967, "objective/train/weighted_lm_loss": 1.2227110862731934, "objective/train/weights_max": 1.5871089696884155, "objective/train/weights_min": 0.5417968034744263, "theoretical_loss": 3.3660953250942467, "tokens_seen": 2809921536 }, { "epoch": 0.7, "learning_rate": 0.00015000401187515044, "loss": 0.0663, "theoretical_loss": 3.3660953250942467, "tokens_seen": 2809921536 }, { "epoch": 0.7, "learning_rate": 0.000149963893123646, "loss": 0.0678, "theoretical_loss": 3.366083195364392, "tokens_seen": 2810052608 }, { "epoch": 0.7, "learning_rate": 0.00014992377437214155, "loss": 0.0661, "theoretical_loss": 3.366071066358713, "tokens_seen": 2810183680 }, { "epoch": 0.7, "learning_rate": 0.00014988365562063709, "loss": 0.0663, "theoretical_loss": 3.3660589380771317, "tokens_seen": 2810314752 }, { "epoch": 0.7, "learning_rate": 0.00014984353686913263, "loss": 0.0654, "theoretical_loss": 3.3660468105195713, "tokens_seen": 2810445824 }, { "epoch": 0.7, "learning_rate": 0.00014980341811762817, "loss": 0.0667, "theoretical_loss": 3.366034683685956, "tokens_seen": 2810576896 }, { "epoch": 0.7, "learning_rate": 0.00014976329936612374, "loss": 0.0622, "theoretical_loss": 3.3660225575762066, "tokens_seen": 2810707968 }, { "epoch": 0.7, "learning_rate": 0.00014972318061461928, "loss": 0.0624, "theoretical_loss": 3.366010432190248, "tokens_seen": 2810839040 }, { "epoch": 0.7, "learning_rate": 0.00014968306186311482, "loss": 0.0657, "theoretical_loss": 3.365998307528003, "tokens_seen": 2810970112 }, { "epoch": 0.7, "learning_rate": 0.00014964294311161036, "loss": 0.063, "theoretical_loss": 3.365986183589394, "tokens_seen": 2811101184 }, { "epoch": 0.7, "learning_rate": 0.0001496028243601059, "loss": 0.0679, "theoretical_loss": 3.3659740603743447, "tokens_seen": 2811232256 }, { "epoch": 0.7, "learning_rate": 0.00014956270560860147, "loss": 0.0645, "theoretical_loss": 3.365961937882777, "tokens_seen": 2811363328 }, { "epoch": 0.7, "learning_rate": 0.000149522586857097, "loss": 0.0685, "theoretical_loss": 3.365949816114616, "tokens_seen": 2811494400 }, { "epoch": 0.7, "learning_rate": 0.00014948246810559255, "loss": 0.0639, "theoretical_loss": 3.365937695069783, "tokens_seen": 2811625472 }, { "epoch": 0.7, "learning_rate": 0.0001494423493540881, "loss": 0.0598, "theoretical_loss": 3.3659255747482018, "tokens_seen": 2811756544 }, { "epoch": 0.7, "learning_rate": 0.00014940223060258366, "loss": 0.0657, "theoretical_loss": 3.365913455149796, "tokens_seen": 2811887616 }, { "epoch": 0.7, "learning_rate": 0.0001493621118510792, "loss": 0.062, "theoretical_loss": 3.365901336274488, "tokens_seen": 2812018688 }, { "epoch": 0.7, "learning_rate": 0.00014932199309957475, "loss": 0.0669, "theoretical_loss": 3.3658892181222013, "tokens_seen": 2812149760 }, { "epoch": 0.7, "learning_rate": 0.0001492818743480703, "loss": 0.0686, "theoretical_loss": 3.3658771006928587, "tokens_seen": 2812280832 }, { "epoch": 0.7, "learning_rate": 0.00014924175559656583, "loss": 0.068, "theoretical_loss": 3.365864983986384, "tokens_seen": 2812411904 }, { "epoch": 0.7, "learning_rate": 0.0001492016368450614, "loss": 0.0702, "theoretical_loss": 3.3658528680027002, "tokens_seen": 2812542976 }, { "epoch": 0.7, "learning_rate": 0.00014916151809355694, "loss": 0.061, "theoretical_loss": 3.36584075274173, "tokens_seen": 2812674048 }, { "epoch": 0.7, "learning_rate": 0.00014912139934205248, "loss": 0.0624, "theoretical_loss": 3.3658286382033973, "tokens_seen": 2812805120 }, { "epoch": 0.7, "learning_rate": 0.00014908128059054802, "loss": 0.0652, "theoretical_loss": 3.3658165243876246, "tokens_seen": 2812936192 }, { "epoch": 0.7, "learning_rate": 0.00014904116183904356, "loss": 0.0664, "theoretical_loss": 3.365804411294336, "tokens_seen": 2813067264 }, { "epoch": 0.7, "objective/train/advantage_avg": -0.00046131774433888495, "objective/train/docs_used": 1022064, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3144135475158691, "objective/train/original_loss": 1.3144134283065796, "objective/train/theoretical_loss": 3.3657922989234543, "objective/train/tokens_used": 1183722976, "objective/train/value_avg": -0.00754547119140625, "objective/train/value_loss": 0.0002928948379121721, "objective/train/value_max": -2.5093555450439453e-05, "objective/train/value_min": -0.689453125, "objective/train/value_reward_corr": 0.6718432294825962, "objective/train/value_std": 0.01428985595703125, "objective/train/weight_avg": 0.9996673464775085, "objective/train/weighted_lm_loss": 1.3136624097824097, "objective/train/weights_max": 1.7448012828826904, "objective/train/weights_min": 0.3703491687774658, "theoretical_loss": 3.3657922989234543, "tokens_seen": 2813198336 }, { "epoch": 0.7, "learning_rate": 0.00014900104308753913, "loss": 0.0683, "theoretical_loss": 3.3657922989234543, "tokens_seen": 2813198336 }, { "epoch": 0.71, "learning_rate": 0.00014896092433603467, "loss": 0.067, "theoretical_loss": 3.3657801872749027, "tokens_seen": 2813329408 }, { "epoch": 0.71, "learning_rate": 0.0001489208055845302, "loss": 0.0683, "theoretical_loss": 3.3657680763486044, "tokens_seen": 2813460480 }, { "epoch": 0.71, "learning_rate": 0.00014888068683302575, "loss": 0.0641, "theoretical_loss": 3.3657559661444827, "tokens_seen": 2813591552 }, { "epoch": 0.71, "learning_rate": 0.0001488405680815213, "loss": 0.0637, "theoretical_loss": 3.365743856662461, "tokens_seen": 2813722624 }, { "epoch": 0.71, "learning_rate": 0.00014880044933001686, "loss": 0.0664, "theoretical_loss": 3.3657317479024633, "tokens_seen": 2813853696 }, { "epoch": 0.71, "learning_rate": 0.0001487603305785124, "loss": 0.0643, "theoretical_loss": 3.365719639864412, "tokens_seen": 2813984768 }, { "epoch": 0.71, "learning_rate": 0.00014872021182700795, "loss": 0.0655, "theoretical_loss": 3.3657075325482304, "tokens_seen": 2814115840 }, { "epoch": 0.71, "learning_rate": 0.0001486800930755035, "loss": 0.0624, "theoretical_loss": 3.365695425953842, "tokens_seen": 2814246912 }, { "epoch": 0.71, "learning_rate": 0.00014863997432399903, "loss": 0.0638, "theoretical_loss": 3.3656833200811707, "tokens_seen": 2814377984 }, { "epoch": 0.71, "learning_rate": 0.0001485998555724946, "loss": 0.0632, "theoretical_loss": 3.3656712149301393, "tokens_seen": 2814509056 }, { "epoch": 0.71, "learning_rate": 0.00014855973682099014, "loss": 0.0635, "theoretical_loss": 3.3656591105006712, "tokens_seen": 2814640128 }, { "epoch": 0.71, "learning_rate": 0.00014851961806948568, "loss": 0.0659, "theoretical_loss": 3.36564700679269, "tokens_seen": 2814771200 }, { "epoch": 0.71, "learning_rate": 0.00014847949931798122, "loss": 0.0666, "theoretical_loss": 3.3656349038061193, "tokens_seen": 2814902272 }, { "epoch": 0.71, "learning_rate": 0.00014843938056647676, "loss": 0.0659, "theoretical_loss": 3.365622801540882, "tokens_seen": 2815033344 }, { "epoch": 0.71, "learning_rate": 0.00014839926181497233, "loss": 0.0673, "theoretical_loss": 3.3656106999969015, "tokens_seen": 2815164416 }, { "epoch": 0.71, "learning_rate": 0.00014835914306346787, "loss": 0.0638, "theoretical_loss": 3.3655985991741018, "tokens_seen": 2815295488 }, { "epoch": 0.71, "learning_rate": 0.0001483190243119634, "loss": 0.0637, "theoretical_loss": 3.365586499072406, "tokens_seen": 2815426560 }, { "epoch": 0.71, "learning_rate": 0.00014827890556045895, "loss": 0.0672, "theoretical_loss": 3.3655743996917376, "tokens_seen": 2815557632 }, { "epoch": 0.71, "learning_rate": 0.0001482387868089545, "loss": 0.0654, "theoretical_loss": 3.36556230103202, "tokens_seen": 2815688704 }, { "epoch": 0.71, "learning_rate": 0.00014819866805745006, "loss": 0.0653, "theoretical_loss": 3.3655502030931768, "tokens_seen": 2815819776 }, { "epoch": 0.71, "learning_rate": 0.0001481585493059456, "loss": 0.0643, "theoretical_loss": 3.3655381058751317, "tokens_seen": 2815950848 }, { "epoch": 0.71, "learning_rate": 0.00014811843055444115, "loss": 0.0633, "theoretical_loss": 3.3655260093778074, "tokens_seen": 2816081920 }, { "epoch": 0.71, "learning_rate": 0.0001480783118029367, "loss": 0.0669, "theoretical_loss": 3.3655139136011285, "tokens_seen": 2816212992 }, { "epoch": 0.71, "learning_rate": 0.00014803819305143223, "loss": 0.0639, "theoretical_loss": 3.365501818545018, "tokens_seen": 2816344064 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.0009837693069130182, "objective/train/docs_used": 1023297, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3208014965057373, "objective/train/original_loss": 1.3208016157150269, "objective/train/theoretical_loss": 3.3654897242093993, "objective/train/tokens_used": 1186999776, "objective/train/value_avg": -0.0087738037109375, "objective/train/value_loss": 0.00035303368349559605, "objective/train/value_max": -4.649162292480469e-05, "objective/train/value_min": -0.70751953125, "objective/train/value_reward_corr": 0.7406274324002049, "objective/train/value_std": 0.0193634033203125, "objective/train/weight_avg": 1.0011411905288696, "objective/train/weighted_lm_loss": 1.321268081665039, "objective/train/weights_max": 1.2774919271469116, "objective/train/weights_min": 0.36936861276626587, "theoretical_loss": 3.3654897242093993, "tokens_seen": 2816475136 }, { "epoch": 0.71, "learning_rate": 0.0001479980742999278, "loss": 0.0632, "theoretical_loss": 3.3654897242093993, "tokens_seen": 2816475136 }, { "epoch": 0.71, "learning_rate": 0.00014795795554842334, "loss": 0.0653, "theoretical_loss": 3.3654776305941962, "tokens_seen": 2816606208 }, { "epoch": 0.71, "learning_rate": 0.00014791783679691888, "loss": 0.0652, "theoretical_loss": 3.365465537699332, "tokens_seen": 2816737280 }, { "epoch": 0.71, "learning_rate": 0.00014787771804541442, "loss": 0.0635, "theoretical_loss": 3.3654534455247305, "tokens_seen": 2816868352 }, { "epoch": 0.71, "learning_rate": 0.00014783759929390996, "loss": 0.0673, "theoretical_loss": 3.3654413540703154, "tokens_seen": 2816999424 }, { "epoch": 0.71, "learning_rate": 0.00014779748054240553, "loss": 0.0642, "theoretical_loss": 3.36542926333601, "tokens_seen": 2817130496 }, { "epoch": 0.71, "learning_rate": 0.00014775736179090107, "loss": 0.0666, "theoretical_loss": 3.365417173321738, "tokens_seen": 2817261568 }, { "epoch": 0.71, "learning_rate": 0.00014771724303939661, "loss": 0.0678, "theoretical_loss": 3.365405084027423, "tokens_seen": 2817392640 }, { "epoch": 0.71, "learning_rate": 0.00014767712428789216, "loss": 0.069, "theoretical_loss": 3.3653929954529893, "tokens_seen": 2817523712 }, { "epoch": 0.71, "learning_rate": 0.0001476370055363877, "loss": 0.0629, "theoretical_loss": 3.3653809075983596, "tokens_seen": 2817654784 }, { "epoch": 0.71, "learning_rate": 0.00014759688678488326, "loss": 0.0674, "theoretical_loss": 3.3653688204634578, "tokens_seen": 2817785856 }, { "epoch": 0.71, "learning_rate": 0.0001475567680333788, "loss": 0.0652, "theoretical_loss": 3.365356734048208, "tokens_seen": 2817916928 }, { "epoch": 0.71, "learning_rate": 0.00014751664928187435, "loss": 0.0667, "theoretical_loss": 3.3653446483525333, "tokens_seen": 2818048000 }, { "epoch": 0.71, "learning_rate": 0.0001474765305303699, "loss": 0.0635, "theoretical_loss": 3.3653325633763576, "tokens_seen": 2818179072 }, { "epoch": 0.71, "learning_rate": 0.00014743641177886543, "loss": 0.065, "theoretical_loss": 3.365320479119605, "tokens_seen": 2818310144 }, { "epoch": 0.71, "learning_rate": 0.000147396293027361, "loss": 0.0657, "theoretical_loss": 3.3653083955821987, "tokens_seen": 2818441216 }, { "epoch": 0.71, "learning_rate": 0.00014735617427585654, "loss": 0.0646, "theoretical_loss": 3.3652963127640625, "tokens_seen": 2818572288 }, { "epoch": 0.71, "learning_rate": 0.00014731605552435208, "loss": 0.0677, "theoretical_loss": 3.3652842306651203, "tokens_seen": 2818703360 }, { "epoch": 0.71, "learning_rate": 0.00014727593677284762, "loss": 0.0608, "theoretical_loss": 3.365272149285296, "tokens_seen": 2818834432 }, { "epoch": 0.71, "learning_rate": 0.00014723581802134316, "loss": 0.0657, "theoretical_loss": 3.3652600686245133, "tokens_seen": 2818965504 }, { "epoch": 0.71, "learning_rate": 0.00014719569926983873, "loss": 0.064, "theoretical_loss": 3.3652479886826954, "tokens_seen": 2819096576 }, { "epoch": 0.71, "learning_rate": 0.00014715558051833427, "loss": 0.0668, "theoretical_loss": 3.365235909459767, "tokens_seen": 2819227648 }, { "epoch": 0.71, "learning_rate": 0.00014711546176682981, "loss": 0.065, "theoretical_loss": 3.3652238309556513, "tokens_seen": 2819358720 }, { "epoch": 0.71, "learning_rate": 0.00014707534301532536, "loss": 0.0633, "theoretical_loss": 3.365211753170272, "tokens_seen": 2819489792 }, { "epoch": 0.71, "learning_rate": 0.0001470352242638209, "loss": 0.0656, "theoretical_loss": 3.365199676103553, "tokens_seen": 2819620864 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.0008620913722552359, "objective/train/docs_used": 1024583, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.27927565574646, "objective/train/original_loss": 1.2792755365371704, "objective/train/theoretical_loss": 3.365187599755419, "objective/train/tokens_used": 1190276576, "objective/train/value_avg": -0.006069183349609375, "objective/train/value_loss": 9.731602040119469e-05, "objective/train/value_max": -3.707408905029297e-05, "objective/train/value_min": -0.34130859375, "objective/train/value_reward_corr": 0.7575894259653666, "objective/train/value_std": 0.011077880859375, "objective/train/weight_avg": 1.0009101629257202, "objective/train/weighted_lm_loss": 1.2797774076461792, "objective/train/weights_max": 1.2063775062561035, "objective/train/weights_min": 0.781027615070343, "theoretical_loss": 3.365187599755419, "tokens_seen": 2819751936 }, { "epoch": 0.71, "learning_rate": 0.00014699510551231647, "loss": 0.0638, "theoretical_loss": 3.365187599755419, "tokens_seen": 2819751936 }, { "epoch": 0.71, "learning_rate": 0.000146954986760812, "loss": 0.0655, "theoretical_loss": 3.3651755241257924, "tokens_seen": 2819883008 }, { "epoch": 0.71, "learning_rate": 0.00014691486800930758, "loss": 0.0661, "theoretical_loss": 3.3651634492145983, "tokens_seen": 2820014080 }, { "epoch": 0.71, "learning_rate": 0.0001468747492578031, "loss": 0.0672, "theoretical_loss": 3.36515137502176, "tokens_seen": 2820145152 }, { "epoch": 0.71, "learning_rate": 0.00014683463050629863, "loss": 0.0608, "theoretical_loss": 3.3651393015472015, "tokens_seen": 2820276224 }, { "epoch": 0.71, "learning_rate": 0.0001467945117547942, "loss": 0.0678, "theoretical_loss": 3.3651272287908465, "tokens_seen": 2820407296 }, { "epoch": 0.71, "learning_rate": 0.00014675439300328974, "loss": 0.0653, "theoretical_loss": 3.365115156752619, "tokens_seen": 2820538368 }, { "epoch": 0.71, "learning_rate": 0.0001467142742517853, "loss": 0.0659, "theoretical_loss": 3.365103085432443, "tokens_seen": 2820669440 }, { "epoch": 0.71, "learning_rate": 0.00014667415550028085, "loss": 0.0634, "theoretical_loss": 3.3650910148302424, "tokens_seen": 2820800512 }, { "epoch": 0.71, "learning_rate": 0.00014663403674877636, "loss": 0.0641, "theoretical_loss": 3.365078944945941, "tokens_seen": 2820931584 }, { "epoch": 0.71, "learning_rate": 0.00014659391799727193, "loss": 0.0643, "theoretical_loss": 3.3650668757794633, "tokens_seen": 2821062656 }, { "epoch": 0.71, "learning_rate": 0.00014655379924576747, "loss": 0.0674, "theoretical_loss": 3.3650548073307327, "tokens_seen": 2821193728 }, { "epoch": 0.71, "learning_rate": 0.00014651368049426304, "loss": 0.0646, "theoretical_loss": 3.365042739599673, "tokens_seen": 2821324800 }, { "epoch": 0.71, "learning_rate": 0.00014647356174275858, "loss": 0.0699, "theoretical_loss": 3.365030672586209, "tokens_seen": 2821455872 }, { "epoch": 0.71, "learning_rate": 0.0001464334429912541, "loss": 0.062, "theoretical_loss": 3.3650186062902634, "tokens_seen": 2821586944 }, { "epoch": 0.71, "learning_rate": 0.00014639332423974967, "loss": 0.0672, "theoretical_loss": 3.3650065407117618, "tokens_seen": 2821718016 }, { "epoch": 0.71, "learning_rate": 0.0001463532054882452, "loss": 0.0658, "theoretical_loss": 3.3649944758506267, "tokens_seen": 2821849088 }, { "epoch": 0.71, "learning_rate": 0.00014631308673674078, "loss": 0.0663, "theoretical_loss": 3.3649824117067832, "tokens_seen": 2821980160 }, { "epoch": 0.71, "learning_rate": 0.00014627296798523632, "loss": 0.0607, "theoretical_loss": 3.3649703482801554, "tokens_seen": 2822111232 }, { "epoch": 0.71, "learning_rate": 0.00014623284923373183, "loss": 0.0615, "theoretical_loss": 3.3649582855706663, "tokens_seen": 2822242304 }, { "epoch": 0.71, "learning_rate": 0.0001461927304822274, "loss": 0.0667, "theoretical_loss": 3.364946223578241, "tokens_seen": 2822373376 }, { "epoch": 0.71, "learning_rate": 0.00014615261173072294, "loss": 0.0624, "theoretical_loss": 3.364934162302803, "tokens_seen": 2822504448 }, { "epoch": 0.71, "learning_rate": 0.0001461124929792185, "loss": 0.0633, "theoretical_loss": 3.3649221017442765, "tokens_seen": 2822635520 }, { "epoch": 0.71, "learning_rate": 0.00014607237422771405, "loss": 0.0668, "theoretical_loss": 3.3649100419025855, "tokens_seen": 2822766592 }, { "epoch": 0.71, "learning_rate": 0.00014603225547620956, "loss": 0.0655, "theoretical_loss": 3.364897982777655, "tokens_seen": 2822897664 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.00021851278143003583, "objective/train/docs_used": 1025771, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.381722092628479, "objective/train/original_loss": 1.3817222118377686, "objective/train/theoretical_loss": 3.364885924369408, "objective/train/tokens_used": 1193553376, "objective/train/value_avg": -0.008331298828125, "objective/train/value_loss": 0.00022409552184399217, "objective/train/value_max": -4.9114227294921875e-05, "objective/train/value_min": -0.26220703125, "objective/train/value_reward_corr": 0.7368420229695303, "objective/train/value_std": 0.0160369873046875, "objective/train/weight_avg": 1.000315546989441, "objective/train/weighted_lm_loss": 1.3813583850860596, "objective/train/weights_max": 1.1315587759017944, "objective/train/weights_min": 0.37150946259498596, "theoretical_loss": 3.364885924369408, "tokens_seen": 2823028736 }, { "epoch": 0.71, "learning_rate": 0.00014599213672470513, "loss": 0.0664, "theoretical_loss": 3.364885924369408, "tokens_seen": 2823028736 }, { "epoch": 0.71, "learning_rate": 0.00014595201797320067, "loss": 0.0681, "theoretical_loss": 3.364873866677769, "tokens_seen": 2823159808 }, { "epoch": 0.71, "learning_rate": 0.00014591189922169624, "loss": 0.0627, "theoretical_loss": 3.3648618097026617, "tokens_seen": 2823290880 }, { "epoch": 0.71, "learning_rate": 0.00014587178047019178, "loss": 0.0657, "theoretical_loss": 3.364849753444011, "tokens_seen": 2823421952 }, { "epoch": 0.71, "learning_rate": 0.0001458316617186873, "loss": 0.067, "theoretical_loss": 3.3648376979017414, "tokens_seen": 2823553024 }, { "epoch": 0.71, "learning_rate": 0.00014579154296718287, "loss": 0.0646, "theoretical_loss": 3.364825643075776, "tokens_seen": 2823684096 }, { "epoch": 0.71, "learning_rate": 0.0001457514242156784, "loss": 0.064, "theoretical_loss": 3.3648135889660398, "tokens_seen": 2823815168 }, { "epoch": 0.71, "learning_rate": 0.00014571130546417398, "loss": 0.0679, "theoretical_loss": 3.3648015355724565, "tokens_seen": 2823946240 }, { "epoch": 0.71, "learning_rate": 0.00014567118671266952, "loss": 0.0686, "theoretical_loss": 3.3647894828949503, "tokens_seen": 2824077312 }, { "epoch": 0.71, "learning_rate": 0.00014563106796116503, "loss": 0.0626, "theoretical_loss": 3.3647774309334455, "tokens_seen": 2824208384 }, { "epoch": 0.71, "learning_rate": 0.0001455909492096606, "loss": 0.0658, "theoretical_loss": 3.364765379687867, "tokens_seen": 2824339456 }, { "epoch": 0.71, "learning_rate": 0.00014555083045815614, "loss": 0.0675, "theoretical_loss": 3.3647533291581384, "tokens_seen": 2824470528 }, { "epoch": 0.71, "learning_rate": 0.0001455107117066517, "loss": 0.062, "theoretical_loss": 3.3647412793441838, "tokens_seen": 2824601600 }, { "epoch": 0.71, "learning_rate": 0.00014547059295514725, "loss": 0.0633, "theoretical_loss": 3.364729230245928, "tokens_seen": 2824732672 }, { "epoch": 0.71, "learning_rate": 0.00014543047420364277, "loss": 0.0662, "theoretical_loss": 3.364717181863295, "tokens_seen": 2824863744 }, { "epoch": 0.71, "learning_rate": 0.00014539035545213833, "loss": 0.0655, "theoretical_loss": 3.364705134196209, "tokens_seen": 2824994816 }, { "epoch": 0.71, "learning_rate": 0.00014535023670063387, "loss": 0.0638, "theoretical_loss": 3.3646930872445946, "tokens_seen": 2825125888 }, { "epoch": 0.71, "learning_rate": 0.00014531011794912944, "loss": 0.0654, "theoretical_loss": 3.3646810410083763, "tokens_seen": 2825256960 }, { "epoch": 0.71, "learning_rate": 0.00014526999919762498, "loss": 0.0675, "theoretical_loss": 3.3646689954874773, "tokens_seen": 2825388032 }, { "epoch": 0.71, "learning_rate": 0.0001452298804461205, "loss": 0.0661, "theoretical_loss": 3.364656950681823, "tokens_seen": 2825519104 }, { "epoch": 0.71, "learning_rate": 0.00014518976169461607, "loss": 0.0633, "theoretical_loss": 3.3646449065913377, "tokens_seen": 2825650176 }, { "epoch": 0.71, "learning_rate": 0.0001451496429431116, "loss": 0.0669, "theoretical_loss": 3.3646328632159457, "tokens_seen": 2825781248 }, { "epoch": 0.71, "learning_rate": 0.00014510952419160718, "loss": 0.0612, "theoretical_loss": 3.364620820555571, "tokens_seen": 2825912320 }, { "epoch": 0.71, "learning_rate": 0.00014506940544010272, "loss": 0.065, "theoretical_loss": 3.364608778610138, "tokens_seen": 2826043392 }, { "epoch": 0.71, "learning_rate": 0.00014502928668859823, "loss": 0.0645, "theoretical_loss": 3.3645967373795718, "tokens_seen": 2826174464 }, { "epoch": 0.71, "objective/train/advantage_avg": 0.0005149194621481001, "objective/train/docs_used": 1027020, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2884790897369385, "objective/train/original_loss": 1.2884790897369385, "objective/train/theoretical_loss": 3.364584696863796, "objective/train/tokens_used": 1196830176, "objective/train/value_avg": -0.01043701171875, "objective/train/value_loss": 0.00038959813537076116, "objective/train/value_max": -6.252527236938477e-05, "objective/train/value_min": -0.70751953125, "objective/train/value_reward_corr": 0.7313888344164405, "objective/train/value_std": 0.021148681640625, "objective/train/weight_avg": 1.0006871223449707, "objective/train/weighted_lm_loss": 1.2884820699691772, "objective/train/weights_max": 1.5337786674499512, "objective/train/weights_min": 0.3787992298603058, "theoretical_loss": 3.364584696863796, "tokens_seen": 2826305536 }, { "epoch": 0.71, "learning_rate": 0.0001449891679370938, "loss": 0.0652, "theoretical_loss": 3.364584696863796, "tokens_seen": 2826305536 }, { "epoch": 0.71, "learning_rate": 0.00014494904918558934, "loss": 0.0683, "theoretical_loss": 3.3645726570627357, "tokens_seen": 2826436608 }, { "epoch": 0.71, "learning_rate": 0.0001449089304340849, "loss": 0.0644, "theoretical_loss": 3.3645606179763146, "tokens_seen": 2826567680 }, { "epoch": 0.71, "learning_rate": 0.00014486881168258045, "loss": 0.0606, "theoretical_loss": 3.3645485796044574, "tokens_seen": 2826698752 }, { "epoch": 0.71, "learning_rate": 0.00014482869293107597, "loss": 0.0634, "theoretical_loss": 3.364536541947089, "tokens_seen": 2826829824 }, { "epoch": 0.71, "learning_rate": 0.00014478857417957153, "loss": 0.0688, "theoretical_loss": 3.3645245050041335, "tokens_seen": 2826960896 }, { "epoch": 0.71, "learning_rate": 0.00014474845542806708, "loss": 0.0609, "theoretical_loss": 3.3645124687755152, "tokens_seen": 2827091968 }, { "epoch": 0.71, "learning_rate": 0.00014470833667656264, "loss": 0.0675, "theoretical_loss": 3.364500433261159, "tokens_seen": 2827223040 }, { "epoch": 0.71, "learning_rate": 0.00014466821792505818, "loss": 0.0661, "theoretical_loss": 3.3644883984609892, "tokens_seen": 2827354112 }, { "epoch": 0.71, "learning_rate": 0.0001446280991735537, "loss": 0.0624, "theoretical_loss": 3.3644763643749305, "tokens_seen": 2827485184 }, { "epoch": 0.71, "learning_rate": 0.00014458798042204927, "loss": 0.0627, "theoretical_loss": 3.364464331002907, "tokens_seen": 2827616256 }, { "epoch": 0.71, "learning_rate": 0.0001445478616705448, "loss": 0.0637, "theoretical_loss": 3.364452298344844, "tokens_seen": 2827747328 }, { "epoch": 0.71, "learning_rate": 0.00014450774291904038, "loss": 0.0677, "theoretical_loss": 3.364440266400665, "tokens_seen": 2827878400 }, { "epoch": 0.71, "learning_rate": 0.00014446762416753592, "loss": 0.0642, "theoretical_loss": 3.3644282351702954, "tokens_seen": 2828009472 }, { "epoch": 0.71, "learning_rate": 0.00014442750541603143, "loss": 0.0667, "theoretical_loss": 3.3644162046536596, "tokens_seen": 2828140544 }, { "epoch": 0.71, "learning_rate": 0.000144387386664527, "loss": 0.0649, "theoretical_loss": 3.364404174850682, "tokens_seen": 2828271616 }, { "epoch": 0.71, "learning_rate": 0.00014434726791302254, "loss": 0.0646, "theoretical_loss": 3.364392145761287, "tokens_seen": 2828402688 }, { "epoch": 0.71, "learning_rate": 0.0001443071491615181, "loss": 0.068, "theoretical_loss": 3.3643801173853998, "tokens_seen": 2828533760 }, { "epoch": 0.71, "learning_rate": 0.00014426703041001365, "loss": 0.0646, "theoretical_loss": 3.3643680897229444, "tokens_seen": 2828664832 }, { "epoch": 0.71, "learning_rate": 0.0001442269116585092, "loss": 0.0659, "theoretical_loss": 3.364356062773846, "tokens_seen": 2828795904 }, { "epoch": 0.71, "learning_rate": 0.00014418679290700473, "loss": 0.0637, "theoretical_loss": 3.3643440365380286, "tokens_seen": 2828926976 }, { "epoch": 0.71, "learning_rate": 0.00014414667415550028, "loss": 0.0672, "theoretical_loss": 3.3643320110154176, "tokens_seen": 2829058048 }, { "epoch": 0.71, "learning_rate": 0.00014410655540399584, "loss": 0.0614, "theoretical_loss": 3.3643199862059374, "tokens_seen": 2829189120 }, { "epoch": 0.71, "learning_rate": 0.00014406643665249139, "loss": 0.0655, "theoretical_loss": 3.364307962109512, "tokens_seen": 2829320192 }, { "epoch": 0.71, "learning_rate": 0.00014402631790098693, "loss": 0.0677, "theoretical_loss": 3.364295938726067, "tokens_seen": 2829451264 }, { "epoch": 0.71, "objective/train/advantage_avg": -0.00017668164218775928, "objective/train/docs_used": 1028281, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1964770555496216, "objective/train/original_loss": 1.1964770555496216, "objective/train/theoretical_loss": 3.364283916055527, "objective/train/tokens_used": 1200106976, "objective/train/value_avg": -0.007114410400390625, "objective/train/value_loss": 0.0001301548327319324, "objective/train/value_max": -2.5272369384765625e-05, "objective/train/value_min": -0.343017578125, "objective/train/value_reward_corr": 0.8301071528456152, "objective/train/value_std": 0.016143798828125, "objective/train/weight_avg": 0.9998840689659119, "objective/train/weighted_lm_loss": 1.1962757110595703, "objective/train/weights_max": 1.1537500619888306, "objective/train/weights_min": 0.3701203465461731, "theoretical_loss": 3.364283916055527, "tokens_seen": 2829582336 }, { "epoch": 0.71, "learning_rate": 0.00014398619914948247, "loss": 0.0622, "theoretical_loss": 3.364283916055527, "tokens_seen": 2829582336 }, { "epoch": 0.71, "learning_rate": 0.000143946080397978, "loss": 0.0649, "theoretical_loss": 3.364271894097816, "tokens_seen": 2829713408 }, { "epoch": 0.72, "learning_rate": 0.00014390596164647358, "loss": 0.0655, "theoretical_loss": 3.364259872852859, "tokens_seen": 2829844480 }, { "epoch": 0.72, "learning_rate": 0.00014386584289496912, "loss": 0.0646, "theoretical_loss": 3.364247852320581, "tokens_seen": 2829975552 }, { "epoch": 0.72, "learning_rate": 0.00014382572414346466, "loss": 0.0632, "theoretical_loss": 3.364235832500907, "tokens_seen": 2830106624 }, { "epoch": 0.72, "learning_rate": 0.0001437856053919602, "loss": 0.0643, "theoretical_loss": 3.364223813393761, "tokens_seen": 2830237696 }, { "epoch": 0.72, "learning_rate": 0.00014374548664045574, "loss": 0.0675, "theoretical_loss": 3.3642117949990684, "tokens_seen": 2830368768 }, { "epoch": 0.72, "learning_rate": 0.0001437053678889513, "loss": 0.0639, "theoretical_loss": 3.364199777316754, "tokens_seen": 2830499840 }, { "epoch": 0.72, "learning_rate": 0.00014366524913744685, "loss": 0.0632, "theoretical_loss": 3.364187760346742, "tokens_seen": 2830630912 }, { "epoch": 0.72, "learning_rate": 0.0001436251303859424, "loss": 0.0675, "theoretical_loss": 3.3641757440889575, "tokens_seen": 2830761984 }, { "epoch": 0.72, "learning_rate": 0.00014358501163443794, "loss": 0.0661, "theoretical_loss": 3.3641637285433257, "tokens_seen": 2830893056 }, { "epoch": 0.72, "learning_rate": 0.00014354489288293348, "loss": 0.0627, "theoretical_loss": 3.3641517137097705, "tokens_seen": 2831024128 }, { "epoch": 0.72, "learning_rate": 0.00014350477413142904, "loss": 0.0679, "theoretical_loss": 3.364139699588218, "tokens_seen": 2831155200 }, { "epoch": 0.72, "learning_rate": 0.00014346465537992459, "loss": 0.0674, "theoretical_loss": 3.3641276861785916, "tokens_seen": 2831286272 }, { "epoch": 0.72, "learning_rate": 0.00014342453662842013, "loss": 0.0655, "theoretical_loss": 3.3641156734808173, "tokens_seen": 2831417344 }, { "epoch": 0.72, "learning_rate": 0.00014338441787691567, "loss": 0.0653, "theoretical_loss": 3.3641036614948194, "tokens_seen": 2831548416 }, { "epoch": 0.72, "learning_rate": 0.0001433442991254112, "loss": 0.0679, "theoretical_loss": 3.364091650220523, "tokens_seen": 2831679488 }, { "epoch": 0.72, "learning_rate": 0.00014330418037390678, "loss": 0.0628, "theoretical_loss": 3.364079639657853, "tokens_seen": 2831810560 }, { "epoch": 0.72, "learning_rate": 0.00014326406162240232, "loss": 0.0606, "theoretical_loss": 3.3640676298067347, "tokens_seen": 2831941632 }, { "epoch": 0.72, "learning_rate": 0.00014322394287089786, "loss": 0.0625, "theoretical_loss": 3.364055620667092, "tokens_seen": 2832072704 }, { "epoch": 0.72, "learning_rate": 0.0001431838241193934, "loss": 0.063, "theoretical_loss": 3.3640436122388504, "tokens_seen": 2832203776 }, { "epoch": 0.72, "learning_rate": 0.00014314370536788894, "loss": 0.0679, "theoretical_loss": 3.3640316045219354, "tokens_seen": 2832334848 }, { "epoch": 0.72, "learning_rate": 0.0001431035866163845, "loss": 0.0668, "theoretical_loss": 3.364019597516271, "tokens_seen": 2832465920 }, { "epoch": 0.72, "learning_rate": 0.00014306346786488005, "loss": 0.0672, "theoretical_loss": 3.3640075912217826, "tokens_seen": 2832596992 }, { "epoch": 0.72, "learning_rate": 0.0001430233491133756, "loss": 0.0664, "theoretical_loss": 3.3639955856383947, "tokens_seen": 2832728064 }, { "epoch": 0.72, "objective/train/advantage_avg": -9.028084605233744e-05, "objective/train/docs_used": 1029660, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1910018920898438, "objective/train/original_loss": 1.1910017728805542, "objective/train/theoretical_loss": 3.363983580766033, "objective/train/tokens_used": 1203383776, "objective/train/value_avg": -0.01001739501953125, "objective/train/value_loss": 0.00015867225010879338, "objective/train/value_max": -3.218650817871094e-05, "objective/train/value_min": -0.457275390625, "objective/train/value_reward_corr": 0.7987133617636948, "objective/train/value_std": 0.016143798828125, "objective/train/weight_avg": 0.999984860420227, "objective/train/weighted_lm_loss": 1.1910783052444458, "objective/train/weights_max": 1.1319502592086792, "objective/train/weights_min": 0.3850405216217041, "theoretical_loss": 3.363983580766033, "tokens_seen": 2832859136 }, { "epoch": 0.72, "learning_rate": 0.00014298323036187114, "loss": 0.0624, "theoretical_loss": 3.363983580766033, "tokens_seen": 2832859136 }, { "epoch": 0.72, "learning_rate": 0.00014294311161036668, "loss": 0.0665, "theoretical_loss": 3.363971576604622, "tokens_seen": 2832990208 }, { "epoch": 0.72, "learning_rate": 0.00014290299285886225, "loss": 0.069, "theoretical_loss": 3.3639595731540872, "tokens_seen": 2833121280 }, { "epoch": 0.72, "learning_rate": 0.0001428628741073578, "loss": 0.0627, "theoretical_loss": 3.3639475704143535, "tokens_seen": 2833252352 }, { "epoch": 0.72, "learning_rate": 0.00014282275535585333, "loss": 0.0649, "theoretical_loss": 3.3639355683853456, "tokens_seen": 2833383424 }, { "epoch": 0.72, "learning_rate": 0.00014278263660434887, "loss": 0.0636, "theoretical_loss": 3.3639235670669883, "tokens_seen": 2833514496 }, { "epoch": 0.72, "learning_rate": 0.0001427425178528444, "loss": 0.0673, "theoretical_loss": 3.3639115664592074, "tokens_seen": 2833645568 }, { "epoch": 0.72, "learning_rate": 0.00014270239910133998, "loss": 0.0653, "theoretical_loss": 3.3638995665619276, "tokens_seen": 2833776640 }, { "epoch": 0.72, "learning_rate": 0.00014266228034983552, "loss": 0.0647, "theoretical_loss": 3.3638875673750737, "tokens_seen": 2833907712 }, { "epoch": 0.72, "learning_rate": 0.00014262216159833106, "loss": 0.0659, "theoretical_loss": 3.363875568898571, "tokens_seen": 2834038784 }, { "epoch": 0.72, "learning_rate": 0.0001425820428468266, "loss": 0.065, "theoretical_loss": 3.3638635711323452, "tokens_seen": 2834169856 }, { "epoch": 0.72, "learning_rate": 0.00014254192409532214, "loss": 0.0675, "theoretical_loss": 3.36385157407632, "tokens_seen": 2834300928 }, { "epoch": 0.72, "learning_rate": 0.0001425018053438177, "loss": 0.0664, "theoretical_loss": 3.363839577730422, "tokens_seen": 2834432000 }, { "epoch": 0.72, "learning_rate": 0.00014246168659231325, "loss": 0.0683, "theoretical_loss": 3.3638275820945758, "tokens_seen": 2834563072 }, { "epoch": 0.72, "learning_rate": 0.0001424215678408088, "loss": 0.0663, "theoretical_loss": 3.3638155871687063, "tokens_seen": 2834694144 }, { "epoch": 0.72, "learning_rate": 0.00014238144908930434, "loss": 0.0664, "theoretical_loss": 3.3638035929527392, "tokens_seen": 2834825216 }, { "epoch": 0.72, "learning_rate": 0.00014234133033779988, "loss": 0.0667, "theoretical_loss": 3.3637915994465986, "tokens_seen": 2834956288 }, { "epoch": 0.72, "learning_rate": 0.00014230121158629545, "loss": 0.0663, "theoretical_loss": 3.3637796066502106, "tokens_seen": 2835087360 }, { "epoch": 0.72, "learning_rate": 0.000142261092834791, "loss": 0.0596, "theoretical_loss": 3.3637676145635003, "tokens_seen": 2835218432 }, { "epoch": 0.72, "learning_rate": 0.00014222097408328653, "loss": 0.0698, "theoretical_loss": 3.3637556231863925, "tokens_seen": 2835349504 }, { "epoch": 0.72, "learning_rate": 0.00014218085533178207, "loss": 0.0659, "theoretical_loss": 3.363743632518813, "tokens_seen": 2835480576 }, { "epoch": 0.72, "learning_rate": 0.0001421407365802776, "loss": 0.0649, "theoretical_loss": 3.363731642560686, "tokens_seen": 2835611648 }, { "epoch": 0.72, "learning_rate": 0.00014210061782877318, "loss": 0.0635, "theoretical_loss": 3.363719653311938, "tokens_seen": 2835742720 }, { "epoch": 0.72, "learning_rate": 0.00014206049907726872, "loss": 0.0665, "theoretical_loss": 3.3637076647724937, "tokens_seen": 2835873792 }, { "epoch": 0.72, "learning_rate": 0.00014202038032576426, "loss": 0.0656, "theoretical_loss": 3.363695676942278, "tokens_seen": 2836004864 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.0003235231852158904, "objective/train/docs_used": 1030811, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3187352418899536, "objective/train/original_loss": 1.3187352418899536, "objective/train/theoretical_loss": 3.3636836898212166, "objective/train/tokens_used": 1206660576, "objective/train/value_avg": -0.0136566162109375, "objective/train/value_loss": 0.0004540090449154377, "objective/train/value_max": -5.227327346801758e-05, "objective/train/value_min": -0.72216796875, "objective/train/value_reward_corr": 0.7338506426120812, "objective/train/value_std": 0.02301025390625, "objective/train/weight_avg": 1.000527262687683, "objective/train/weighted_lm_loss": 1.3179471492767334, "objective/train/weights_max": 1.87997567653656, "objective/train/weights_min": 0.3785739243030548, "theoretical_loss": 3.3636836898212166, "tokens_seen": 2836135936 }, { "epoch": 0.72, "learning_rate": 0.0001419802615742598, "loss": 0.066, "theoretical_loss": 3.3636836898212166, "tokens_seen": 2836135936 }, { "epoch": 0.72, "learning_rate": 0.00014194014282275537, "loss": 0.0667, "theoretical_loss": 3.3636717034092345, "tokens_seen": 2836267008 }, { "epoch": 0.72, "learning_rate": 0.0001419000240712509, "loss": 0.0655, "theoretical_loss": 3.3636597177062573, "tokens_seen": 2836398080 }, { "epoch": 0.72, "learning_rate": 0.00014185990531974645, "loss": 0.0655, "theoretical_loss": 3.3636477327122103, "tokens_seen": 2836529152 }, { "epoch": 0.72, "learning_rate": 0.000141819786568242, "loss": 0.0658, "theoretical_loss": 3.363635748427018, "tokens_seen": 2836660224 }, { "epoch": 0.72, "learning_rate": 0.00014177966781673754, "loss": 0.0659, "theoretical_loss": 3.363623764850607, "tokens_seen": 2836791296 }, { "epoch": 0.72, "learning_rate": 0.0001417395490652331, "loss": 0.0656, "theoretical_loss": 3.363611781982902, "tokens_seen": 2836922368 }, { "epoch": 0.72, "learning_rate": 0.00014169943031372865, "loss": 0.0678, "theoretical_loss": 3.3635997998238283, "tokens_seen": 2837053440 }, { "epoch": 0.72, "learning_rate": 0.0001416593115622242, "loss": 0.064, "theoretical_loss": 3.3635878183733112, "tokens_seen": 2837184512 }, { "epoch": 0.72, "learning_rate": 0.00014161919281071973, "loss": 0.0648, "theoretical_loss": 3.363575837631276, "tokens_seen": 2837315584 }, { "epoch": 0.72, "learning_rate": 0.00014157907405921527, "loss": 0.0661, "theoretical_loss": 3.3635638575976485, "tokens_seen": 2837446656 }, { "epoch": 0.72, "learning_rate": 0.00014153895530771084, "loss": 0.0664, "theoretical_loss": 3.3635518782723537, "tokens_seen": 2837577728 }, { "epoch": 0.72, "learning_rate": 0.00014149883655620638, "loss": 0.0634, "theoretical_loss": 3.3635398996553176, "tokens_seen": 2837708800 }, { "epoch": 0.72, "learning_rate": 0.00014145871780470192, "loss": 0.0641, "theoretical_loss": 3.3635279217464644, "tokens_seen": 2837839872 }, { "epoch": 0.72, "learning_rate": 0.00014141859905319746, "loss": 0.0685, "theoretical_loss": 3.3635159445457212, "tokens_seen": 2837970944 }, { "epoch": 0.72, "learning_rate": 0.000141378480301693, "loss": 0.0639, "theoretical_loss": 3.3635039680530117, "tokens_seen": 2838102016 }, { "epoch": 0.72, "learning_rate": 0.00014133836155018857, "loss": 0.0663, "theoretical_loss": 3.363491992268263, "tokens_seen": 2838233088 }, { "epoch": 0.72, "learning_rate": 0.0001412982427986841, "loss": 0.0657, "theoretical_loss": 3.363480017191399, "tokens_seen": 2838364160 }, { "epoch": 0.72, "learning_rate": 0.00014125812404717965, "loss": 0.0666, "theoretical_loss": 3.3634680428223467, "tokens_seen": 2838495232 }, { "epoch": 0.72, "learning_rate": 0.0001412180052956752, "loss": 0.0653, "theoretical_loss": 3.36345606916103, "tokens_seen": 2838626304 }, { "epoch": 0.72, "learning_rate": 0.00014117788654417074, "loss": 0.0649, "theoretical_loss": 3.3634440962073757, "tokens_seen": 2838757376 }, { "epoch": 0.72, "learning_rate": 0.0001411377677926663, "loss": 0.0659, "theoretical_loss": 3.3634321239613083, "tokens_seen": 2838888448 }, { "epoch": 0.72, "learning_rate": 0.00014109764904116185, "loss": 0.0647, "theoretical_loss": 3.3634201524227545, "tokens_seen": 2839019520 }, { "epoch": 0.72, "learning_rate": 0.0001410575302896574, "loss": 0.0657, "theoretical_loss": 3.363408181591639, "tokens_seen": 2839150592 }, { "epoch": 0.72, "learning_rate": 0.00014101741153815293, "loss": 0.0678, "theoretical_loss": 3.363396211467887, "tokens_seen": 2839281664 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.0014375870814546943, "objective/train/docs_used": 1031989, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2215967178344727, "objective/train/original_loss": 1.2215968370437622, "objective/train/theoretical_loss": 3.3633842420514246, "objective/train/tokens_used": 1209937376, "objective/train/value_avg": -0.01483917236328125, "objective/train/value_loss": 0.00039621046744287014, "objective/train/value_max": -5.7816505432128906e-05, "objective/train/value_min": -0.89013671875, "objective/train/value_reward_corr": 0.7940597646979769, "objective/train/value_std": 0.029693603515625, "objective/train/weight_avg": 1.0016255378723145, "objective/train/weighted_lm_loss": 1.2212287187576294, "objective/train/weights_max": 2.2407338619232178, "objective/train/weights_min": 0.37781789898872375, "theoretical_loss": 3.3633842420514246, "tokens_seen": 2839412736 }, { "epoch": 0.72, "learning_rate": 0.00014097729278664847, "loss": 0.0645, "theoretical_loss": 3.3633842420514246, "tokens_seen": 2839412736 }, { "epoch": 0.72, "learning_rate": 0.00014093717403514404, "loss": 0.0646, "theoretical_loss": 3.3633722733421774, "tokens_seen": 2839543808 }, { "epoch": 0.72, "learning_rate": 0.00014089705528363958, "loss": 0.0617, "theoretical_loss": 3.363360305340071, "tokens_seen": 2839674880 }, { "epoch": 0.72, "learning_rate": 0.00014085693653213512, "loss": 0.0646, "theoretical_loss": 3.363348338045031, "tokens_seen": 2839805952 }, { "epoch": 0.72, "learning_rate": 0.00014081681778063066, "loss": 0.0672, "theoretical_loss": 3.3633363714569824, "tokens_seen": 2839937024 }, { "epoch": 0.72, "learning_rate": 0.0001407766990291262, "loss": 0.0665, "theoretical_loss": 3.3633244055758516, "tokens_seen": 2840068096 }, { "epoch": 0.72, "learning_rate": 0.00014073658027762177, "loss": 0.0689, "theoretical_loss": 3.3633124404015637, "tokens_seen": 2840199168 }, { "epoch": 0.72, "learning_rate": 0.00014069646152611731, "loss": 0.0635, "theoretical_loss": 3.363300475934045, "tokens_seen": 2840330240 }, { "epoch": 0.72, "learning_rate": 0.00014065634277461286, "loss": 0.0666, "theoretical_loss": 3.3632885121732197, "tokens_seen": 2840461312 }, { "epoch": 0.72, "learning_rate": 0.0001406162240231084, "loss": 0.0691, "theoretical_loss": 3.363276549119015, "tokens_seen": 2840592384 }, { "epoch": 0.72, "learning_rate": 0.00014057610527160394, "loss": 0.065, "theoretical_loss": 3.3632645867713555, "tokens_seen": 2840723456 }, { "epoch": 0.72, "learning_rate": 0.0001405359865200995, "loss": 0.0653, "theoretical_loss": 3.3632526251301678, "tokens_seen": 2840854528 }, { "epoch": 0.72, "learning_rate": 0.00014049586776859505, "loss": 0.0649, "theoretical_loss": 3.3632406641953767, "tokens_seen": 2840985600 }, { "epoch": 0.72, "learning_rate": 0.0001404557490170906, "loss": 0.0658, "theoretical_loss": 3.3632287039669087, "tokens_seen": 2841116672 }, { "epoch": 0.72, "learning_rate": 0.00014041563026558613, "loss": 0.0612, "theoretical_loss": 3.363216744444689, "tokens_seen": 2841247744 }, { "epoch": 0.72, "learning_rate": 0.00014037551151408167, "loss": 0.0651, "theoretical_loss": 3.363204785628643, "tokens_seen": 2841378816 }, { "epoch": 0.72, "learning_rate": 0.00014033539276257724, "loss": 0.0658, "theoretical_loss": 3.3631928275186973, "tokens_seen": 2841509888 }, { "epoch": 0.72, "learning_rate": 0.00014029527401107278, "loss": 0.0647, "theoretical_loss": 3.363180870114777, "tokens_seen": 2841640960 }, { "epoch": 0.72, "learning_rate": 0.00014025515525956832, "loss": 0.0677, "theoretical_loss": 3.3631689134168083, "tokens_seen": 2841772032 }, { "epoch": 0.72, "learning_rate": 0.00014021503650806386, "loss": 0.0679, "theoretical_loss": 3.3631569574247164, "tokens_seen": 2841903104 }, { "epoch": 0.72, "learning_rate": 0.0001401749177565594, "loss": 0.0668, "theoretical_loss": 3.3631450021384275, "tokens_seen": 2842034176 }, { "epoch": 0.72, "learning_rate": 0.00014013479900505497, "loss": 0.0641, "theoretical_loss": 3.3631330475578674, "tokens_seen": 2842165248 }, { "epoch": 0.72, "learning_rate": 0.00014009468025355051, "loss": 0.071, "theoretical_loss": 3.3631210936829614, "tokens_seen": 2842296320 }, { "epoch": 0.72, "learning_rate": 0.00014005456150204606, "loss": 0.0639, "theoretical_loss": 3.363109140513636, "tokens_seen": 2842427392 }, { "epoch": 0.72, "learning_rate": 0.0001400144427505416, "loss": 0.067, "theoretical_loss": 3.363097188049817, "tokens_seen": 2842558464 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.0004393623967189342, "objective/train/docs_used": 1033097, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3669618368148804, "objective/train/original_loss": 1.3669618368148804, "objective/train/theoretical_loss": 3.3630852362914294, "objective/train/tokens_used": 1213214176, "objective/train/value_avg": -0.005992889404296875, "objective/train/value_loss": 0.00012793233327101916, "objective/train/value_max": -4.4345855712890625e-05, "objective/train/value_min": -0.377197265625, "objective/train/value_reward_corr": 0.7383606933652521, "objective/train/value_std": 0.013153076171875, "objective/train/weight_avg": 1.0005019903182983, "objective/train/weighted_lm_loss": 1.3671168088912964, "objective/train/weights_max": 1.3319194316864014, "objective/train/weights_min": 0.6315780878067017, "theoretical_loss": 3.3630852362914294, "tokens_seen": 2842689536 }, { "epoch": 0.72, "learning_rate": 0.00013997432399903714, "loss": 0.0684, "theoretical_loss": 3.3630852362914294, "tokens_seen": 2842689536 }, { "epoch": 0.72, "learning_rate": 0.0001399342052475327, "loss": 0.0627, "theoretical_loss": 3.3630732852384, "tokens_seen": 2842820608 }, { "epoch": 0.72, "learning_rate": 0.00013989408649602825, "loss": 0.0693, "theoretical_loss": 3.363061334890654, "tokens_seen": 2842951680 }, { "epoch": 0.72, "learning_rate": 0.0001398539677445238, "loss": 0.0676, "theoretical_loss": 3.363049385248117, "tokens_seen": 2843082752 }, { "epoch": 0.72, "learning_rate": 0.00013981384899301933, "loss": 0.0654, "theoretical_loss": 3.363037436310716, "tokens_seen": 2843213824 }, { "epoch": 0.72, "learning_rate": 0.00013977373024151487, "loss": 0.071, "theoretical_loss": 3.363025488078376, "tokens_seen": 2843344896 }, { "epoch": 0.72, "learning_rate": 0.00013973361149001044, "loss": 0.0687, "theoretical_loss": 3.363013540551023, "tokens_seen": 2843475968 }, { "epoch": 0.72, "learning_rate": 0.00013969349273850598, "loss": 0.0627, "theoretical_loss": 3.3630015937285833, "tokens_seen": 2843607040 }, { "epoch": 0.72, "learning_rate": 0.00013965337398700152, "loss": 0.0616, "theoretical_loss": 3.362989647610983, "tokens_seen": 2843738112 }, { "epoch": 0.72, "learning_rate": 0.00013961325523549706, "loss": 0.0633, "theoretical_loss": 3.3629777021981466, "tokens_seen": 2843869184 }, { "epoch": 0.72, "learning_rate": 0.0001395731364839926, "loss": 0.064, "theoretical_loss": 3.362965757490002, "tokens_seen": 2844000256 }, { "epoch": 0.72, "learning_rate": 0.00013953301773248817, "loss": 0.0673, "theoretical_loss": 3.362953813486474, "tokens_seen": 2844131328 }, { "epoch": 0.72, "learning_rate": 0.00013949289898098372, "loss": 0.0631, "theoretical_loss": 3.3629418701874885, "tokens_seen": 2844262400 }, { "epoch": 0.72, "learning_rate": 0.00013945278022947928, "loss": 0.0698, "theoretical_loss": 3.3629299275929716, "tokens_seen": 2844393472 }, { "epoch": 0.72, "learning_rate": 0.0001394126614779748, "loss": 0.0661, "theoretical_loss": 3.3629179857028495, "tokens_seen": 2844524544 }, { "epoch": 0.72, "learning_rate": 0.00013937254272647034, "loss": 0.0636, "theoretical_loss": 3.3629060445170484, "tokens_seen": 2844655616 }, { "epoch": 0.72, "learning_rate": 0.0001393324239749659, "loss": 0.0657, "theoretical_loss": 3.362894104035494, "tokens_seen": 2844786688 }, { "epoch": 0.72, "learning_rate": 0.00013929230522346145, "loss": 0.0688, "theoretical_loss": 3.3628821642581124, "tokens_seen": 2844917760 }, { "epoch": 0.72, "learning_rate": 0.00013925218647195702, "loss": 0.0682, "theoretical_loss": 3.3628702251848295, "tokens_seen": 2845048832 }, { "epoch": 0.72, "learning_rate": 0.00013921206772045253, "loss": 0.066, "theoretical_loss": 3.3628582868155714, "tokens_seen": 2845179904 }, { "epoch": 0.72, "learning_rate": 0.00013917194896894807, "loss": 0.069, "theoretical_loss": 3.3628463491502645, "tokens_seen": 2845310976 }, { "epoch": 0.72, "learning_rate": 0.00013913183021744364, "loss": 0.0621, "theoretical_loss": 3.362834412188834, "tokens_seen": 2845442048 }, { "epoch": 0.72, "learning_rate": 0.00013909171146593918, "loss": 0.063, "theoretical_loss": 3.362822475931207, "tokens_seen": 2845573120 }, { "epoch": 0.72, "learning_rate": 0.00013905159271443475, "loss": 0.063, "theoretical_loss": 3.3628105403773088, "tokens_seen": 2845704192 }, { "epoch": 0.72, "learning_rate": 0.00013901147396293026, "loss": 0.0677, "theoretical_loss": 3.362798605527066, "tokens_seen": 2845835264 }, { "epoch": 0.72, "objective/train/advantage_avg": 0.0006195381283760071, "objective/train/docs_used": 1034283, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.327319860458374, "objective/train/original_loss": 1.327319622039795, "objective/train/theoretical_loss": 3.3627866713804044, "objective/train/tokens_used": 1216490976, "objective/train/value_avg": -0.006969451904296875, "objective/train/value_loss": 0.00041040245559997857, "objective/train/value_max": -3.790855407714844e-05, "objective/train/value_min": -0.47802734375, "objective/train/value_reward_corr": 0.5761509686842871, "objective/train/value_std": 0.01568603515625, "objective/train/weight_avg": 1.0007901191711426, "objective/train/weighted_lm_loss": 1.3286657333374023, "objective/train/weights_max": 1.5088694095611572, "objective/train/weights_min": 0.394267737865448, "theoretical_loss": 3.3627866713804044, "tokens_seen": 2845966336 }, { "epoch": 0.72, "learning_rate": 0.0001389713552114258, "loss": 0.0659, "theoretical_loss": 3.3627866713804044, "tokens_seen": 2845966336 }, { "epoch": 0.72, "learning_rate": 0.00013893123645992137, "loss": 0.0665, "theoretical_loss": 3.362774737937251, "tokens_seen": 2846097408 }, { "epoch": 0.72, "learning_rate": 0.00013889111770841692, "loss": 0.0663, "theoretical_loss": 3.36276280519753, "tokens_seen": 2846228480 }, { "epoch": 0.73, "learning_rate": 0.00013885099895691248, "loss": 0.0667, "theoretical_loss": 3.3627508731611697, "tokens_seen": 2846359552 }, { "epoch": 0.73, "learning_rate": 0.00013881088020540803, "loss": 0.065, "theoretical_loss": 3.362738941828095, "tokens_seen": 2846490624 }, { "epoch": 0.73, "learning_rate": 0.00013877076145390354, "loss": 0.0638, "theoretical_loss": 3.362727011198232, "tokens_seen": 2846621696 }, { "epoch": 0.73, "learning_rate": 0.0001387306427023991, "loss": 0.068, "theoretical_loss": 3.3627150812715074, "tokens_seen": 2846752768 }, { "epoch": 0.73, "learning_rate": 0.00013869052395089465, "loss": 0.0655, "theoretical_loss": 3.362703152047848, "tokens_seen": 2846883840 }, { "epoch": 0.73, "learning_rate": 0.00013865040519939022, "loss": 0.0628, "theoretical_loss": 3.362691223527178, "tokens_seen": 2847014912 }, { "epoch": 0.73, "learning_rate": 0.00013861028644788576, "loss": 0.0644, "theoretical_loss": 3.362679295709426, "tokens_seen": 2847145984 }, { "epoch": 0.73, "learning_rate": 0.00013857016769638127, "loss": 0.0662, "theoretical_loss": 3.362667368594516, "tokens_seen": 2847277056 }, { "epoch": 0.73, "learning_rate": 0.00013853004894487684, "loss": 0.065, "theoretical_loss": 3.3626554421823758, "tokens_seen": 2847408128 }, { "epoch": 0.73, "learning_rate": 0.00013848993019337238, "loss": 0.0645, "theoretical_loss": 3.3626435164729314, "tokens_seen": 2847539200 }, { "epoch": 0.73, "learning_rate": 0.00013844981144186795, "loss": 0.0681, "theoretical_loss": 3.3626315914661085, "tokens_seen": 2847670272 }, { "epoch": 0.73, "learning_rate": 0.0001384096926903635, "loss": 0.0667, "theoretical_loss": 3.3626196671618334, "tokens_seen": 2847801344 }, { "epoch": 0.73, "learning_rate": 0.000138369573938859, "loss": 0.0661, "theoretical_loss": 3.362607743560033, "tokens_seen": 2847932416 }, { "epoch": 0.73, "learning_rate": 0.00013832945518735458, "loss": 0.0652, "theoretical_loss": 3.362595820660633, "tokens_seen": 2848063488 }, { "epoch": 0.73, "learning_rate": 0.00013828933643585012, "loss": 0.0641, "theoretical_loss": 3.36258389846356, "tokens_seen": 2848194560 }, { "epoch": 0.73, "learning_rate": 0.00013824921768434568, "loss": 0.0669, "theoretical_loss": 3.3625719769687406, "tokens_seen": 2848325632 }, { "epoch": 0.73, "learning_rate": 0.00013820909893284123, "loss": 0.0667, "theoretical_loss": 3.3625600561761, "tokens_seen": 2848456704 }, { "epoch": 0.73, "learning_rate": 0.00013816898018133674, "loss": 0.0674, "theoretical_loss": 3.362548136085566, "tokens_seen": 2848587776 }, { "epoch": 0.73, "learning_rate": 0.0001381288614298323, "loss": 0.0671, "theoretical_loss": 3.362536216697064, "tokens_seen": 2848718848 }, { "epoch": 0.73, "learning_rate": 0.00013808874267832785, "loss": 0.0653, "theoretical_loss": 3.3625242980105203, "tokens_seen": 2848849920 }, { "epoch": 0.73, "learning_rate": 0.00013804862392682342, "loss": 0.0664, "theoretical_loss": 3.362512380025862, "tokens_seen": 2848980992 }, { "epoch": 0.73, "learning_rate": 0.00013800850517531896, "loss": 0.0652, "theoretical_loss": 3.362500462743015, "tokens_seen": 2849112064 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.0014490442117676139, "objective/train/docs_used": 1035566, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2591935396194458, "objective/train/original_loss": 1.2591936588287354, "objective/train/theoretical_loss": 3.3624885461619054, "objective/train/tokens_used": 1219767776, "objective/train/value_avg": -0.00991058349609375, "objective/train/value_loss": 0.00030698615591973066, "objective/train/value_max": -5.346536636352539e-05, "objective/train/value_min": -0.320068359375, "objective/train/value_reward_corr": 0.5410916322171793, "objective/train/value_std": 0.01690673828125, "objective/train/weight_avg": 1.0015928745269775, "objective/train/weighted_lm_loss": 1.2603315114974976, "objective/train/weights_max": 1.377221941947937, "objective/train/weights_min": 0.3783486783504486, "theoretical_loss": 3.3624885461619054, "tokens_seen": 2849243136 }, { "epoch": 0.73, "learning_rate": 0.00013796838642381447, "loss": 0.0653, "theoretical_loss": 3.3624885461619054, "tokens_seen": 2849243136 }, { "epoch": 0.73, "learning_rate": 0.00013792826767231004, "loss": 0.0649, "theoretical_loss": 3.36247663028246, "tokens_seen": 2849374208 }, { "epoch": 0.73, "learning_rate": 0.00013788814892080558, "loss": 0.0645, "theoretical_loss": 3.362464715104605, "tokens_seen": 2849505280 }, { "epoch": 0.73, "learning_rate": 0.00013784803016930115, "loss": 0.0686, "theoretical_loss": 3.3624528006282675, "tokens_seen": 2849636352 }, { "epoch": 0.73, "learning_rate": 0.0001378079114177967, "loss": 0.0629, "theoretical_loss": 3.362440886853373, "tokens_seen": 2849767424 }, { "epoch": 0.73, "learning_rate": 0.0001377677926662922, "loss": 0.0654, "theoretical_loss": 3.3624289737798483, "tokens_seen": 2849898496 }, { "epoch": 0.73, "learning_rate": 0.00013772767391478778, "loss": 0.0665, "theoretical_loss": 3.36241706140762, "tokens_seen": 2850029568 }, { "epoch": 0.73, "learning_rate": 0.00013768755516328332, "loss": 0.0633, "theoretical_loss": 3.3624051497366145, "tokens_seen": 2850160640 }, { "epoch": 0.73, "learning_rate": 0.00013764743641177889, "loss": 0.0665, "theoretical_loss": 3.3623932387667583, "tokens_seen": 2850291712 }, { "epoch": 0.73, "learning_rate": 0.00013760731766027443, "loss": 0.0642, "theoretical_loss": 3.3623813284979778, "tokens_seen": 2850422784 }, { "epoch": 0.73, "learning_rate": 0.00013756719890876994, "loss": 0.0675, "theoretical_loss": 3.362369418930199, "tokens_seen": 2850553856 }, { "epoch": 0.73, "learning_rate": 0.0001375270801572655, "loss": 0.0667, "theoretical_loss": 3.3623575100633496, "tokens_seen": 2850684928 }, { "epoch": 0.73, "learning_rate": 0.00013748696140576105, "loss": 0.0675, "theoretical_loss": 3.3623456018973554, "tokens_seen": 2850816000 }, { "epoch": 0.73, "learning_rate": 0.00013744684265425662, "loss": 0.0627, "theoretical_loss": 3.362333694432143, "tokens_seen": 2850947072 }, { "epoch": 0.73, "learning_rate": 0.00013740672390275216, "loss": 0.0647, "theoretical_loss": 3.362321787667639, "tokens_seen": 2851078144 }, { "epoch": 0.73, "learning_rate": 0.00013736660515124767, "loss": 0.0619, "theoretical_loss": 3.3623098816037693, "tokens_seen": 2851209216 }, { "epoch": 0.73, "learning_rate": 0.00013732648639974324, "loss": 0.0662, "theoretical_loss": 3.3622979762404617, "tokens_seen": 2851340288 }, { "epoch": 0.73, "learning_rate": 0.00013728636764823878, "loss": 0.0624, "theoretical_loss": 3.3622860715776417, "tokens_seen": 2851471360 }, { "epoch": 0.73, "learning_rate": 0.00013724624889673435, "loss": 0.0643, "theoretical_loss": 3.3622741676152366, "tokens_seen": 2851602432 }, { "epoch": 0.73, "learning_rate": 0.0001372061301452299, "loss": 0.0638, "theoretical_loss": 3.3622622643531725, "tokens_seen": 2851733504 }, { "epoch": 0.73, "learning_rate": 0.0001371660113937254, "loss": 0.064, "theoretical_loss": 3.3622503617913764, "tokens_seen": 2851864576 }, { "epoch": 0.73, "learning_rate": 0.00013712589264222098, "loss": 0.0615, "theoretical_loss": 3.3622384599297748, "tokens_seen": 2851995648 }, { "epoch": 0.73, "learning_rate": 0.00013708577389071652, "loss": 0.0663, "theoretical_loss": 3.362226558768294, "tokens_seen": 2852126720 }, { "epoch": 0.73, "learning_rate": 0.00013704565513921209, "loss": 0.0616, "theoretical_loss": 3.3622146583068613, "tokens_seen": 2852257792 }, { "epoch": 0.73, "learning_rate": 0.00013700553638770763, "loss": 0.0635, "theoretical_loss": 3.3622027585454024, "tokens_seen": 2852388864 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.0003668861754704267, "objective/train/docs_used": 1036772, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4189653396606445, "objective/train/original_loss": 1.4189653396606445, "objective/train/theoretical_loss": 3.362190859483845, "objective/train/tokens_used": 1223044576, "objective/train/value_avg": -0.00823211669921875, "objective/train/value_loss": 0.0003074555716011673, "objective/train/value_max": -3.349781036376953e-05, "objective/train/value_min": -0.2313232421875, "objective/train/value_reward_corr": 0.5715057020952345, "objective/train/value_std": 0.012176513671875, "objective/train/weight_avg": 1.0004957914352417, "objective/train/weighted_lm_loss": 1.4188746213912964, "objective/train/weights_max": 1.1466456651687622, "objective/train/weights_min": 0.3694700598716736, "theoretical_loss": 3.362190859483845, "tokens_seen": 2852519936 }, { "epoch": 0.73, "learning_rate": 0.00013696541763620314, "loss": 0.0689, "theoretical_loss": 3.362190859483845, "tokens_seen": 2852519936 }, { "epoch": 0.73, "learning_rate": 0.0001369252988846987, "loss": 0.0677, "theoretical_loss": 3.3621789611221153, "tokens_seen": 2852651008 }, { "epoch": 0.73, "learning_rate": 0.00013688518013319425, "loss": 0.0618, "theoretical_loss": 3.36216706346014, "tokens_seen": 2852782080 }, { "epoch": 0.73, "learning_rate": 0.00013684506138168982, "loss": 0.0661, "theoretical_loss": 3.362155166497846, "tokens_seen": 2852913152 }, { "epoch": 0.73, "learning_rate": 0.00013680494263018536, "loss": 0.0629, "theoretical_loss": 3.3621432702351597, "tokens_seen": 2853044224 }, { "epoch": 0.73, "learning_rate": 0.0001367648238786809, "loss": 0.0674, "theoretical_loss": 3.362131374672008, "tokens_seen": 2853175296 }, { "epoch": 0.73, "learning_rate": 0.00013672470512717644, "loss": 0.0641, "theoretical_loss": 3.3621194798083174, "tokens_seen": 2853306368 }, { "epoch": 0.73, "learning_rate": 0.00013668458637567198, "loss": 0.0647, "theoretical_loss": 3.362107585644015, "tokens_seen": 2853437440 }, { "epoch": 0.73, "learning_rate": 0.00013664446762416755, "loss": 0.0643, "theoretical_loss": 3.362095692179027, "tokens_seen": 2853568512 }, { "epoch": 0.73, "learning_rate": 0.0001366043488726631, "loss": 0.0653, "theoretical_loss": 3.362083799413281, "tokens_seen": 2853699584 }, { "epoch": 0.73, "learning_rate": 0.00013656423012115864, "loss": 0.0677, "theoretical_loss": 3.3620719073467034, "tokens_seen": 2853830656 }, { "epoch": 0.73, "learning_rate": 0.00013652411136965418, "loss": 0.0634, "theoretical_loss": 3.3620600159792207, "tokens_seen": 2853961728 }, { "epoch": 0.73, "learning_rate": 0.00013648399261814972, "loss": 0.0641, "theoretical_loss": 3.3620481253107597, "tokens_seen": 2854092800 }, { "epoch": 0.73, "learning_rate": 0.00013644387386664529, "loss": 0.0633, "theoretical_loss": 3.3620362353412476, "tokens_seen": 2854223872 }, { "epoch": 0.73, "learning_rate": 0.00013640375511514083, "loss": 0.0647, "theoretical_loss": 3.362024346070611, "tokens_seen": 2854354944 }, { "epoch": 0.73, "learning_rate": 0.00013636363636363637, "loss": 0.0653, "theoretical_loss": 3.362012457498777, "tokens_seen": 2854486016 }, { "epoch": 0.73, "learning_rate": 0.0001363235176121319, "loss": 0.0644, "theoretical_loss": 3.362000569625672, "tokens_seen": 2854617088 }, { "epoch": 0.73, "learning_rate": 0.00013628339886062745, "loss": 0.0676, "theoretical_loss": 3.361988682451223, "tokens_seen": 2854748160 }, { "epoch": 0.73, "learning_rate": 0.00013624328010912302, "loss": 0.0636, "theoretical_loss": 3.361976795975357, "tokens_seen": 2854879232 }, { "epoch": 0.73, "learning_rate": 0.00013620316135761856, "loss": 0.0631, "theoretical_loss": 3.361964910198001, "tokens_seen": 2855010304 }, { "epoch": 0.73, "learning_rate": 0.0001361630426061141, "loss": 0.0655, "theoretical_loss": 3.3619530251190817, "tokens_seen": 2855141376 }, { "epoch": 0.73, "learning_rate": 0.00013612292385460964, "loss": 0.0655, "theoretical_loss": 3.3619411407385256, "tokens_seen": 2855272448 }, { "epoch": 0.73, "learning_rate": 0.00013608280510310519, "loss": 0.0669, "theoretical_loss": 3.3619292570562602, "tokens_seen": 2855403520 }, { "epoch": 0.73, "learning_rate": 0.00013604268635160075, "loss": 0.0678, "theoretical_loss": 3.3619173740722124, "tokens_seen": 2855534592 }, { "epoch": 0.73, "learning_rate": 0.0001360025676000963, "loss": 0.0643, "theoretical_loss": 3.3619054917863087, "tokens_seen": 2855665664 }, { "epoch": 0.73, "objective/train/advantage_avg": 0.0002977001422550529, "objective/train/docs_used": 1037860, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3654943704605103, "objective/train/original_loss": 1.3654943704605103, "objective/train/theoretical_loss": 3.361893610198476, "objective/train/tokens_used": 1226321376, "objective/train/value_avg": -0.0053863525390625, "objective/train/value_loss": 0.0001252575748367235, "objective/train/value_max": -2.7954578399658203e-05, "objective/train/value_min": -0.2017822265625, "objective/train/value_reward_corr": 0.6313819482328016, "objective/train/value_std": 0.0090179443359375, "objective/train/weight_avg": 1.000355839729309, "objective/train/weighted_lm_loss": 1.3656089305877686, "objective/train/weights_max": 1.1670680046081543, "objective/train/weights_min": 0.3729492723941803, "theoretical_loss": 3.361893610198476, "tokens_seen": 2855796736 }, { "epoch": 0.73, "learning_rate": 0.00013596244884859184, "loss": 0.066, "theoretical_loss": 3.361893610198476, "tokens_seen": 2855796736 }, { "epoch": 0.73, "learning_rate": 0.00013592233009708738, "loss": 0.0652, "theoretical_loss": 3.3618817293086423, "tokens_seen": 2855927808 }, { "epoch": 0.73, "learning_rate": 0.00013588221134558292, "loss": 0.0649, "theoretical_loss": 3.3618698491167334, "tokens_seen": 2856058880 }, { "epoch": 0.73, "learning_rate": 0.0001358420925940785, "loss": 0.0669, "theoretical_loss": 3.3618579696226765, "tokens_seen": 2856189952 }, { "epoch": 0.73, "learning_rate": 0.00013580197384257403, "loss": 0.0667, "theoretical_loss": 3.3618460908263987, "tokens_seen": 2856321024 }, { "epoch": 0.73, "learning_rate": 0.00013576185509106957, "loss": 0.067, "theoretical_loss": 3.3618342127278273, "tokens_seen": 2856452096 }, { "epoch": 0.73, "learning_rate": 0.0001357217363395651, "loss": 0.068, "theoretical_loss": 3.361822335326889, "tokens_seen": 2856583168 }, { "epoch": 0.73, "learning_rate": 0.00013568161758806065, "loss": 0.0688, "theoretical_loss": 3.3618104586235114, "tokens_seen": 2856714240 }, { "epoch": 0.73, "learning_rate": 0.00013564149883655622, "loss": 0.0634, "theoretical_loss": 3.3617985826176207, "tokens_seen": 2856845312 }, { "epoch": 0.73, "learning_rate": 0.00013560138008505176, "loss": 0.0658, "theoretical_loss": 3.361786707309144, "tokens_seen": 2856976384 }, { "epoch": 0.73, "learning_rate": 0.0001355612613335473, "loss": 0.071, "theoretical_loss": 3.361774832698009, "tokens_seen": 2857107456 }, { "epoch": 0.73, "learning_rate": 0.00013552114258204284, "loss": 0.0652, "theoretical_loss": 3.361762958784142, "tokens_seen": 2857238528 }, { "epoch": 0.73, "learning_rate": 0.00013548102383053839, "loss": 0.0638, "theoretical_loss": 3.3617510855674704, "tokens_seen": 2857369600 }, { "epoch": 0.73, "learning_rate": 0.00013544090507903395, "loss": 0.0674, "theoretical_loss": 3.3617392130479216, "tokens_seen": 2857500672 }, { "epoch": 0.73, "learning_rate": 0.0001354007863275295, "loss": 0.0653, "theoretical_loss": 3.3617273412254223, "tokens_seen": 2857631744 }, { "epoch": 0.73, "learning_rate": 0.00013536066757602504, "loss": 0.0665, "theoretical_loss": 3.3617154700998997, "tokens_seen": 2857762816 }, { "epoch": 0.73, "learning_rate": 0.00013532054882452058, "loss": 0.0661, "theoretical_loss": 3.361703599671281, "tokens_seen": 2857893888 }, { "epoch": 0.73, "learning_rate": 0.00013528043007301612, "loss": 0.0671, "theoretical_loss": 3.3616917299394933, "tokens_seen": 2858024960 }, { "epoch": 0.73, "learning_rate": 0.0001352403113215117, "loss": 0.0641, "theoretical_loss": 3.361679860904464, "tokens_seen": 2858156032 }, { "epoch": 0.73, "learning_rate": 0.00013520019257000723, "loss": 0.0681, "theoretical_loss": 3.3616679925661197, "tokens_seen": 2858287104 }, { "epoch": 0.73, "learning_rate": 0.00013516007381850277, "loss": 0.0625, "theoretical_loss": 3.3616561249243877, "tokens_seen": 2858418176 }, { "epoch": 0.73, "learning_rate": 0.0001351199550669983, "loss": 0.0631, "theoretical_loss": 3.3616442579791954, "tokens_seen": 2858549248 }, { "epoch": 0.73, "learning_rate": 0.00013507983631549385, "loss": 0.0642, "theoretical_loss": 3.36163239173047, "tokens_seen": 2858680320 }, { "epoch": 0.73, "learning_rate": 0.00013503971756398942, "loss": 0.0687, "theoretical_loss": 3.3616205261781382, "tokens_seen": 2858811392 }, { "epoch": 0.73, "learning_rate": 0.00013499959881248496, "loss": 0.0619, "theoretical_loss": 3.361608661322128, "tokens_seen": 2858942464 }, { "epoch": 0.73, "objective/train/advantage_avg": -5.613389930658741e-06, "objective/train/docs_used": 1039109, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3117191791534424, "objective/train/original_loss": 1.311719298362732, "objective/train/theoretical_loss": 3.361596797162366, "objective/train/tokens_used": 1229598176, "objective/train/value_avg": -0.007610321044921875, "objective/train/value_loss": 0.0001732174278004095, "objective/train/value_max": -2.467632293701172e-05, "objective/train/value_min": -0.281494140625, "objective/train/value_reward_corr": 0.6762052948056612, "objective/train/value_std": 0.01218414306640625, "objective/train/weight_avg": 1.0000758171081543, "objective/train/weighted_lm_loss": 1.3115131855010986, "objective/train/weights_max": 1.1034588813781738, "objective/train/weights_min": 0.3886411488056183, "theoretical_loss": 3.361596797162366, "tokens_seen": 2859073536 }, { "epoch": 0.73, "learning_rate": 0.0001349594800609805, "loss": 0.0637, "theoretical_loss": 3.361596797162366, "tokens_seen": 2859073536 }, { "epoch": 0.73, "learning_rate": 0.00013491936130947604, "loss": 0.0639, "theoretical_loss": 3.3615849336987798, "tokens_seen": 2859204608 }, { "epoch": 0.73, "learning_rate": 0.00013487924255797159, "loss": 0.0664, "theoretical_loss": 3.3615730709312963, "tokens_seen": 2859335680 }, { "epoch": 0.73, "learning_rate": 0.00013483912380646715, "loss": 0.0679, "theoretical_loss": 3.361561208859843, "tokens_seen": 2859466752 }, { "epoch": 0.73, "learning_rate": 0.0001347990050549627, "loss": 0.0689, "theoretical_loss": 3.361549347484347, "tokens_seen": 2859597824 }, { "epoch": 0.73, "learning_rate": 0.00013475888630345824, "loss": 0.0643, "theoretical_loss": 3.361537486804736, "tokens_seen": 2859728896 }, { "epoch": 0.73, "learning_rate": 0.00013471876755195378, "loss": 0.065, "theoretical_loss": 3.3615256268209364, "tokens_seen": 2859859968 }, { "epoch": 0.73, "learning_rate": 0.00013467864880044932, "loss": 0.0675, "theoretical_loss": 3.3615137675328763, "tokens_seen": 2859991040 }, { "epoch": 0.73, "learning_rate": 0.0001346385300489449, "loss": 0.0638, "theoretical_loss": 3.3615019089404825, "tokens_seen": 2860122112 }, { "epoch": 0.73, "learning_rate": 0.00013459841129744043, "loss": 0.0653, "theoretical_loss": 3.361490051043683, "tokens_seen": 2860253184 }, { "epoch": 0.73, "learning_rate": 0.00013455829254593597, "loss": 0.0642, "theoretical_loss": 3.3614781938424043, "tokens_seen": 2860384256 }, { "epoch": 0.73, "learning_rate": 0.0001345181737944315, "loss": 0.0648, "theoretical_loss": 3.361466337336574, "tokens_seen": 2860515328 }, { "epoch": 0.73, "learning_rate": 0.00013447805504292705, "loss": 0.0633, "theoretical_loss": 3.3614544815261196, "tokens_seen": 2860646400 }, { "epoch": 0.73, "learning_rate": 0.00013443793629142262, "loss": 0.0655, "theoretical_loss": 3.3614426264109687, "tokens_seen": 2860777472 }, { "epoch": 0.73, "learning_rate": 0.00013439781753991816, "loss": 0.065, "theoretical_loss": 3.3614307719910483, "tokens_seen": 2860908544 }, { "epoch": 0.73, "learning_rate": 0.0001343576987884137, "loss": 0.0659, "theoretical_loss": 3.3614189182662857, "tokens_seen": 2861039616 }, { "epoch": 0.73, "learning_rate": 0.00013431758003690925, "loss": 0.067, "theoretical_loss": 3.3614070652366084, "tokens_seen": 2861170688 }, { "epoch": 0.73, "learning_rate": 0.00013427746128540481, "loss": 0.0659, "theoretical_loss": 3.361395212901944, "tokens_seen": 2861301760 }, { "epoch": 0.73, "learning_rate": 0.00013423734253390036, "loss": 0.0611, "theoretical_loss": 3.3613833612622193, "tokens_seen": 2861432832 }, { "epoch": 0.73, "learning_rate": 0.0001341972237823959, "loss": 0.0657, "theoretical_loss": 3.3613715103173627, "tokens_seen": 2861563904 }, { "epoch": 0.73, "learning_rate": 0.00013415710503089144, "loss": 0.064, "theoretical_loss": 3.361359660067301, "tokens_seen": 2861694976 }, { "epoch": 0.73, "learning_rate": 0.00013411698627938698, "loss": 0.0651, "theoretical_loss": 3.361347810511962, "tokens_seen": 2861826048 }, { "epoch": 0.73, "learning_rate": 0.00013407686752788255, "loss": 0.0676, "theoretical_loss": 3.3613359616512724, "tokens_seen": 2861957120 }, { "epoch": 0.73, "learning_rate": 0.0001340367487763781, "loss": 0.0673, "theoretical_loss": 3.36132411348516, "tokens_seen": 2862088192 }, { "epoch": 0.73, "learning_rate": 0.00013399663002487363, "loss": 0.0641, "theoretical_loss": 3.3613122660135533, "tokens_seen": 2862219264 }, { "epoch": 0.73, "objective/train/advantage_avg": -0.00018165487563237548, "objective/train/docs_used": 1040389, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.152327537536621, "objective/train/original_loss": 1.152327299118042, "objective/train/theoretical_loss": 3.3613004192363785, "objective/train/tokens_used": 1232874976, "objective/train/value_avg": -0.00969696044921875, "objective/train/value_loss": 0.00020525077707134187, "objective/train/value_max": -5.3048133850097656e-05, "objective/train/value_min": -0.31005859375, "objective/train/value_reward_corr": 0.8177291979739908, "objective/train/value_std": 0.0177001953125, "objective/train/weight_avg": 0.9999122023582458, "objective/train/weighted_lm_loss": 1.1518640518188477, "objective/train/weights_max": 1.2077035903930664, "objective/train/weights_min": 0.3752497136592865, "theoretical_loss": 3.3613004192363785, "tokens_seen": 2862350336 }, { "epoch": 0.73, "learning_rate": 0.00013395651127336917, "loss": 0.064, "theoretical_loss": 3.3613004192363785, "tokens_seen": 2862350336 }, { "epoch": 0.73, "learning_rate": 0.0001339163925218647, "loss": 0.0619, "theoretical_loss": 3.3612885731535633, "tokens_seen": 2862481408 }, { "epoch": 0.73, "learning_rate": 0.00013387627377036028, "loss": 0.067, "theoretical_loss": 3.361276727765036, "tokens_seen": 2862612480 }, { "epoch": 0.74, "learning_rate": 0.00013383615501885582, "loss": 0.0675, "theoretical_loss": 3.3612648830707235, "tokens_seen": 2862743552 }, { "epoch": 0.74, "learning_rate": 0.00013379603626735136, "loss": 0.0643, "theoretical_loss": 3.361253039070553, "tokens_seen": 2862874624 }, { "epoch": 0.74, "learning_rate": 0.0001337559175158469, "loss": 0.0655, "theoretical_loss": 3.361241195764453, "tokens_seen": 2863005696 }, { "epoch": 0.74, "learning_rate": 0.00013371579876434245, "loss": 0.0633, "theoretical_loss": 3.36122935315235, "tokens_seen": 2863136768 }, { "epoch": 0.74, "learning_rate": 0.00013367568001283801, "loss": 0.0668, "theoretical_loss": 3.3612175112341722, "tokens_seen": 2863267840 }, { "epoch": 0.74, "learning_rate": 0.00013363556126133356, "loss": 0.0665, "theoretical_loss": 3.361205670009847, "tokens_seen": 2863398912 }, { "epoch": 0.74, "learning_rate": 0.0001335954425098291, "loss": 0.0649, "theoretical_loss": 3.3611938294793022, "tokens_seen": 2863529984 }, { "epoch": 0.74, "learning_rate": 0.00013355532375832464, "loss": 0.0687, "theoretical_loss": 3.361181989642465, "tokens_seen": 2863661056 }, { "epoch": 0.74, "learning_rate": 0.00013351520500682018, "loss": 0.065, "theoretical_loss": 3.3611701504992637, "tokens_seen": 2863792128 }, { "epoch": 0.74, "learning_rate": 0.00013347508625531575, "loss": 0.0652, "theoretical_loss": 3.3611583120496253, "tokens_seen": 2863923200 }, { "epoch": 0.74, "learning_rate": 0.0001334349675038113, "loss": 0.065, "theoretical_loss": 3.3611464742934776, "tokens_seen": 2864054272 }, { "epoch": 0.74, "learning_rate": 0.00013339484875230683, "loss": 0.065, "theoretical_loss": 3.361134637230748, "tokens_seen": 2864185344 }, { "epoch": 0.74, "learning_rate": 0.00013335473000080237, "loss": 0.0643, "theoretical_loss": 3.3611228008613647, "tokens_seen": 2864316416 }, { "epoch": 0.74, "learning_rate": 0.0001333146112492979, "loss": 0.0636, "theoretical_loss": 3.361110965185255, "tokens_seen": 2864447488 }, { "epoch": 0.74, "learning_rate": 0.00013327449249779348, "loss": 0.0638, "theoretical_loss": 3.361099130202346, "tokens_seen": 2864578560 }, { "epoch": 0.74, "learning_rate": 0.00013323437374628902, "loss": 0.0644, "theoretical_loss": 3.361087295912567, "tokens_seen": 2864709632 }, { "epoch": 0.74, "learning_rate": 0.00013319425499478456, "loss": 0.0615, "theoretical_loss": 3.361075462315844, "tokens_seen": 2864840704 }, { "epoch": 0.74, "learning_rate": 0.0001331541362432801, "loss": 0.0657, "theoretical_loss": 3.3610636294121057, "tokens_seen": 2864971776 }, { "epoch": 0.74, "learning_rate": 0.00013311401749177565, "loss": 0.0646, "theoretical_loss": 3.3610517972012794, "tokens_seen": 2865102848 }, { "epoch": 0.74, "learning_rate": 0.00013307389874027121, "loss": 0.0663, "theoretical_loss": 3.361039965683293, "tokens_seen": 2865233920 }, { "epoch": 0.74, "learning_rate": 0.00013303377998876676, "loss": 0.0624, "theoretical_loss": 3.3610281348580746, "tokens_seen": 2865364992 }, { "epoch": 0.74, "learning_rate": 0.0001329936612372623, "loss": 0.0647, "theoretical_loss": 3.361016304725551, "tokens_seen": 2865496064 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.0005463280249387026, "objective/train/docs_used": 1041644, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.304354190826416, "objective/train/original_loss": 1.304354190826416, "objective/train/theoretical_loss": 3.3610044752856507, "objective/train/tokens_used": 1236151776, "objective/train/value_avg": -0.005382537841796875, "objective/train/value_loss": 0.00027012094506062567, "objective/train/value_max": -4.035234451293945e-05, "objective/train/value_min": -0.97216796875, "objective/train/value_reward_corr": 0.8238349722980827, "objective/train/value_std": 0.021026611328125, "objective/train/weight_avg": 1.0006674528121948, "objective/train/weighted_lm_loss": 1.305031657218933, "objective/train/weights_max": 1.9906805753707886, "objective/train/weights_min": 0.3749292194843292, "theoretical_loss": 3.3610044752856507, "tokens_seen": 2865627136 }, { "epoch": 0.74, "learning_rate": 0.00013295354248575784, "loss": 0.0646, "theoretical_loss": 3.3610044752856507, "tokens_seen": 2865627136 }, { "epoch": 0.74, "learning_rate": 0.00013291342373425338, "loss": 0.068, "theoretical_loss": 3.360992646538301, "tokens_seen": 2865758208 }, { "epoch": 0.74, "learning_rate": 0.00013287330498274895, "loss": 0.0661, "theoretical_loss": 3.3609808184834304, "tokens_seen": 2865889280 }, { "epoch": 0.74, "learning_rate": 0.0001328331862312445, "loss": 0.0653, "theoretical_loss": 3.3609689911209664, "tokens_seen": 2866020352 }, { "epoch": 0.74, "learning_rate": 0.00013279306747974003, "loss": 0.0624, "theoretical_loss": 3.360957164450836, "tokens_seen": 2866151424 }, { "epoch": 0.74, "learning_rate": 0.00013275294872823557, "loss": 0.0653, "theoretical_loss": 3.360945338472968, "tokens_seen": 2866282496 }, { "epoch": 0.74, "learning_rate": 0.00013271282997673111, "loss": 0.0661, "theoretical_loss": 3.36093351318729, "tokens_seen": 2866413568 }, { "epoch": 0.74, "learning_rate": 0.00013267271122522668, "loss": 0.0658, "theoretical_loss": 3.36092168859373, "tokens_seen": 2866544640 }, { "epoch": 0.74, "learning_rate": 0.00013263259247372222, "loss": 0.0645, "theoretical_loss": 3.360909864692215, "tokens_seen": 2866675712 }, { "epoch": 0.74, "learning_rate": 0.00013259247372221776, "loss": 0.0629, "theoretical_loss": 3.3608980414826735, "tokens_seen": 2866806784 }, { "epoch": 0.74, "learning_rate": 0.0001325523549707133, "loss": 0.0665, "theoretical_loss": 3.3608862189650335, "tokens_seen": 2866937856 }, { "epoch": 0.74, "learning_rate": 0.00013251223621920885, "loss": 0.0656, "theoretical_loss": 3.3608743971392228, "tokens_seen": 2867068928 }, { "epoch": 0.74, "learning_rate": 0.00013247211746770442, "loss": 0.0664, "theoretical_loss": 3.360862576005169, "tokens_seen": 2867200000 }, { "epoch": 0.74, "learning_rate": 0.00013243199871619996, "loss": 0.0633, "theoretical_loss": 3.3608507555628, "tokens_seen": 2867331072 }, { "epoch": 0.74, "learning_rate": 0.0001323918799646955, "loss": 0.0667, "theoretical_loss": 3.360838935812044, "tokens_seen": 2867462144 }, { "epoch": 0.74, "learning_rate": 0.00013235176121319104, "loss": 0.0673, "theoretical_loss": 3.360827116752829, "tokens_seen": 2867593216 }, { "epoch": 0.74, "learning_rate": 0.00013231164246168658, "loss": 0.0665, "theoretical_loss": 3.3608152983850825, "tokens_seen": 2867724288 }, { "epoch": 0.74, "learning_rate": 0.00013227152371018215, "loss": 0.0646, "theoretical_loss": 3.360803480708733, "tokens_seen": 2867855360 }, { "epoch": 0.74, "learning_rate": 0.0001322314049586777, "loss": 0.0614, "theoretical_loss": 3.360791663723708, "tokens_seen": 2867986432 }, { "epoch": 0.74, "learning_rate": 0.00013219128620717323, "loss": 0.067, "theoretical_loss": 3.360779847429936, "tokens_seen": 2868117504 }, { "epoch": 0.74, "learning_rate": 0.00013215116745566877, "loss": 0.0664, "theoretical_loss": 3.360768031827344, "tokens_seen": 2868248576 }, { "epoch": 0.74, "learning_rate": 0.00013211104870416431, "loss": 0.0621, "theoretical_loss": 3.3607562169158607, "tokens_seen": 2868379648 }, { "epoch": 0.74, "learning_rate": 0.00013207092995265988, "loss": 0.0657, "theoretical_loss": 3.3607444026954143, "tokens_seen": 2868510720 }, { "epoch": 0.74, "learning_rate": 0.00013203081120115542, "loss": 0.0648, "theoretical_loss": 3.360732589165932, "tokens_seen": 2868641792 }, { "epoch": 0.74, "learning_rate": 0.000131990692449651, "loss": 0.0651, "theoretical_loss": 3.3607207763273426, "tokens_seen": 2868772864 }, { "epoch": 0.74, "objective/train/advantage_avg": -0.0005321147036738694, "objective/train/docs_used": 1042847, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2634778022766113, "objective/train/original_loss": 1.2634778022766113, "objective/train/theoretical_loss": 3.3607089641795738, "objective/train/tokens_used": 1239428576, "objective/train/value_avg": -0.007198333740234375, "objective/train/value_loss": 0.0002403511170996353, "objective/train/value_max": -5.227327346801758e-05, "objective/train/value_min": -0.352783203125, "objective/train/value_reward_corr": 0.7548222655327702, "objective/train/value_std": 0.0162811279296875, "objective/train/weight_avg": 0.9995782971382141, "objective/train/weighted_lm_loss": 1.2628518342971802, "objective/train/weights_max": 1.1682624816894531, "objective/train/weights_min": 0.37240904569625854, "theoretical_loss": 3.3607089641795738, "tokens_seen": 2868903936 }, { "epoch": 0.74, "learning_rate": 0.0001319505736981465, "loss": 0.0638, "theoretical_loss": 3.3607089641795738, "tokens_seen": 2868903936 }, { "epoch": 0.74, "learning_rate": 0.00013191045494664205, "loss": 0.0652, "theoretical_loss": 3.3606971527225538, "tokens_seen": 2869035008 }, { "epoch": 0.74, "learning_rate": 0.00013187033619513762, "loss": 0.0637, "theoretical_loss": 3.36068534195621, "tokens_seen": 2869166080 }, { "epoch": 0.74, "learning_rate": 0.00013183021744363316, "loss": 0.0644, "theoretical_loss": 3.3606735318804715, "tokens_seen": 2869297152 }, { "epoch": 0.74, "learning_rate": 0.00013179009869212873, "loss": 0.0642, "theoretical_loss": 3.3606617224952657, "tokens_seen": 2869428224 }, { "epoch": 0.74, "learning_rate": 0.00013174997994062424, "loss": 0.064, "theoretical_loss": 3.360649913800521, "tokens_seen": 2869559296 }, { "epoch": 0.74, "learning_rate": 0.00013170986118911978, "loss": 0.064, "theoretical_loss": 3.3606381057961654, "tokens_seen": 2869690368 }, { "epoch": 0.74, "learning_rate": 0.00013166974243761535, "loss": 0.0665, "theoretical_loss": 3.360626298482127, "tokens_seen": 2869821440 }, { "epoch": 0.74, "learning_rate": 0.0001316296236861109, "loss": 0.0659, "theoretical_loss": 3.3606144918583336, "tokens_seen": 2869952512 }, { "epoch": 0.74, "learning_rate": 0.00013158950493460646, "loss": 0.0644, "theoretical_loss": 3.3606026859247136, "tokens_seen": 2870083584 }, { "epoch": 0.74, "learning_rate": 0.00013154938618310197, "loss": 0.0667, "theoretical_loss": 3.3605908806811953, "tokens_seen": 2870214656 }, { "epoch": 0.74, "learning_rate": 0.00013150926743159751, "loss": 0.0672, "theoretical_loss": 3.3605790761277063, "tokens_seen": 2870345728 }, { "epoch": 0.74, "learning_rate": 0.00013146914868009308, "loss": 0.0652, "theoretical_loss": 3.3605672722641757, "tokens_seen": 2870476800 }, { "epoch": 0.74, "learning_rate": 0.00013142902992858862, "loss": 0.0655, "theoretical_loss": 3.3605554690905306, "tokens_seen": 2870607872 }, { "epoch": 0.74, "learning_rate": 0.0001313889111770842, "loss": 0.0666, "theoretical_loss": 3.3605436666067003, "tokens_seen": 2870738944 }, { "epoch": 0.74, "learning_rate": 0.0001313487924255797, "loss": 0.067, "theoretical_loss": 3.3605318648126117, "tokens_seen": 2870870016 }, { "epoch": 0.74, "learning_rate": 0.00013130867367407525, "loss": 0.064, "theoretical_loss": 3.3605200637081944, "tokens_seen": 2871001088 }, { "epoch": 0.74, "learning_rate": 0.00013126855492257082, "loss": 0.0662, "theoretical_loss": 3.3605082632933754, "tokens_seen": 2871132160 }, { "epoch": 0.74, "learning_rate": 0.00013122843617106636, "loss": 0.062, "theoretical_loss": 3.360496463568084, "tokens_seen": 2871263232 }, { "epoch": 0.74, "learning_rate": 0.00013118831741956193, "loss": 0.0646, "theoretical_loss": 3.360484664532247, "tokens_seen": 2871394304 }, { "epoch": 0.74, "learning_rate": 0.00013114819866805744, "loss": 0.0631, "theoretical_loss": 3.360472866185794, "tokens_seen": 2871525376 }, { "epoch": 0.74, "learning_rate": 0.00013110807991655298, "loss": 0.068, "theoretical_loss": 3.360461068528653, "tokens_seen": 2871656448 }, { "epoch": 0.74, "learning_rate": 0.00013106796116504855, "loss": 0.0644, "theoretical_loss": 3.3604492715607517, "tokens_seen": 2871787520 }, { "epoch": 0.74, "learning_rate": 0.0001310278424135441, "loss": 0.0683, "theoretical_loss": 3.3604374752820187, "tokens_seen": 2871918592 }, { "epoch": 0.74, "learning_rate": 0.00013098772366203966, "loss": 0.0678, "theoretical_loss": 3.3604256796923826, "tokens_seen": 2872049664 }, { "epoch": 0.74, "objective/train/advantage_avg": 2.42861333390465e-05, "objective/train/docs_used": 1044056, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2885448932647705, "objective/train/original_loss": 1.2885446548461914, "objective/train/theoretical_loss": 3.3604138847917713, "objective/train/tokens_used": 1242705376, "objective/train/value_avg": -0.0062255859375, "objective/train/value_loss": 0.0002257262240163982, "objective/train/value_max": -1.3828277587890625e-05, "objective/train/value_min": -0.496826171875, "objective/train/value_reward_corr": 0.6460159137279275, "objective/train/value_std": 0.01323699951171875, "objective/train/weight_avg": 1.0001238584518433, "objective/train/weighted_lm_loss": 1.2885701656341553, "objective/train/weights_max": 1.5391839742660522, "objective/train/weights_min": 0.3835393786430359, "theoretical_loss": 3.3604138847917713, "tokens_seen": 2872180736 }, { "epoch": 0.74, "learning_rate": 0.0001309476049105352, "loss": 0.0625, "theoretical_loss": 3.3604138847917713, "tokens_seen": 2872180736 }, { "epoch": 0.74, "learning_rate": 0.00013090748615903072, "loss": 0.0613, "theoretical_loss": 3.360402090580113, "tokens_seen": 2872311808 }, { "epoch": 0.74, "learning_rate": 0.00013086736740752628, "loss": 0.0655, "theoretical_loss": 3.3603902970573367, "tokens_seen": 2872442880 }, { "epoch": 0.74, "learning_rate": 0.00013082724865602182, "loss": 0.064, "theoretical_loss": 3.3603785042233696, "tokens_seen": 2872573952 }, { "epoch": 0.74, "learning_rate": 0.0001307871299045174, "loss": 0.0685, "theoretical_loss": 3.3603667120781413, "tokens_seen": 2872705024 }, { "epoch": 0.74, "learning_rate": 0.00013074701115301293, "loss": 0.0691, "theoretical_loss": 3.360354920621579, "tokens_seen": 2872836096 }, { "epoch": 0.74, "learning_rate": 0.00013070689240150845, "loss": 0.0664, "theoretical_loss": 3.360343129853612, "tokens_seen": 2872967168 }, { "epoch": 0.74, "learning_rate": 0.00013066677365000402, "loss": 0.0637, "theoretical_loss": 3.3603313397741683, "tokens_seen": 2873098240 }, { "epoch": 0.74, "learning_rate": 0.00013062665489849956, "loss": 0.0617, "theoretical_loss": 3.3603195503831764, "tokens_seen": 2873229312 }, { "epoch": 0.74, "learning_rate": 0.00013058653614699513, "loss": 0.0642, "theoretical_loss": 3.3603077616805646, "tokens_seen": 2873360384 }, { "epoch": 0.74, "learning_rate": 0.00013054641739549067, "loss": 0.0633, "theoretical_loss": 3.360295973666261, "tokens_seen": 2873491456 }, { "epoch": 0.74, "learning_rate": 0.00013050629864398618, "loss": 0.0668, "theoretical_loss": 3.3602841863401944, "tokens_seen": 2873622528 }, { "epoch": 0.74, "learning_rate": 0.00013046617989248175, "loss": 0.0652, "theoretical_loss": 3.360272399702293, "tokens_seen": 2873753600 }, { "epoch": 0.74, "learning_rate": 0.0001304260611409773, "loss": 0.0654, "theoretical_loss": 3.360260613752486, "tokens_seen": 2873884672 }, { "epoch": 0.74, "learning_rate": 0.00013038594238947286, "loss": 0.062, "theoretical_loss": 3.360248828490701, "tokens_seen": 2874015744 }, { "epoch": 0.74, "learning_rate": 0.0001303458236379684, "loss": 0.0633, "theoretical_loss": 3.3602370439168663, "tokens_seen": 2874146816 }, { "epoch": 0.74, "learning_rate": 0.00013030570488646392, "loss": 0.0624, "theoretical_loss": 3.360225260030911, "tokens_seen": 2874277888 }, { "epoch": 0.74, "learning_rate": 0.00013026558613495948, "loss": 0.0703, "theoretical_loss": 3.3602134768327634, "tokens_seen": 2874408960 }, { "epoch": 0.74, "learning_rate": 0.00013022546738345503, "loss": 0.0696, "theoretical_loss": 3.3602016943223516, "tokens_seen": 2874540032 }, { "epoch": 0.74, "learning_rate": 0.0001301853486319506, "loss": 0.0634, "theoretical_loss": 3.360189912499605, "tokens_seen": 2874671104 }, { "epoch": 0.74, "learning_rate": 0.00013014522988044614, "loss": 0.0681, "theoretical_loss": 3.360178131364451, "tokens_seen": 2874802176 }, { "epoch": 0.74, "learning_rate": 0.00013010511112894165, "loss": 0.0652, "theoretical_loss": 3.360166350916819, "tokens_seen": 2874933248 }, { "epoch": 0.74, "learning_rate": 0.00013006499237743722, "loss": 0.0651, "theoretical_loss": 3.360154571156637, "tokens_seen": 2875064320 }, { "epoch": 0.74, "learning_rate": 0.00013002487362593276, "loss": 0.0672, "theoretical_loss": 3.360142792083834, "tokens_seen": 2875195392 }, { "epoch": 0.74, "learning_rate": 0.00012998475487442833, "loss": 0.0649, "theoretical_loss": 3.3601310136983384, "tokens_seen": 2875326464 }, { "epoch": 0.74, "objective/train/advantage_avg": 0.0005424400442279875, "objective/train/docs_used": 1045259, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2813242673873901, "objective/train/original_loss": 1.2813243865966797, "objective/train/theoretical_loss": 3.3601192360000782, "objective/train/tokens_used": 1245982176, "objective/train/value_avg": -0.006374359130859375, "objective/train/value_loss": 0.0001361749309580773, "objective/train/value_max": -3.0219554901123047e-05, "objective/train/value_min": -0.30322265625, "objective/train/value_reward_corr": 0.71382431323693, "objective/train/value_std": 0.01149749755859375, "objective/train/weight_avg": 1.0006053447723389, "objective/train/weighted_lm_loss": 1.2816129922866821, "objective/train/weights_max": 1.1637078523635864, "objective/train/weights_min": 0.3704989552497864, "theoretical_loss": 3.3601192360000782, "tokens_seen": 2875457536 }, { "epoch": 0.74, "learning_rate": 0.00012994463612292387, "loss": 0.0649, "theoretical_loss": 3.3601192360000782, "tokens_seen": 2875457536 }, { "epoch": 0.74, "learning_rate": 0.00012990451737141938, "loss": 0.0609, "theoretical_loss": 3.360107458988983, "tokens_seen": 2875588608 }, { "epoch": 0.74, "learning_rate": 0.00012986439861991495, "loss": 0.0699, "theoretical_loss": 3.360095682664981, "tokens_seen": 2875719680 }, { "epoch": 0.74, "learning_rate": 0.0001298242798684105, "loss": 0.0666, "theoretical_loss": 3.3600839070279998, "tokens_seen": 2875850752 }, { "epoch": 0.74, "learning_rate": 0.00012978416111690606, "loss": 0.0667, "theoretical_loss": 3.3600721320779696, "tokens_seen": 2875981824 }, { "epoch": 0.74, "learning_rate": 0.0001297440423654016, "loss": 0.0656, "theoretical_loss": 3.3600603578148176, "tokens_seen": 2876112896 }, { "epoch": 0.74, "learning_rate": 0.00012970392361389712, "loss": 0.0686, "theoretical_loss": 3.3600485842384735, "tokens_seen": 2876243968 }, { "epoch": 0.74, "learning_rate": 0.00012966380486239268, "loss": 0.0646, "theoretical_loss": 3.360036811348866, "tokens_seen": 2876375040 }, { "epoch": 0.74, "learning_rate": 0.00012962368611088823, "loss": 0.0615, "theoretical_loss": 3.3600250391459223, "tokens_seen": 2876506112 }, { "epoch": 0.74, "learning_rate": 0.0001295835673593838, "loss": 0.0651, "theoretical_loss": 3.360013267629573, "tokens_seen": 2876637184 }, { "epoch": 0.74, "learning_rate": 0.00012954344860787934, "loss": 0.0659, "theoretical_loss": 3.360001496799746, "tokens_seen": 2876768256 }, { "epoch": 0.74, "learning_rate": 0.00012950332985637485, "loss": 0.069, "theoretical_loss": 3.359989726656369, "tokens_seen": 2876899328 }, { "epoch": 0.74, "learning_rate": 0.00012946321110487042, "loss": 0.0655, "theoretical_loss": 3.359977957199372, "tokens_seen": 2877030400 }, { "epoch": 0.74, "learning_rate": 0.00012942309235336596, "loss": 0.0655, "theoretical_loss": 3.359966188428683, "tokens_seen": 2877161472 }, { "epoch": 0.74, "learning_rate": 0.00012938297360186153, "loss": 0.0655, "theoretical_loss": 3.3599544203442306, "tokens_seen": 2877292544 }, { "epoch": 0.74, "learning_rate": 0.00012934285485035707, "loss": 0.0647, "theoretical_loss": 3.3599426529459446, "tokens_seen": 2877423616 }, { "epoch": 0.74, "learning_rate": 0.00012930273609885258, "loss": 0.0687, "theoretical_loss": 3.359930886233753, "tokens_seen": 2877554688 }, { "epoch": 0.74, "learning_rate": 0.00012926261734734815, "loss": 0.0649, "theoretical_loss": 3.359919120207584, "tokens_seen": 2877685760 }, { "epoch": 0.74, "learning_rate": 0.0001292224985958437, "loss": 0.0656, "theoretical_loss": 3.3599073548673672, "tokens_seen": 2877816832 }, { "epoch": 0.74, "learning_rate": 0.00012918237984433926, "loss": 0.0686, "theoretical_loss": 3.359895590213031, "tokens_seen": 2877947904 }, { "epoch": 0.74, "learning_rate": 0.0001291422610928348, "loss": 0.0635, "theoretical_loss": 3.3598838262445048, "tokens_seen": 2878078976 }, { "epoch": 0.74, "learning_rate": 0.00012910214234133034, "loss": 0.0716, "theoretical_loss": 3.3598720629617165, "tokens_seen": 2878210048 }, { "epoch": 0.74, "learning_rate": 0.00012906202358982589, "loss": 0.064, "theoretical_loss": 3.359860300364595, "tokens_seen": 2878341120 }, { "epoch": 0.74, "learning_rate": 0.00012902190483832143, "loss": 0.0641, "theoretical_loss": 3.3598485384530696, "tokens_seen": 2878472192 }, { "epoch": 0.74, "learning_rate": 0.000128981786086817, "loss": 0.0598, "theoretical_loss": 3.359836777227069, "tokens_seen": 2878603264 }, { "epoch": 0.74, "objective/train/advantage_avg": -0.002215935382992029, "objective/train/docs_used": 1046332, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.514567255973816, "objective/train/original_loss": 1.5145673751831055, "objective/train/theoretical_loss": 3.359825016686522, "objective/train/tokens_used": 1249258976, "objective/train/value_avg": -0.0142059326171875, "objective/train/value_loss": 0.0005027984734624624, "objective/train/value_max": -2.193450927734375e-05, "objective/train/value_min": -0.65185546875, "objective/train/value_reward_corr": 0.9095205524257886, "objective/train/value_std": 0.03802490234375, "objective/train/weight_avg": 0.9980130195617676, "objective/train/weighted_lm_loss": 1.5122640132904053, "objective/train/weights_max": 1.8491870164871216, "objective/train/weights_min": 0.36990442872047424, "theoretical_loss": 3.359825016686522, "tokens_seen": 2878734336 }, { "epoch": 0.74, "learning_rate": 0.00012894166733531254, "loss": 0.0684, "theoretical_loss": 3.359825016686522, "tokens_seen": 2878734336 }, { "epoch": 0.74, "learning_rate": 0.00012890154858380808, "loss": 0.07, "theoretical_loss": 3.359813256831357, "tokens_seen": 2878865408 }, { "epoch": 0.74, "learning_rate": 0.00012886142983230362, "loss": 0.0637, "theoretical_loss": 3.3598014976615036, "tokens_seen": 2878996480 }, { "epoch": 0.74, "learning_rate": 0.00012882131108079916, "loss": 0.0671, "theoretical_loss": 3.3597897391768905, "tokens_seen": 2879127552 }, { "epoch": 0.75, "learning_rate": 0.00012878119232929473, "loss": 0.0673, "theoretical_loss": 3.3597779813774458, "tokens_seen": 2879258624 }, { "epoch": 0.75, "learning_rate": 0.00012874107357779027, "loss": 0.0627, "theoretical_loss": 3.3597662242630992, "tokens_seen": 2879389696 }, { "epoch": 0.75, "learning_rate": 0.0001287009548262858, "loss": 0.0623, "theoretical_loss": 3.3597544678337794, "tokens_seen": 2879520768 }, { "epoch": 0.75, "learning_rate": 0.00012866083607478135, "loss": 0.066, "theoretical_loss": 3.359742712089415, "tokens_seen": 2879651840 }, { "epoch": 0.75, "learning_rate": 0.0001286207173232769, "loss": 0.0573, "theoretical_loss": 3.3597309570299356, "tokens_seen": 2879782912 }, { "epoch": 0.75, "learning_rate": 0.00012858059857177246, "loss": 0.0661, "theoretical_loss": 3.3597192026552696, "tokens_seen": 2879913984 }, { "epoch": 0.75, "learning_rate": 0.000128540479820268, "loss": 0.0679, "theoretical_loss": 3.359707448965346, "tokens_seen": 2880045056 }, { "epoch": 0.75, "learning_rate": 0.00012850036106876354, "loss": 0.0689, "theoretical_loss": 3.3596956959600943, "tokens_seen": 2880176128 }, { "epoch": 0.75, "learning_rate": 0.00012846024231725909, "loss": 0.0673, "theoretical_loss": 3.3596839436394426, "tokens_seen": 2880307200 }, { "epoch": 0.75, "learning_rate": 0.00012842012356575463, "loss": 0.0616, "theoretical_loss": 3.3596721920033197, "tokens_seen": 2880438272 }, { "epoch": 0.75, "learning_rate": 0.0001283800048142502, "loss": 0.0633, "theoretical_loss": 3.3596604410516555, "tokens_seen": 2880569344 }, { "epoch": 0.75, "learning_rate": 0.00012833988606274574, "loss": 0.0663, "theoretical_loss": 3.359648690784379, "tokens_seen": 2880700416 }, { "epoch": 0.75, "learning_rate": 0.00012829976731124128, "loss": 0.063, "theoretical_loss": 3.3596369412014186, "tokens_seen": 2880831488 }, { "epoch": 0.75, "learning_rate": 0.00012825964855973682, "loss": 0.0625, "theoretical_loss": 3.3596251923027034, "tokens_seen": 2880962560 }, { "epoch": 0.75, "learning_rate": 0.00012821952980823236, "loss": 0.0645, "theoretical_loss": 3.3596134440881626, "tokens_seen": 2881093632 }, { "epoch": 0.75, "learning_rate": 0.00012817941105672793, "loss": 0.063, "theoretical_loss": 3.3596016965577253, "tokens_seen": 2881224704 }, { "epoch": 0.75, "learning_rate": 0.00012813929230522347, "loss": 0.0665, "theoretical_loss": 3.3595899497113204, "tokens_seen": 2881355776 }, { "epoch": 0.75, "learning_rate": 0.000128099173553719, "loss": 0.064, "theoretical_loss": 3.359578203548877, "tokens_seen": 2881486848 }, { "epoch": 0.75, "learning_rate": 0.00012805905480221455, "loss": 0.0626, "theoretical_loss": 3.359566458070324, "tokens_seen": 2881617920 }, { "epoch": 0.75, "learning_rate": 0.0001280189360507101, "loss": 0.0671, "theoretical_loss": 3.3595547132755907, "tokens_seen": 2881748992 }, { "epoch": 0.75, "learning_rate": 0.00012797881729920566, "loss": 0.0619, "theoretical_loss": 3.3595429691646066, "tokens_seen": 2881880064 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.001414592727087438, "objective/train/docs_used": 1047629, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.300643801689148, "objective/train/original_loss": 1.3006439208984375, "objective/train/theoretical_loss": 3.3595312257372996, "objective/train/tokens_used": 1252535776, "objective/train/value_avg": -0.00640106201171875, "objective/train/value_loss": 0.0002168550417991355, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.66162109375, "objective/train/value_reward_corr": 0.622693283543592, "objective/train/value_std": 0.0134124755859375, "objective/train/weight_avg": 1.0015125274658203, "objective/train/weighted_lm_loss": 1.3024556636810303, "objective/train/weights_max": 1.83857262134552, "objective/train/weights_min": 0.3723919987678528, "theoretical_loss": 3.3595312257372996, "tokens_seen": 2882011136 }, { "epoch": 0.75, "learning_rate": 0.0001279386985477012, "loss": 0.0662, "theoretical_loss": 3.3595312257372996, "tokens_seen": 2882011136 }, { "epoch": 0.75, "learning_rate": 0.00012789857979619675, "loss": 0.0656, "theoretical_loss": 3.3595194829936, "tokens_seen": 2882142208 }, { "epoch": 0.75, "learning_rate": 0.00012785846104469229, "loss": 0.0675, "theoretical_loss": 3.359507740933436, "tokens_seen": 2882273280 }, { "epoch": 0.75, "learning_rate": 0.00012781834229318783, "loss": 0.0659, "theoretical_loss": 3.3594959995567377, "tokens_seen": 2882404352 }, { "epoch": 0.75, "learning_rate": 0.0001277782235416834, "loss": 0.0635, "theoretical_loss": 3.359484258863434, "tokens_seen": 2882535424 }, { "epoch": 0.75, "learning_rate": 0.00012773810479017894, "loss": 0.0667, "theoretical_loss": 3.3594725188534533, "tokens_seen": 2882666496 }, { "epoch": 0.75, "learning_rate": 0.00012769798603867448, "loss": 0.0662, "theoretical_loss": 3.3594607795267253, "tokens_seen": 2882797568 }, { "epoch": 0.75, "learning_rate": 0.00012765786728717002, "loss": 0.0627, "theoretical_loss": 3.359449040883179, "tokens_seen": 2882928640 }, { "epoch": 0.75, "learning_rate": 0.00012761774853566556, "loss": 0.0687, "theoretical_loss": 3.359437302922744, "tokens_seen": 2883059712 }, { "epoch": 0.75, "learning_rate": 0.00012757762978416113, "loss": 0.0706, "theoretical_loss": 3.359425565645349, "tokens_seen": 2883190784 }, { "epoch": 0.75, "learning_rate": 0.00012753751103265667, "loss": 0.0639, "theoretical_loss": 3.3594138290509235, "tokens_seen": 2883321856 }, { "epoch": 0.75, "learning_rate": 0.0001274973922811522, "loss": 0.0696, "theoretical_loss": 3.359402093139397, "tokens_seen": 2883452928 }, { "epoch": 0.75, "learning_rate": 0.00012745727352964775, "loss": 0.066, "theoretical_loss": 3.3593903579106974, "tokens_seen": 2883584000 }, { "epoch": 0.75, "learning_rate": 0.0001274171547781433, "loss": 0.0614, "theoretical_loss": 3.3593786233647553, "tokens_seen": 2883715072 }, { "epoch": 0.75, "learning_rate": 0.00012737703602663886, "loss": 0.0669, "theoretical_loss": 3.3593668895014996, "tokens_seen": 2883846144 }, { "epoch": 0.75, "learning_rate": 0.0001273369172751344, "loss": 0.0655, "theoretical_loss": 3.3593551563208597, "tokens_seen": 2883977216 }, { "epoch": 0.75, "learning_rate": 0.00012729679852362995, "loss": 0.0634, "theoretical_loss": 3.3593434238227644, "tokens_seen": 2884108288 }, { "epoch": 0.75, "learning_rate": 0.0001272566797721255, "loss": 0.0648, "theoretical_loss": 3.359331692007143, "tokens_seen": 2884239360 }, { "epoch": 0.75, "learning_rate": 0.00012721656102062103, "loss": 0.0627, "theoretical_loss": 3.3593199608739255, "tokens_seen": 2884370432 }, { "epoch": 0.75, "learning_rate": 0.0001271764422691166, "loss": 0.071, "theoretical_loss": 3.3593082304230406, "tokens_seen": 2884501504 }, { "epoch": 0.75, "learning_rate": 0.00012713632351761214, "loss": 0.0672, "theoretical_loss": 3.359296500654418, "tokens_seen": 2884632576 }, { "epoch": 0.75, "learning_rate": 0.00012709620476610768, "loss": 0.0656, "theoretical_loss": 3.359284771567986, "tokens_seen": 2884763648 }, { "epoch": 0.75, "learning_rate": 0.00012705608601460322, "loss": 0.0648, "theoretical_loss": 3.3592730431636753, "tokens_seen": 2884894720 }, { "epoch": 0.75, "learning_rate": 0.00012701596726309876, "loss": 0.0651, "theoretical_loss": 3.3592613154414144, "tokens_seen": 2885025792 }, { "epoch": 0.75, "learning_rate": 0.00012697584851159433, "loss": 0.0645, "theoretical_loss": 3.359249588401133, "tokens_seen": 2885156864 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.0008653160184621811, "objective/train/docs_used": 1048861, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.070478081703186, "objective/train/original_loss": 1.0704782009124756, "objective/train/theoretical_loss": 3.35923786204276, "objective/train/tokens_used": 1255812576, "objective/train/value_avg": -0.0070648193359375, "objective/train/value_loss": 0.0001455918827559799, "objective/train/value_max": -2.1278858184814453e-05, "objective/train/value_min": -0.195556640625, "objective/train/value_reward_corr": 0.6339466490287458, "objective/train/value_std": 0.01073455810546875, "objective/train/weight_avg": 1.0009305477142334, "objective/train/weighted_lm_loss": 1.0707522630691528, "objective/train/weights_max": 1.142063021659851, "objective/train/weights_min": 0.38205578923225403, "theoretical_loss": 3.35923786204276, "tokens_seen": 2885287936 }, { "epoch": 0.75, "learning_rate": 0.00012693572976008987, "loss": 0.0644, "theoretical_loss": 3.35923786204276, "tokens_seen": 2885287936 }, { "epoch": 0.75, "learning_rate": 0.0001268956110085854, "loss": 0.0655, "theoretical_loss": 3.3592261363662255, "tokens_seen": 2885419008 }, { "epoch": 0.75, "learning_rate": 0.00012685549225708095, "loss": 0.065, "theoretical_loss": 3.3592144113714584, "tokens_seen": 2885550080 }, { "epoch": 0.75, "learning_rate": 0.00012681537350557652, "loss": 0.064, "theoretical_loss": 3.359202687058388, "tokens_seen": 2885681152 }, { "epoch": 0.75, "learning_rate": 0.00012677525475407206, "loss": 0.0644, "theoretical_loss": 3.3591909634269443, "tokens_seen": 2885812224 }, { "epoch": 0.75, "learning_rate": 0.0001267351360025676, "loss": 0.0634, "theoretical_loss": 3.3591792404770557, "tokens_seen": 2885943296 }, { "epoch": 0.75, "learning_rate": 0.00012669501725106315, "loss": 0.0663, "theoretical_loss": 3.3591675182086527, "tokens_seen": 2886074368 }, { "epoch": 0.75, "learning_rate": 0.0001266548984995587, "loss": 0.0662, "theoretical_loss": 3.359155796621664, "tokens_seen": 2886205440 }, { "epoch": 0.75, "learning_rate": 0.00012661477974805426, "loss": 0.0638, "theoretical_loss": 3.35914407571602, "tokens_seen": 2886336512 }, { "epoch": 0.75, "learning_rate": 0.0001265746609965498, "loss": 0.0646, "theoretical_loss": 3.3591323554916483, "tokens_seen": 2886467584 }, { "epoch": 0.75, "learning_rate": 0.00012653454224504534, "loss": 0.0654, "theoretical_loss": 3.3591206359484804, "tokens_seen": 2886598656 }, { "epoch": 0.75, "learning_rate": 0.00012649442349354088, "loss": 0.0657, "theoretical_loss": 3.3591089170864445, "tokens_seen": 2886729728 }, { "epoch": 0.75, "learning_rate": 0.00012645430474203642, "loss": 0.0644, "theoretical_loss": 3.359097198905471, "tokens_seen": 2886860800 }, { "epoch": 0.75, "learning_rate": 0.000126414185990532, "loss": 0.0651, "theoretical_loss": 3.3590854814054882, "tokens_seen": 2886991872 }, { "epoch": 0.75, "learning_rate": 0.00012637406723902753, "loss": 0.0677, "theoretical_loss": 3.359073764586427, "tokens_seen": 2887122944 }, { "epoch": 0.75, "learning_rate": 0.00012633394848752307, "loss": 0.0636, "theoretical_loss": 3.359062048448216, "tokens_seen": 2887254016 }, { "epoch": 0.75, "learning_rate": 0.0001262938297360186, "loss": 0.0617, "theoretical_loss": 3.359050332990785, "tokens_seen": 2887385088 }, { "epoch": 0.75, "learning_rate": 0.00012625371098451415, "loss": 0.0656, "theoretical_loss": 3.3590386182140635, "tokens_seen": 2887516160 }, { "epoch": 0.75, "learning_rate": 0.00012621359223300972, "loss": 0.0608, "theoretical_loss": 3.359026904117981, "tokens_seen": 2887647232 }, { "epoch": 0.75, "learning_rate": 0.00012617347348150526, "loss": 0.0674, "theoretical_loss": 3.3590151907024675, "tokens_seen": 2887778304 }, { "epoch": 0.75, "learning_rate": 0.0001261333547300008, "loss": 0.0638, "theoretical_loss": 3.359003477967452, "tokens_seen": 2887909376 }, { "epoch": 0.75, "learning_rate": 0.00012609323597849635, "loss": 0.0634, "theoretical_loss": 3.3589917659128634, "tokens_seen": 2888040448 }, { "epoch": 0.75, "learning_rate": 0.0001260531172269919, "loss": 0.066, "theoretical_loss": 3.358980054538633, "tokens_seen": 2888171520 }, { "epoch": 0.75, "learning_rate": 0.00012601299847548746, "loss": 0.065, "theoretical_loss": 3.3589683438446896, "tokens_seen": 2888302592 }, { "epoch": 0.75, "learning_rate": 0.000125972879723983, "loss": 0.0613, "theoretical_loss": 3.358956633830963, "tokens_seen": 2888433664 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.0007659186376258731, "objective/train/docs_used": 1050131, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4683316946029663, "objective/train/original_loss": 1.4683315753936768, "objective/train/theoretical_loss": 3.358944924497382, "objective/train/tokens_used": 1259089376, "objective/train/value_avg": -0.004940032958984375, "objective/train/value_loss": 0.00017852937162388116, "objective/train/value_max": -1.4960765838623047e-05, "objective/train/value_min": -0.63623046875, "objective/train/value_reward_corr": 0.6506813956770939, "objective/train/value_std": 0.01183319091796875, "objective/train/weight_avg": 1.0008436441421509, "objective/train/weighted_lm_loss": 1.469918966293335, "objective/train/weights_max": 1.2207468748092651, "objective/train/weights_min": 0.3688047230243683, "theoretical_loss": 3.358944924497382, "tokens_seen": 2888564736 }, { "epoch": 0.75, "learning_rate": 0.00012593276097247854, "loss": 0.0703, "theoretical_loss": 3.358944924497382, "tokens_seen": 2888564736 }, { "epoch": 0.75, "learning_rate": 0.00012589264222097408, "loss": 0.0648, "theoretical_loss": 3.3589332158438774, "tokens_seen": 2888695808 }, { "epoch": 0.75, "learning_rate": 0.00012585252346946962, "loss": 0.0622, "theoretical_loss": 3.3589215078703782, "tokens_seen": 2888826880 }, { "epoch": 0.75, "learning_rate": 0.0001258124047179652, "loss": 0.0624, "theoretical_loss": 3.358909800576814, "tokens_seen": 2888957952 }, { "epoch": 0.75, "learning_rate": 0.00012577228596646073, "loss": 0.064, "theoretical_loss": 3.3588980939631146, "tokens_seen": 2889089024 }, { "epoch": 0.75, "learning_rate": 0.00012573216721495627, "loss": 0.0651, "theoretical_loss": 3.3588863880292097, "tokens_seen": 2889220096 }, { "epoch": 0.75, "learning_rate": 0.00012569204846345181, "loss": 0.0641, "theoretical_loss": 3.358874682775029, "tokens_seen": 2889351168 }, { "epoch": 0.75, "learning_rate": 0.00012565192971194736, "loss": 0.0626, "theoretical_loss": 3.358862978200502, "tokens_seen": 2889482240 }, { "epoch": 0.75, "learning_rate": 0.00012561181096044292, "loss": 0.0651, "theoretical_loss": 3.358851274305559, "tokens_seen": 2889613312 }, { "epoch": 0.75, "learning_rate": 0.00012557169220893846, "loss": 0.0653, "theoretical_loss": 3.3588395710901295, "tokens_seen": 2889744384 }, { "epoch": 0.75, "learning_rate": 0.000125531573457434, "loss": 0.0638, "theoretical_loss": 3.3588278685541426, "tokens_seen": 2889875456 }, { "epoch": 0.75, "learning_rate": 0.00012549145470592955, "loss": 0.0667, "theoretical_loss": 3.3588161666975287, "tokens_seen": 2890006528 }, { "epoch": 0.75, "learning_rate": 0.0001254513359544251, "loss": 0.0638, "theoretical_loss": 3.358804465520217, "tokens_seen": 2890137600 }, { "epoch": 0.75, "learning_rate": 0.00012541121720292066, "loss": 0.0667, "theoretical_loss": 3.358792765022138, "tokens_seen": 2890268672 }, { "epoch": 0.75, "learning_rate": 0.0001253710984514162, "loss": 0.0675, "theoretical_loss": 3.358781065203221, "tokens_seen": 2890399744 }, { "epoch": 0.75, "learning_rate": 0.00012533097969991174, "loss": 0.065, "theoretical_loss": 3.3587693660633957, "tokens_seen": 2890530816 }, { "epoch": 0.75, "learning_rate": 0.00012529086094840728, "loss": 0.0687, "theoretical_loss": 3.358757667602592, "tokens_seen": 2890661888 }, { "epoch": 0.75, "learning_rate": 0.00012525074219690282, "loss": 0.0644, "theoretical_loss": 3.35874596982074, "tokens_seen": 2890792960 }, { "epoch": 0.75, "learning_rate": 0.0001252106234453984, "loss": 0.0666, "theoretical_loss": 3.3587342727177694, "tokens_seen": 2890924032 }, { "epoch": 0.75, "learning_rate": 0.00012517050469389393, "loss": 0.0636, "theoretical_loss": 3.3587225762936095, "tokens_seen": 2891055104 }, { "epoch": 0.75, "learning_rate": 0.00012513038594238947, "loss": 0.0657, "theoretical_loss": 3.3587108805481907, "tokens_seen": 2891186176 }, { "epoch": 0.75, "learning_rate": 0.00012509026719088501, "loss": 0.0655, "theoretical_loss": 3.3586991854814423, "tokens_seen": 2891317248 }, { "epoch": 0.75, "learning_rate": 0.00012505014843938056, "loss": 0.0645, "theoretical_loss": 3.358687491093295, "tokens_seen": 2891448320 }, { "epoch": 0.75, "learning_rate": 0.00012501002968787612, "loss": 0.0631, "theoretical_loss": 3.358675797383678, "tokens_seen": 2891579392 }, { "epoch": 0.75, "learning_rate": 0.00012496991093637167, "loss": 0.0638, "theoretical_loss": 3.3586641043525214, "tokens_seen": 2891710464 }, { "epoch": 0.75, "objective/train/advantage_avg": -0.00020467348804231733, "objective/train/docs_used": 1051420, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2116799354553223, "objective/train/original_loss": 1.2116799354553223, "objective/train/theoretical_loss": 3.3586524119997554, "objective/train/tokens_used": 1262366176, "objective/train/value_avg": -0.00939178466796875, "objective/train/value_loss": 0.0002575020189397037, "objective/train/value_max": -3.24249267578125e-05, "objective/train/value_min": -0.36181640625, "objective/train/value_reward_corr": 0.847062811644659, "objective/train/value_std": 0.02362060546875, "objective/train/weight_avg": 0.9999155402183533, "objective/train/weighted_lm_loss": 1.211982011795044, "objective/train/weights_max": 1.211819052696228, "objective/train/weights_min": 0.3954969048500061, "theoretical_loss": 3.3586524119997554, "tokens_seen": 2891841536 }, { "epoch": 0.75, "learning_rate": 0.0001249297921848672, "loss": 0.064, "theoretical_loss": 3.3586524119997554, "tokens_seen": 2891841536 }, { "epoch": 0.75, "learning_rate": 0.00012488967343336275, "loss": 0.067, "theoretical_loss": 3.358640720325309, "tokens_seen": 2891972608 }, { "epoch": 0.75, "learning_rate": 0.00012484955468185832, "loss": 0.0684, "theoretical_loss": 3.358629029329113, "tokens_seen": 2892103680 }, { "epoch": 0.75, "learning_rate": 0.00012480943593035386, "loss": 0.0641, "theoretical_loss": 3.3586173390110967, "tokens_seen": 2892234752 }, { "epoch": 0.75, "learning_rate": 0.0001247693171788494, "loss": 0.0674, "theoretical_loss": 3.3586056493711904, "tokens_seen": 2892365824 }, { "epoch": 0.75, "learning_rate": 0.00012472919842734494, "loss": 0.066, "theoretical_loss": 3.358593960409324, "tokens_seen": 2892496896 }, { "epoch": 0.75, "learning_rate": 0.00012468907967584048, "loss": 0.0672, "theoretical_loss": 3.3585822721254273, "tokens_seen": 2892627968 }, { "epoch": 0.75, "learning_rate": 0.00012464896092433605, "loss": 0.0681, "theoretical_loss": 3.3585705845194305, "tokens_seen": 2892759040 }, { "epoch": 0.75, "learning_rate": 0.0001246088421728316, "loss": 0.0659, "theoretical_loss": 3.358558897591263, "tokens_seen": 2892890112 }, { "epoch": 0.75, "learning_rate": 0.00012456872342132713, "loss": 0.0608, "theoretical_loss": 3.3585472113408557, "tokens_seen": 2893021184 }, { "epoch": 0.75, "learning_rate": 0.00012452860466982267, "loss": 0.0653, "theoretical_loss": 3.358535525768138, "tokens_seen": 2893152256 }, { "epoch": 0.75, "learning_rate": 0.00012448848591831821, "loss": 0.0636, "theoretical_loss": 3.35852384087304, "tokens_seen": 2893283328 }, { "epoch": 0.75, "learning_rate": 0.00012444836716681378, "loss": 0.0638, "theoretical_loss": 3.3585121566554914, "tokens_seen": 2893414400 }, { "epoch": 0.75, "learning_rate": 0.00012440824841530932, "loss": 0.0644, "theoretical_loss": 3.3585004731154227, "tokens_seen": 2893545472 }, { "epoch": 0.75, "learning_rate": 0.00012436812966380487, "loss": 0.0642, "theoretical_loss": 3.358488790252764, "tokens_seen": 2893676544 }, { "epoch": 0.75, "learning_rate": 0.0001243280109123004, "loss": 0.0588, "theoretical_loss": 3.358477108067445, "tokens_seen": 2893807616 }, { "epoch": 0.75, "learning_rate": 0.00012428789216079595, "loss": 0.0633, "theoretical_loss": 3.3584654265593956, "tokens_seen": 2893938688 }, { "epoch": 0.75, "learning_rate": 0.00012424777340929152, "loss": 0.0666, "theoretical_loss": 3.3584537457285464, "tokens_seen": 2894069760 }, { "epoch": 0.75, "learning_rate": 0.00012420765465778706, "loss": 0.0639, "theoretical_loss": 3.3584420655748266, "tokens_seen": 2894200832 }, { "epoch": 0.75, "learning_rate": 0.0001241675359062826, "loss": 0.0641, "theoretical_loss": 3.3584303860981675, "tokens_seen": 2894331904 }, { "epoch": 0.75, "learning_rate": 0.00012412741715477814, "loss": 0.0676, "theoretical_loss": 3.3584187072984983, "tokens_seen": 2894462976 }, { "epoch": 0.75, "learning_rate": 0.00012408729840327368, "loss": 0.0655, "theoretical_loss": 3.3584070291757495, "tokens_seen": 2894594048 }, { "epoch": 0.75, "learning_rate": 0.00012404717965176925, "loss": 0.0635, "theoretical_loss": 3.3583953517298513, "tokens_seen": 2894725120 }, { "epoch": 0.75, "learning_rate": 0.0001240070609002648, "loss": 0.0626, "theoretical_loss": 3.358383674960733, "tokens_seen": 2894856192 }, { "epoch": 0.75, "learning_rate": 0.00012396694214876033, "loss": 0.0644, "theoretical_loss": 3.358371998868326, "tokens_seen": 2894987264 }, { "epoch": 0.75, "objective/train/advantage_avg": 0.0003908388316631317, "objective/train/docs_used": 1052611, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2360262870788574, "objective/train/original_loss": 1.2360260486602783, "objective/train/theoretical_loss": 3.358360323452559, "objective/train/tokens_used": 1265642976, "objective/train/value_avg": -0.0048065185546875, "objective/train/value_loss": 0.00011950876796618104, "objective/train/value_max": -2.6702880859375e-05, "objective/train/value_min": -0.296875, "objective/train/value_reward_corr": 0.6757093328401667, "objective/train/value_std": 0.00971221923828125, "objective/train/weight_avg": 1.0004431009292603, "objective/train/weighted_lm_loss": 1.2371835708618164, "objective/train/weights_max": 1.3184633255004883, "objective/train/weights_min": 0.3779505491256714, "theoretical_loss": 3.358360323452559, "tokens_seen": 2895118336 }, { "epoch": 0.75, "learning_rate": 0.00012392682339725587, "loss": 0.064, "theoretical_loss": 3.358360323452559, "tokens_seen": 2895118336 }, { "epoch": 0.75, "learning_rate": 0.00012388670464575142, "loss": 0.0602, "theoretical_loss": 3.358348648713364, "tokens_seen": 2895249408 }, { "epoch": 0.75, "learning_rate": 0.00012384658589424698, "loss": 0.0665, "theoretical_loss": 3.358336974650669, "tokens_seen": 2895380480 }, { "epoch": 0.75, "learning_rate": 0.00012380646714274253, "loss": 0.0643, "theoretical_loss": 3.358325301264406, "tokens_seen": 2895511552 }, { "epoch": 0.75, "learning_rate": 0.00012376634839123807, "loss": 0.064, "theoretical_loss": 3.3583136285545043, "tokens_seen": 2895642624 }, { "epoch": 0.76, "learning_rate": 0.0001237262296397336, "loss": 0.0685, "theoretical_loss": 3.3583019565208945, "tokens_seen": 2895773696 }, { "epoch": 0.76, "learning_rate": 0.00012368611088822915, "loss": 0.0657, "theoretical_loss": 3.3582902851635064, "tokens_seen": 2895904768 }, { "epoch": 0.76, "learning_rate": 0.00012364599213672472, "loss": 0.0659, "theoretical_loss": 3.3582786144822703, "tokens_seen": 2896035840 }, { "epoch": 0.76, "learning_rate": 0.00012360587338522026, "loss": 0.0621, "theoretical_loss": 3.3582669444771165, "tokens_seen": 2896166912 }, { "epoch": 0.76, "learning_rate": 0.0001235657546337158, "loss": 0.0688, "theoretical_loss": 3.3582552751479757, "tokens_seen": 2896297984 }, { "epoch": 0.76, "learning_rate": 0.00012352563588221134, "loss": 0.0661, "theoretical_loss": 3.3582436064947774, "tokens_seen": 2896429056 }, { "epoch": 0.76, "learning_rate": 0.00012348551713070688, "loss": 0.069, "theoretical_loss": 3.358231938517452, "tokens_seen": 2896560128 }, { "epoch": 0.76, "learning_rate": 0.00012344539837920245, "loss": 0.0613, "theoretical_loss": 3.358220271215931, "tokens_seen": 2896691200 }, { "epoch": 0.76, "learning_rate": 0.000123405279627698, "loss": 0.0639, "theoretical_loss": 3.3582086045901427, "tokens_seen": 2896822272 }, { "epoch": 0.76, "learning_rate": 0.00012336516087619353, "loss": 0.0618, "theoretical_loss": 3.358196938640018, "tokens_seen": 2896953344 }, { "epoch": 0.76, "learning_rate": 0.00012332504212468907, "loss": 0.0647, "theoretical_loss": 3.358185273365488, "tokens_seen": 2897084416 }, { "epoch": 0.76, "learning_rate": 0.00012328492337318462, "loss": 0.0671, "theoretical_loss": 3.358173608766483, "tokens_seen": 2897215488 }, { "epoch": 0.76, "learning_rate": 0.00012324480462168018, "loss": 0.0638, "theoretical_loss": 3.3581619448429323, "tokens_seen": 2897346560 }, { "epoch": 0.76, "learning_rate": 0.00012320468587017573, "loss": 0.0697, "theoretical_loss": 3.358150281594767, "tokens_seen": 2897477632 }, { "epoch": 0.76, "learning_rate": 0.00012316456711867127, "loss": 0.0667, "theoretical_loss": 3.358138619021917, "tokens_seen": 2897608704 }, { "epoch": 0.76, "learning_rate": 0.0001231244483671668, "loss": 0.0617, "theoretical_loss": 3.358126957124313, "tokens_seen": 2897739776 }, { "epoch": 0.76, "learning_rate": 0.00012308432961566238, "loss": 0.0644, "theoretical_loss": 3.3581152959018854, "tokens_seen": 2897870848 }, { "epoch": 0.76, "learning_rate": 0.00012304421086415792, "loss": 0.065, "theoretical_loss": 3.3581036353545644, "tokens_seen": 2898001920 }, { "epoch": 0.76, "learning_rate": 0.00012300409211265346, "loss": 0.0649, "theoretical_loss": 3.3580919754822807, "tokens_seen": 2898132992 }, { "epoch": 0.76, "learning_rate": 0.000122963973361149, "loss": 0.0608, "theoretical_loss": 3.3580803162849637, "tokens_seen": 2898264064 }, { "epoch": 0.76, "objective/train/advantage_avg": -0.0002765578683465719, "objective/train/docs_used": 1053920, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3802586793899536, "objective/train/original_loss": 1.3802587985992432, "objective/train/theoretical_loss": 3.358068657762545, "objective/train/tokens_used": 1268919776, "objective/train/value_avg": -0.00749969482421875, "objective/train/value_loss": 0.0005114933592267334, "objective/train/value_max": -3.147125244140625e-05, "objective/train/value_min": -0.95849609375, "objective/train/value_reward_corr": 0.7384656941559885, "objective/train/value_std": 0.023284912109375, "objective/train/weight_avg": 0.9999518990516663, "objective/train/weighted_lm_loss": 1.3791468143463135, "objective/train/weights_max": 1.9994478225708008, "objective/train/weights_min": 0.3696053922176361, "theoretical_loss": 3.358068657762545, "tokens_seen": 2898395136 }, { "epoch": 0.76, "learning_rate": 0.00012292385460964454, "loss": 0.0648, "theoretical_loss": 3.358068657762545, "tokens_seen": 2898395136 }, { "epoch": 0.76, "learning_rate": 0.0001228837358581401, "loss": 0.0661, "theoretical_loss": 3.3580569999149548, "tokens_seen": 2898526208 }, { "epoch": 0.76, "learning_rate": 0.00012284361710663565, "loss": 0.0628, "theoretical_loss": 3.358045342742123, "tokens_seen": 2898657280 }, { "epoch": 0.76, "learning_rate": 0.0001228034983551312, "loss": 0.0664, "theoretical_loss": 3.35803368624398, "tokens_seen": 2898788352 }, { "epoch": 0.76, "learning_rate": 0.00012276337960362673, "loss": 0.0645, "theoretical_loss": 3.358022030420457, "tokens_seen": 2898919424 }, { "epoch": 0.76, "learning_rate": 0.00012272326085212228, "loss": 0.0604, "theoretical_loss": 3.358010375271484, "tokens_seen": 2899050496 }, { "epoch": 0.76, "learning_rate": 0.00012268314210061784, "loss": 0.0666, "theoretical_loss": 3.357998720796991, "tokens_seen": 2899181568 }, { "epoch": 0.76, "learning_rate": 0.00012264302334911339, "loss": 0.0674, "theoretical_loss": 3.357987066996909, "tokens_seen": 2899312640 }, { "epoch": 0.76, "learning_rate": 0.00012260290459760893, "loss": 0.0646, "theoretical_loss": 3.357975413871169, "tokens_seen": 2899443712 }, { "epoch": 0.76, "learning_rate": 0.00012256278584610447, "loss": 0.0629, "theoretical_loss": 3.3579637614197004, "tokens_seen": 2899574784 }, { "epoch": 0.76, "learning_rate": 0.0001225226670946, "loss": 0.0652, "theoretical_loss": 3.3579521096424343, "tokens_seen": 2899705856 }, { "epoch": 0.76, "learning_rate": 0.00012248254834309558, "loss": 0.0701, "theoretical_loss": 3.3579404585393013, "tokens_seen": 2899836928 }, { "epoch": 0.76, "learning_rate": 0.00012244242959159112, "loss": 0.0648, "theoretical_loss": 3.357928808110232, "tokens_seen": 2899968000 }, { "epoch": 0.76, "learning_rate": 0.00012240231084008666, "loss": 0.0654, "theoretical_loss": 3.357917158355156, "tokens_seen": 2900099072 }, { "epoch": 0.76, "learning_rate": 0.0001223621920885822, "loss": 0.0611, "theoretical_loss": 3.3579055092740053, "tokens_seen": 2900230144 }, { "epoch": 0.76, "learning_rate": 0.00012232207333707774, "loss": 0.0644, "theoretical_loss": 3.3578938608667097, "tokens_seen": 2900361216 }, { "epoch": 0.76, "learning_rate": 0.0001222819545855733, "loss": 0.0679, "theoretical_loss": 3.3578822131331996, "tokens_seen": 2900492288 }, { "epoch": 0.76, "learning_rate": 0.00012224183583406885, "loss": 0.0642, "theoretical_loss": 3.3578705660734056, "tokens_seen": 2900623360 }, { "epoch": 0.76, "learning_rate": 0.0001222017170825644, "loss": 0.067, "theoretical_loss": 3.357858919687259, "tokens_seen": 2900754432 }, { "epoch": 0.76, "learning_rate": 0.00012216159833105993, "loss": 0.0654, "theoretical_loss": 3.3578472739746896, "tokens_seen": 2900885504 }, { "epoch": 0.76, "learning_rate": 0.00012212147957955548, "loss": 0.0641, "theoretical_loss": 3.357835628935628, "tokens_seen": 2901016576 }, { "epoch": 0.76, "learning_rate": 0.00012208136082805104, "loss": 0.067, "theoretical_loss": 3.3578239845700053, "tokens_seen": 2901147648 }, { "epoch": 0.76, "learning_rate": 0.00012204124207654659, "loss": 0.0641, "theoretical_loss": 3.3578123408777523, "tokens_seen": 2901278720 }, { "epoch": 0.76, "learning_rate": 0.00012200112332504213, "loss": 0.0655, "theoretical_loss": 3.357800697858799, "tokens_seen": 2901409792 }, { "epoch": 0.76, "learning_rate": 0.00012196100457353768, "loss": 0.0635, "theoretical_loss": 3.3577890555130763, "tokens_seen": 2901540864 }, { "epoch": 0.76, "objective/train/advantage_avg": -0.001064891810528934, "objective/train/docs_used": 1055165, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.299460530281067, "objective/train/original_loss": 1.299460530281067, "objective/train/theoretical_loss": 3.3577774138405148, "objective/train/tokens_used": 1272196576, "objective/train/value_avg": -0.01178741455078125, "objective/train/value_loss": 0.0003991331614088267, "objective/train/value_max": -7.212162017822266e-06, "objective/train/value_min": -0.495361328125, "objective/train/value_reward_corr": 0.7901745254269208, "objective/train/value_std": 0.0240325927734375, "objective/train/weight_avg": 0.9991129040718079, "objective/train/weighted_lm_loss": 1.2976912260055542, "objective/train/weights_max": 1.3551589250564575, "objective/train/weights_min": 0.3717447817325592, "theoretical_loss": 3.3577774138405148, "tokens_seen": 2901671936 }, { "epoch": 0.76, "learning_rate": 0.00012192088582203321, "loss": 0.0661, "theoretical_loss": 3.3577774138405148, "tokens_seen": 2901671936 }, { "epoch": 0.76, "learning_rate": 0.00012188076707052876, "loss": 0.0662, "theoretical_loss": 3.3577657728410455, "tokens_seen": 2901803008 }, { "epoch": 0.76, "learning_rate": 0.00012184064831902432, "loss": 0.0606, "theoretical_loss": 3.3577541325145988, "tokens_seen": 2901934080 }, { "epoch": 0.76, "learning_rate": 0.00012180052956751986, "loss": 0.0675, "theoretical_loss": 3.3577424928611053, "tokens_seen": 2902065152 }, { "epoch": 0.76, "learning_rate": 0.00012176041081601542, "loss": 0.0639, "theoretical_loss": 3.357730853880496, "tokens_seen": 2902196224 }, { "epoch": 0.76, "learning_rate": 0.00012172029206451094, "loss": 0.0665, "theoretical_loss": 3.3577192155727014, "tokens_seen": 2902327296 }, { "epoch": 0.76, "learning_rate": 0.0001216801733130065, "loss": 0.063, "theoretical_loss": 3.3577075779376524, "tokens_seen": 2902458368 }, { "epoch": 0.76, "learning_rate": 0.00012164005456150205, "loss": 0.0638, "theoretical_loss": 3.3576959409752796, "tokens_seen": 2902589440 }, { "epoch": 0.76, "learning_rate": 0.0001215999358099976, "loss": 0.0655, "theoretical_loss": 3.3576843046855136, "tokens_seen": 2902720512 }, { "epoch": 0.76, "learning_rate": 0.00012155981705849315, "loss": 0.0645, "theoretical_loss": 3.357672669068285, "tokens_seen": 2902851584 }, { "epoch": 0.76, "learning_rate": 0.00012151969830698868, "loss": 0.0648, "theoretical_loss": 3.3576610341235256, "tokens_seen": 2902982656 }, { "epoch": 0.76, "learning_rate": 0.00012147957955548423, "loss": 0.0666, "theoretical_loss": 3.3576493998511654, "tokens_seen": 2903113728 }, { "epoch": 0.76, "learning_rate": 0.00012143946080397979, "loss": 0.0602, "theoretical_loss": 3.357637766251135, "tokens_seen": 2903244800 }, { "epoch": 0.76, "learning_rate": 0.00012139934205247533, "loss": 0.0651, "theoretical_loss": 3.3576261333233655, "tokens_seen": 2903375872 }, { "epoch": 0.76, "learning_rate": 0.00012135922330097088, "loss": 0.0646, "theoretical_loss": 3.3576145010677876, "tokens_seen": 2903506944 }, { "epoch": 0.76, "learning_rate": 0.00012131910454946641, "loss": 0.0651, "theoretical_loss": 3.3576028694843325, "tokens_seen": 2903638016 }, { "epoch": 0.76, "learning_rate": 0.00012127898579796196, "loss": 0.0651, "theoretical_loss": 3.35759123857293, "tokens_seen": 2903769088 }, { "epoch": 0.76, "learning_rate": 0.00012123886704645752, "loss": 0.0663, "theoretical_loss": 3.3575796083335123, "tokens_seen": 2903900160 }, { "epoch": 0.76, "learning_rate": 0.00012119874829495306, "loss": 0.0659, "theoretical_loss": 3.357567978766009, "tokens_seen": 2904031232 }, { "epoch": 0.76, "learning_rate": 0.00012115862954344862, "loss": 0.0642, "theoretical_loss": 3.357556349870352, "tokens_seen": 2904162304 }, { "epoch": 0.76, "learning_rate": 0.00012111851079194416, "loss": 0.0647, "theoretical_loss": 3.3575447216464718, "tokens_seen": 2904293376 }, { "epoch": 0.76, "learning_rate": 0.0001210783920404397, "loss": 0.0669, "theoretical_loss": 3.3575330940942987, "tokens_seen": 2904424448 }, { "epoch": 0.76, "learning_rate": 0.00012103827328893525, "loss": 0.0638, "theoretical_loss": 3.3575214672137648, "tokens_seen": 2904555520 }, { "epoch": 0.76, "learning_rate": 0.0001209981545374308, "loss": 0.0663, "theoretical_loss": 3.3575098410047994, "tokens_seen": 2904686592 }, { "epoch": 0.76, "learning_rate": 0.00012095803578592635, "loss": 0.0648, "theoretical_loss": 3.357498215467335, "tokens_seen": 2904817664 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.00033984374022111297, "objective/train/docs_used": 1056274, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.249295949935913, "objective/train/original_loss": 1.2492960691452026, "objective/train/theoretical_loss": 3.3574865906013014, "objective/train/tokens_used": 1275473376, "objective/train/value_avg": -0.005870819091796875, "objective/train/value_loss": 0.0003162994689773768, "objective/train/value_max": -2.282857894897461e-05, "objective/train/value_min": -0.60009765625, "objective/train/value_reward_corr": 0.6733933642453468, "objective/train/value_std": 0.0167388916015625, "objective/train/weight_avg": 1.0004817247390747, "objective/train/weighted_lm_loss": 1.249788761138916, "objective/train/weights_max": 1.683703899383545, "objective/train/weights_min": 0.36992841958999634, "theoretical_loss": 3.3574865906013014, "tokens_seen": 2904948736 }, { "epoch": 0.76, "learning_rate": 0.0001209179170344219, "loss": 0.0654, "theoretical_loss": 3.3574865906013014, "tokens_seen": 2904948736 }, { "epoch": 0.76, "learning_rate": 0.00012087779828291743, "loss": 0.0647, "theoretical_loss": 3.35747496640663, "tokens_seen": 2905079808 }, { "epoch": 0.76, "learning_rate": 0.00012083767953141299, "loss": 0.068, "theoretical_loss": 3.3574633428832517, "tokens_seen": 2905210880 }, { "epoch": 0.76, "learning_rate": 0.00012079756077990853, "loss": 0.0637, "theoretical_loss": 3.3574517200310976, "tokens_seen": 2905341952 }, { "epoch": 0.76, "learning_rate": 0.00012075744202840408, "loss": 0.0679, "theoretical_loss": 3.3574400978500982, "tokens_seen": 2905473024 }, { "epoch": 0.76, "learning_rate": 0.00012071732327689964, "loss": 0.0655, "theoretical_loss": 3.3574284763401847, "tokens_seen": 2905604096 }, { "epoch": 0.76, "learning_rate": 0.00012067720452539517, "loss": 0.0626, "theoretical_loss": 3.357416855501288, "tokens_seen": 2905735168 }, { "epoch": 0.76, "learning_rate": 0.00012063708577389072, "loss": 0.0617, "theoretical_loss": 3.35740523533334, "tokens_seen": 2905866240 }, { "epoch": 0.76, "learning_rate": 0.00012059696702238626, "loss": 0.0633, "theoretical_loss": 3.3573936158362705, "tokens_seen": 2905997312 }, { "epoch": 0.76, "learning_rate": 0.00012055684827088182, "loss": 0.0615, "theoretical_loss": 3.3573819970100107, "tokens_seen": 2906128384 }, { "epoch": 0.76, "learning_rate": 0.00012051672951937737, "loss": 0.0625, "theoretical_loss": 3.357370378854492, "tokens_seen": 2906259456 }, { "epoch": 0.76, "learning_rate": 0.0001204766107678729, "loss": 0.0632, "theoretical_loss": 3.357358761369645, "tokens_seen": 2906390528 }, { "epoch": 0.76, "learning_rate": 0.00012043649201636845, "loss": 0.0653, "theoretical_loss": 3.3573471445554013, "tokens_seen": 2906521600 }, { "epoch": 0.76, "learning_rate": 0.000120396373264864, "loss": 0.0637, "theoretical_loss": 3.357335528411692, "tokens_seen": 2906652672 }, { "epoch": 0.76, "learning_rate": 0.00012035625451335955, "loss": 0.0629, "theoretical_loss": 3.3573239129384476, "tokens_seen": 2906783744 }, { "epoch": 0.76, "learning_rate": 0.0001203161357618551, "loss": 0.0621, "theoretical_loss": 3.357312298135599, "tokens_seen": 2906914816 }, { "epoch": 0.76, "learning_rate": 0.00012027601701035063, "loss": 0.0655, "theoretical_loss": 3.357300684003078, "tokens_seen": 2907045888 }, { "epoch": 0.76, "learning_rate": 0.00012023589825884619, "loss": 0.0652, "theoretical_loss": 3.3572890705408156, "tokens_seen": 2907176960 }, { "epoch": 0.76, "learning_rate": 0.00012019577950734173, "loss": 0.0626, "theoretical_loss": 3.3572774577487423, "tokens_seen": 2907308032 }, { "epoch": 0.76, "learning_rate": 0.00012015566075583728, "loss": 0.0651, "theoretical_loss": 3.35726584562679, "tokens_seen": 2907439104 }, { "epoch": 0.76, "learning_rate": 0.00012011554200433284, "loss": 0.0629, "theoretical_loss": 3.357254234174889, "tokens_seen": 2907570176 }, { "epoch": 0.76, "learning_rate": 0.00012007542325282837, "loss": 0.0644, "theoretical_loss": 3.357242623392971, "tokens_seen": 2907701248 }, { "epoch": 0.76, "learning_rate": 0.00012003530450132392, "loss": 0.0666, "theoretical_loss": 3.357231013280967, "tokens_seen": 2907832320 }, { "epoch": 0.76, "learning_rate": 0.00011999518574981946, "loss": 0.0626, "theoretical_loss": 3.3572194038388083, "tokens_seen": 2907963392 }, { "epoch": 0.76, "learning_rate": 0.00011995506699831502, "loss": 0.0683, "theoretical_loss": 3.357207795066426, "tokens_seen": 2908094464 }, { "epoch": 0.76, "objective/train/advantage_avg": -0.0005530610796995461, "objective/train/docs_used": 1057433, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.195137858390808, "objective/train/original_loss": 1.1951377391815186, "objective/train/theoretical_loss": 3.3571961869637508, "objective/train/tokens_used": 1278750176, "objective/train/value_avg": -0.007045745849609375, "objective/train/value_loss": 0.00023520112154074013, "objective/train/value_max": -2.390146255493164e-05, "objective/train/value_min": -0.2646484375, "objective/train/value_reward_corr": 0.6884125532403791, "objective/train/value_std": 0.01390838623046875, "objective/train/weight_avg": 0.9995513558387756, "objective/train/weighted_lm_loss": 1.1938782930374146, "objective/train/weights_max": 1.2832419872283936, "objective/train/weights_min": 0.3726079761981964, "theoretical_loss": 3.3571961869637508, "tokens_seen": 2908225536 }, { "epoch": 0.76, "learning_rate": 0.00011991494824681057, "loss": 0.0628, "theoretical_loss": 3.3571961869637508, "tokens_seen": 2908225536 }, { "epoch": 0.76, "learning_rate": 0.00011987482949530611, "loss": 0.0656, "theoretical_loss": 3.3571845795307143, "tokens_seen": 2908356608 }, { "epoch": 0.76, "learning_rate": 0.00011983471074380165, "loss": 0.0622, "theoretical_loss": 3.357172972767248, "tokens_seen": 2908487680 }, { "epoch": 0.76, "learning_rate": 0.0001197945919922972, "loss": 0.0638, "theoretical_loss": 3.3571613666732825, "tokens_seen": 2908618752 }, { "epoch": 0.76, "learning_rate": 0.00011975447324079275, "loss": 0.0633, "theoretical_loss": 3.357149761248749, "tokens_seen": 2908749824 }, { "epoch": 0.76, "learning_rate": 0.0001197143544892883, "loss": 0.0619, "theoretical_loss": 3.35713815649358, "tokens_seen": 2908880896 }, { "epoch": 0.76, "learning_rate": 0.00011967423573778385, "loss": 0.0655, "theoretical_loss": 3.357126552407705, "tokens_seen": 2909011968 }, { "epoch": 0.76, "learning_rate": 0.00011963411698627939, "loss": 0.0634, "theoretical_loss": 3.357114948991056, "tokens_seen": 2909143040 }, { "epoch": 0.76, "learning_rate": 0.00011959399823477493, "loss": 0.0669, "theoretical_loss": 3.3571033462435644, "tokens_seen": 2909274112 }, { "epoch": 0.76, "learning_rate": 0.00011955387948327048, "loss": 0.0666, "theoretical_loss": 3.357091744165161, "tokens_seen": 2909405184 }, { "epoch": 0.76, "learning_rate": 0.00011951376073176604, "loss": 0.0603, "theoretical_loss": 3.3570801427557777, "tokens_seen": 2909536256 }, { "epoch": 0.76, "learning_rate": 0.00011947364198026158, "loss": 0.0672, "theoretical_loss": 3.3570685420153454, "tokens_seen": 2909667328 }, { "epoch": 0.76, "learning_rate": 0.00011943352322875712, "loss": 0.0618, "theoretical_loss": 3.357056941943796, "tokens_seen": 2909798400 }, { "epoch": 0.76, "learning_rate": 0.00011939340447725266, "loss": 0.0637, "theoretical_loss": 3.35704534254106, "tokens_seen": 2909929472 }, { "epoch": 0.76, "learning_rate": 0.00011935328572574822, "loss": 0.0679, "theoretical_loss": 3.3570337438070683, "tokens_seen": 2910060544 }, { "epoch": 0.76, "learning_rate": 0.00011931316697424377, "loss": 0.0647, "theoretical_loss": 3.3570221457417535, "tokens_seen": 2910191616 }, { "epoch": 0.76, "learning_rate": 0.00011927304822273931, "loss": 0.0658, "theoretical_loss": 3.357010548345046, "tokens_seen": 2910322688 }, { "epoch": 0.76, "learning_rate": 0.00011923292947123485, "loss": 0.0652, "theoretical_loss": 3.3569989516168777, "tokens_seen": 2910453760 }, { "epoch": 0.76, "learning_rate": 0.0001191928107197304, "loss": 0.0641, "theoretical_loss": 3.3569873555571803, "tokens_seen": 2910584832 }, { "epoch": 0.76, "learning_rate": 0.00011915269196822595, "loss": 0.0626, "theoretical_loss": 3.356975760165884, "tokens_seen": 2910715904 }, { "epoch": 0.76, "learning_rate": 0.0001191125732167215, "loss": 0.0665, "theoretical_loss": 3.3569641654429208, "tokens_seen": 2910846976 }, { "epoch": 0.76, "learning_rate": 0.00011907245446521705, "loss": 0.0641, "theoretical_loss": 3.3569525713882222, "tokens_seen": 2910978048 }, { "epoch": 0.76, "learning_rate": 0.00011903233571371259, "loss": 0.0649, "theoretical_loss": 3.3569409780017194, "tokens_seen": 2911109120 }, { "epoch": 0.76, "learning_rate": 0.00011899221696220813, "loss": 0.0638, "theoretical_loss": 3.356929385283344, "tokens_seen": 2911240192 }, { "epoch": 0.76, "learning_rate": 0.00011895209821070368, "loss": 0.0644, "theoretical_loss": 3.356917793233027, "tokens_seen": 2911371264 }, { "epoch": 0.76, "objective/train/advantage_avg": 0.00046487266081385314, "objective/train/docs_used": 1058588, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3457109928131104, "objective/train/original_loss": 1.3457109928131104, "objective/train/theoretical_loss": 3.3569062018507005, "objective/train/tokens_used": 1282026976, "objective/train/value_avg": -0.00949859619140625, "objective/train/value_loss": 0.00022275625087786466, "objective/train/value_max": -2.9325485229492188e-05, "objective/train/value_min": -0.96630859375, "objective/train/value_reward_corr": 0.7550642534663565, "objective/train/value_std": 0.0184478759765625, "objective/train/weight_avg": 1.00057053565979, "objective/train/weighted_lm_loss": 1.3461002111434937, "objective/train/weights_max": 1.6057409048080444, "objective/train/weights_min": 0.3711751401424408, "theoretical_loss": 3.3569062018507005, "tokens_seen": 2911502336 }, { "epoch": 0.76, "learning_rate": 0.00011891197945919924, "loss": 0.0723, "theoretical_loss": 3.3569062018507005, "tokens_seen": 2911502336 }, { "epoch": 0.76, "learning_rate": 0.00011887186070769478, "loss": 0.0679, "theoretical_loss": 3.356894611136296, "tokens_seen": 2911633408 }, { "epoch": 0.76, "learning_rate": 0.00011883174195619032, "loss": 0.0672, "theoretical_loss": 3.3568830210897436, "tokens_seen": 2911764480 }, { "epoch": 0.76, "learning_rate": 0.00011879162320468586, "loss": 0.0676, "theoretical_loss": 3.3568714317109762, "tokens_seen": 2911895552 }, { "epoch": 0.76, "learning_rate": 0.00011875150445318142, "loss": 0.0634, "theoretical_loss": 3.3568598429999246, "tokens_seen": 2912026624 }, { "epoch": 0.76, "learning_rate": 0.00011871138570167697, "loss": 0.069, "theoretical_loss": 3.3568482549565206, "tokens_seen": 2912157696 }, { "epoch": 0.77, "learning_rate": 0.00011867126695017251, "loss": 0.0622, "theoretical_loss": 3.356836667580695, "tokens_seen": 2912288768 }, { "epoch": 0.77, "learning_rate": 0.00011863114819866807, "loss": 0.0649, "theoretical_loss": 3.3568250808723805, "tokens_seen": 2912419840 }, { "epoch": 0.77, "learning_rate": 0.0001185910294471636, "loss": 0.0652, "theoretical_loss": 3.3568134948315076, "tokens_seen": 2912550912 }, { "epoch": 0.77, "learning_rate": 0.00011855091069565915, "loss": 0.0646, "theoretical_loss": 3.356801909458008, "tokens_seen": 2912681984 }, { "epoch": 0.77, "learning_rate": 0.0001185107919441547, "loss": 0.0653, "theoretical_loss": 3.3567903247518136, "tokens_seen": 2912813056 }, { "epoch": 0.77, "learning_rate": 0.00011847067319265025, "loss": 0.064, "theoretical_loss": 3.3567787407128558, "tokens_seen": 2912944128 }, { "epoch": 0.77, "learning_rate": 0.0001184305544411458, "loss": 0.0646, "theoretical_loss": 3.3567671573410656, "tokens_seen": 2913075200 }, { "epoch": 0.77, "learning_rate": 0.00011839043568964133, "loss": 0.0621, "theoretical_loss": 3.3567555746363755, "tokens_seen": 2913206272 }, { "epoch": 0.77, "learning_rate": 0.00011835031693813689, "loss": 0.0664, "theoretical_loss": 3.3567439925987164, "tokens_seen": 2913337344 }, { "epoch": 0.77, "learning_rate": 0.00011831019818663244, "loss": 0.0667, "theoretical_loss": 3.35673241122802, "tokens_seen": 2913468416 }, { "epoch": 0.77, "learning_rate": 0.00011827007943512798, "loss": 0.0649, "theoretical_loss": 3.356720830524218, "tokens_seen": 2913599488 }, { "epoch": 0.77, "learning_rate": 0.00011822996068362354, "loss": 0.0625, "theoretical_loss": 3.356709250487242, "tokens_seen": 2913730560 }, { "epoch": 0.77, "learning_rate": 0.00011818984193211906, "loss": 0.0667, "theoretical_loss": 3.3566976711170233, "tokens_seen": 2913861632 }, { "epoch": 0.77, "learning_rate": 0.00011814972318061462, "loss": 0.0645, "theoretical_loss": 3.356686092413494, "tokens_seen": 2913992704 }, { "epoch": 0.77, "learning_rate": 0.00011810960442911017, "loss": 0.0626, "theoretical_loss": 3.3566745143765857, "tokens_seen": 2914123776 }, { "epoch": 0.77, "learning_rate": 0.00011806948567760571, "loss": 0.0658, "theoretical_loss": 3.3566629370062295, "tokens_seen": 2914254848 }, { "epoch": 0.77, "learning_rate": 0.00011802936692610127, "loss": 0.0644, "theoretical_loss": 3.3566513603023576, "tokens_seen": 2914385920 }, { "epoch": 0.77, "learning_rate": 0.0001179892481745968, "loss": 0.0651, "theoretical_loss": 3.3566397842649014, "tokens_seen": 2914516992 }, { "epoch": 0.77, "learning_rate": 0.00011794912942309235, "loss": 0.0667, "theoretical_loss": 3.3566282088937927, "tokens_seen": 2914648064 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.0005846558487974107, "objective/train/docs_used": 1059748, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3763521909713745, "objective/train/original_loss": 1.3763521909713745, "objective/train/theoretical_loss": 3.356616634188963, "objective/train/tokens_used": 1285303776, "objective/train/value_avg": -0.01265716552734375, "objective/train/value_loss": 0.00023838038032408804, "objective/train/value_max": -2.485513687133789e-05, "objective/train/value_min": -0.61376953125, "objective/train/value_reward_corr": 0.9142954638710314, "objective/train/value_std": 0.036590576171875, "objective/train/weight_avg": 1.0006937980651855, "objective/train/weighted_lm_loss": 1.3767807483673096, "objective/train/weights_max": 1.3088483810424805, "objective/train/weights_min": 0.36819323897361755, "theoretical_loss": 3.356616634188963, "tokens_seen": 2914779136 }, { "epoch": 0.77, "learning_rate": 0.00011790901067158791, "loss": 0.0648, "theoretical_loss": 3.356616634188963, "tokens_seen": 2914779136 }, { "epoch": 0.77, "learning_rate": 0.00011786889192008345, "loss": 0.0642, "theoretical_loss": 3.3566050601503443, "tokens_seen": 2914910208 }, { "epoch": 0.77, "learning_rate": 0.000117828773168579, "loss": 0.0655, "theoretical_loss": 3.356593486777868, "tokens_seen": 2915041280 }, { "epoch": 0.77, "learning_rate": 0.00011778865441707453, "loss": 0.0692, "theoretical_loss": 3.356581914071466, "tokens_seen": 2915172352 }, { "epoch": 0.77, "learning_rate": 0.00011774853566557009, "loss": 0.0638, "theoretical_loss": 3.3565703420310697, "tokens_seen": 2915303424 }, { "epoch": 0.77, "learning_rate": 0.00011770841691406564, "loss": 0.0646, "theoretical_loss": 3.356558770656611, "tokens_seen": 2915434496 }, { "epoch": 0.77, "learning_rate": 0.00011766829816256118, "loss": 0.0614, "theoretical_loss": 3.356547199948022, "tokens_seen": 2915565568 }, { "epoch": 0.77, "learning_rate": 0.00011762817941105674, "loss": 0.0602, "theoretical_loss": 3.356535629905234, "tokens_seen": 2915696640 }, { "epoch": 0.77, "learning_rate": 0.00011758806065955226, "loss": 0.0672, "theoretical_loss": 3.356524060528179, "tokens_seen": 2915827712 }, { "epoch": 0.77, "learning_rate": 0.00011754794190804782, "loss": 0.069, "theoretical_loss": 3.3565124918167886, "tokens_seen": 2915958784 }, { "epoch": 0.77, "learning_rate": 0.00011750782315654337, "loss": 0.0639, "theoretical_loss": 3.356500923770995, "tokens_seen": 2916089856 }, { "epoch": 0.77, "learning_rate": 0.00011746770440503892, "loss": 0.0697, "theoretical_loss": 3.356489356390729, "tokens_seen": 2916220928 }, { "epoch": 0.77, "learning_rate": 0.00011742758565353447, "loss": 0.0663, "theoretical_loss": 3.3564777896759237, "tokens_seen": 2916352000 }, { "epoch": 0.77, "learning_rate": 0.00011738746690203001, "loss": 0.0616, "theoretical_loss": 3.35646622362651, "tokens_seen": 2916483072 }, { "epoch": 0.77, "learning_rate": 0.00011734734815052555, "loss": 0.066, "theoretical_loss": 3.35645465824242, "tokens_seen": 2916614144 }, { "epoch": 0.77, "learning_rate": 0.00011730722939902111, "loss": 0.064, "theoretical_loss": 3.3564430935235854, "tokens_seen": 2916745216 }, { "epoch": 0.77, "learning_rate": 0.00011726711064751665, "loss": 0.0655, "theoretical_loss": 3.3564315294699383, "tokens_seen": 2916876288 }, { "epoch": 0.77, "learning_rate": 0.0001172269918960122, "loss": 0.0648, "theoretical_loss": 3.3564199660814102, "tokens_seen": 2917007360 }, { "epoch": 0.77, "learning_rate": 0.00011718687314450774, "loss": 0.0654, "theoretical_loss": 3.3564084033579333, "tokens_seen": 2917138432 }, { "epoch": 0.77, "learning_rate": 0.00011714675439300329, "loss": 0.0636, "theoretical_loss": 3.3563968412994396, "tokens_seen": 2917269504 }, { "epoch": 0.77, "learning_rate": 0.00011710663564149884, "loss": 0.0655, "theoretical_loss": 3.3563852799058607, "tokens_seen": 2917400576 }, { "epoch": 0.77, "learning_rate": 0.00011706651688999438, "loss": 0.0645, "theoretical_loss": 3.356373719177128, "tokens_seen": 2917531648 }, { "epoch": 0.77, "learning_rate": 0.00011702639813848994, "loss": 0.0595, "theoretical_loss": 3.3563621591131745, "tokens_seen": 2917662720 }, { "epoch": 0.77, "learning_rate": 0.00011698627938698549, "loss": 0.0635, "theoretical_loss": 3.3563505997139313, "tokens_seen": 2917793792 }, { "epoch": 0.77, "learning_rate": 0.00011694616063548102, "loss": 0.0616, "theoretical_loss": 3.3563390409793303, "tokens_seen": 2917924864 }, { "epoch": 0.77, "objective/train/advantage_avg": -3.2370695407735184e-05, "objective/train/docs_used": 1060857, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3370975255966187, "objective/train/original_loss": 1.337097406387329, "objective/train/theoretical_loss": 3.356327482909304, "objective/train/tokens_used": 1288580576, "objective/train/value_avg": -0.006526947021484375, "objective/train/value_loss": 0.0001258328411495313, "objective/train/value_max": -3.790855407714844e-05, "objective/train/value_min": -0.202880859375, "objective/train/value_reward_corr": 0.6826456582306457, "objective/train/value_std": 0.01055908203125, "objective/train/weight_avg": 1.0000262260437012, "objective/train/weighted_lm_loss": 1.3369383811950684, "objective/train/weights_max": 1.100432276725769, "objective/train/weights_min": 0.3722897469997406, "theoretical_loss": 3.356327482909304, "tokens_seen": 2918055936 }, { "epoch": 0.77, "learning_rate": 0.00011690604188397657, "loss": 0.0637, "theoretical_loss": 3.356327482909304, "tokens_seen": 2918055936 }, { "epoch": 0.77, "learning_rate": 0.00011686592313247212, "loss": 0.0662, "theoretical_loss": 3.356315925503784, "tokens_seen": 2918187008 }, { "epoch": 0.77, "learning_rate": 0.00011682580438096767, "loss": 0.0635, "theoretical_loss": 3.3563043687627023, "tokens_seen": 2918318080 }, { "epoch": 0.77, "learning_rate": 0.00011678568562946323, "loss": 0.068, "theoretical_loss": 3.356292812685991, "tokens_seen": 2918449152 }, { "epoch": 0.77, "learning_rate": 0.00011674556687795875, "loss": 0.0655, "theoretical_loss": 3.356281257273581, "tokens_seen": 2918580224 }, { "epoch": 0.77, "learning_rate": 0.00011670544812645431, "loss": 0.0676, "theoretical_loss": 3.3562697025254065, "tokens_seen": 2918711296 }, { "epoch": 0.77, "learning_rate": 0.00011666532937494985, "loss": 0.0658, "theoretical_loss": 3.3562581484413974, "tokens_seen": 2918842368 }, { "epoch": 0.77, "learning_rate": 0.0001166252106234454, "loss": 0.0608, "theoretical_loss": 3.356246595021487, "tokens_seen": 2918973440 }, { "epoch": 0.77, "learning_rate": 0.00011658509187194096, "loss": 0.0622, "theoretical_loss": 3.356235042265606, "tokens_seen": 2919104512 }, { "epoch": 0.77, "learning_rate": 0.00011654497312043649, "loss": 0.0641, "theoretical_loss": 3.356223490173688, "tokens_seen": 2919235584 }, { "epoch": 0.77, "learning_rate": 0.00011650485436893204, "loss": 0.0687, "theoretical_loss": 3.356211938745664, "tokens_seen": 2919366656 }, { "epoch": 0.77, "learning_rate": 0.00011646473561742758, "loss": 0.0664, "theoretical_loss": 3.356200387981466, "tokens_seen": 2919497728 }, { "epoch": 0.77, "learning_rate": 0.00011642461686592314, "loss": 0.0686, "theoretical_loss": 3.3561888378810267, "tokens_seen": 2919628800 }, { "epoch": 0.77, "learning_rate": 0.00011638449811441869, "loss": 0.0655, "theoretical_loss": 3.3561772884442775, "tokens_seen": 2919759872 }, { "epoch": 0.77, "learning_rate": 0.00011634437936291422, "loss": 0.0652, "theoretical_loss": 3.356165739671151, "tokens_seen": 2919890944 }, { "epoch": 0.77, "learning_rate": 0.00011630426061140978, "loss": 0.0652, "theoretical_loss": 3.356154191561579, "tokens_seen": 2920022016 }, { "epoch": 0.77, "learning_rate": 0.00011626414185990532, "loss": 0.0637, "theoretical_loss": 3.3561426441154936, "tokens_seen": 2920153088 }, { "epoch": 0.77, "learning_rate": 0.00011622402310840087, "loss": 0.067, "theoretical_loss": 3.356131097332827, "tokens_seen": 2920284160 }, { "epoch": 0.77, "learning_rate": 0.00011618390435689643, "loss": 0.0665, "theoretical_loss": 3.3561195512135114, "tokens_seen": 2920415232 }, { "epoch": 0.77, "learning_rate": 0.00011614378560539197, "loss": 0.0633, "theoretical_loss": 3.356108005757479, "tokens_seen": 2920546304 }, { "epoch": 0.77, "learning_rate": 0.00011610366685388751, "loss": 0.0631, "theoretical_loss": 3.356096460964661, "tokens_seen": 2920677376 }, { "epoch": 0.77, "learning_rate": 0.00011606354810238305, "loss": 0.0642, "theoretical_loss": 3.3560849168349907, "tokens_seen": 2920808448 }, { "epoch": 0.77, "learning_rate": 0.0001160234293508786, "loss": 0.0618, "theoretical_loss": 3.3560733733683996, "tokens_seen": 2920939520 }, { "epoch": 0.77, "learning_rate": 0.00011598331059937416, "loss": 0.067, "theoretical_loss": 3.3560618305648204, "tokens_seen": 2921070592 }, { "epoch": 0.77, "learning_rate": 0.0001159431918478697, "loss": 0.0659, "theoretical_loss": 3.3560502884241847, "tokens_seen": 2921201664 }, { "epoch": 0.77, "objective/train/advantage_avg": -0.0006997896707616746, "objective/train/docs_used": 1062024, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3455272912979126, "objective/train/original_loss": 1.345527172088623, "objective/train/theoretical_loss": 3.3560387469464255, "objective/train/tokens_used": 1291857376, "objective/train/value_avg": -0.007579803466796875, "objective/train/value_loss": 0.0003019356809090823, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.37841796875, "objective/train/value_reward_corr": 0.7202165807264829, "objective/train/value_std": 0.01500701904296875, "objective/train/weight_avg": 0.9994326233863831, "objective/train/weighted_lm_loss": 1.3445745706558228, "objective/train/weights_max": 1.1392511129379272, "objective/train/weights_min": 0.36856982111930847, "theoretical_loss": 3.3560387469464255, "tokens_seen": 2921332736 }, { "epoch": 0.77, "learning_rate": 0.00011590307309636524, "loss": 0.0663, "theoretical_loss": 3.3560387469464255, "tokens_seen": 2921332736 }, { "epoch": 0.77, "learning_rate": 0.00011586295434486078, "loss": 0.0642, "theoretical_loss": 3.3560272061314738, "tokens_seen": 2921463808 }, { "epoch": 0.77, "learning_rate": 0.00011582283559335634, "loss": 0.0649, "theoretical_loss": 3.356015665979262, "tokens_seen": 2921594880 }, { "epoch": 0.77, "learning_rate": 0.00011578271684185189, "loss": 0.0653, "theoretical_loss": 3.3560041264897236, "tokens_seen": 2921725952 }, { "epoch": 0.77, "learning_rate": 0.00011574259809034743, "loss": 0.0688, "theoretical_loss": 3.3559925876627896, "tokens_seen": 2921857024 }, { "epoch": 0.77, "learning_rate": 0.00011570247933884298, "loss": 0.0617, "theoretical_loss": 3.355981049498393, "tokens_seen": 2921988096 }, { "epoch": 0.77, "learning_rate": 0.00011566236058733852, "loss": 0.0612, "theoretical_loss": 3.3559695119964648, "tokens_seen": 2922119168 }, { "epoch": 0.77, "learning_rate": 0.00011562224183583407, "loss": 0.0617, "theoretical_loss": 3.355957975156939, "tokens_seen": 2922250240 }, { "epoch": 0.77, "learning_rate": 0.00011558212308432963, "loss": 0.0642, "theoretical_loss": 3.355946438979746, "tokens_seen": 2922381312 }, { "epoch": 0.77, "learning_rate": 0.00011554200433282517, "loss": 0.0661, "theoretical_loss": 3.3559349034648194, "tokens_seen": 2922512384 }, { "epoch": 0.77, "learning_rate": 0.00011550188558132071, "loss": 0.0675, "theoretical_loss": 3.3559233686120913, "tokens_seen": 2922643456 }, { "epoch": 0.77, "learning_rate": 0.00011546176682981625, "loss": 0.0623, "theoretical_loss": 3.3559118344214935, "tokens_seen": 2922774528 }, { "epoch": 0.77, "learning_rate": 0.0001154216480783118, "loss": 0.0673, "theoretical_loss": 3.3559003008929587, "tokens_seen": 2922905600 }, { "epoch": 0.77, "learning_rate": 0.00011538152932680736, "loss": 0.0664, "theoretical_loss": 3.3558887680264196, "tokens_seen": 2923036672 }, { "epoch": 0.77, "learning_rate": 0.0001153414105753029, "loss": 0.0645, "theoretical_loss": 3.355877235821807, "tokens_seen": 2923167744 }, { "epoch": 0.77, "learning_rate": 0.00011530129182379844, "loss": 0.0646, "theoretical_loss": 3.355865704279055, "tokens_seen": 2923298816 }, { "epoch": 0.77, "learning_rate": 0.00011526117307229398, "loss": 0.0657, "theoretical_loss": 3.355854173398095, "tokens_seen": 2923429888 }, { "epoch": 0.77, "learning_rate": 0.00011522105432078954, "loss": 0.0653, "theoretical_loss": 3.3558426431788595, "tokens_seen": 2923560960 }, { "epoch": 0.77, "learning_rate": 0.0001151809355692851, "loss": 0.0657, "theoretical_loss": 3.3558311136212806, "tokens_seen": 2923692032 }, { "epoch": 0.77, "learning_rate": 0.00011514081681778063, "loss": 0.065, "theoretical_loss": 3.3558195847252916, "tokens_seen": 2923823104 }, { "epoch": 0.77, "learning_rate": 0.00011510069806627618, "loss": 0.0668, "theoretical_loss": 3.355808056490824, "tokens_seen": 2923954176 }, { "epoch": 0.77, "learning_rate": 0.00011506057931477172, "loss": 0.0655, "theoretical_loss": 3.35579652891781, "tokens_seen": 2924085248 }, { "epoch": 0.77, "learning_rate": 0.00011502046056326727, "loss": 0.0641, "theoretical_loss": 3.3557850020061832, "tokens_seen": 2924216320 }, { "epoch": 0.77, "learning_rate": 0.00011498034181176283, "loss": 0.0632, "theoretical_loss": 3.355773475755875, "tokens_seen": 2924347392 }, { "epoch": 0.77, "learning_rate": 0.00011494022306025837, "loss": 0.0671, "theoretical_loss": 3.355761950166818, "tokens_seen": 2924478464 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.0008818767964839935, "objective/train/docs_used": 1063229, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3021143674850464, "objective/train/original_loss": 1.3021143674850464, "objective/train/theoretical_loss": 3.3557504252389445, "objective/train/tokens_used": 1295134176, "objective/train/value_avg": -0.00922393798828125, "objective/train/value_loss": 0.00026928409351967275, "objective/train/value_max": -3.3736228942871094e-05, "objective/train/value_min": -0.8720703125, "objective/train/value_reward_corr": 0.7354123000081694, "objective/train/value_std": 0.0185699462890625, "objective/train/weight_avg": 1.0010017156600952, "objective/train/weighted_lm_loss": 1.3029780387878418, "objective/train/weights_max": 1.3908377885818481, "objective/train/weights_min": 0.3684675693511963, "theoretical_loss": 3.3557504252389445, "tokens_seen": 2924609536 }, { "epoch": 0.77, "learning_rate": 0.00011490010430875392, "loss": 0.0681, "theoretical_loss": 3.3557504252389445, "tokens_seen": 2924609536 }, { "epoch": 0.77, "learning_rate": 0.00011485998555724945, "loss": 0.0662, "theoretical_loss": 3.355738900972187, "tokens_seen": 2924740608 }, { "epoch": 0.77, "learning_rate": 0.000114819866805745, "loss": 0.0679, "theoretical_loss": 3.3557273773664784, "tokens_seen": 2924871680 }, { "epoch": 0.77, "learning_rate": 0.00011477974805424056, "loss": 0.065, "theoretical_loss": 3.355715854421751, "tokens_seen": 2925002752 }, { "epoch": 0.77, "learning_rate": 0.0001147396293027361, "loss": 0.065, "theoretical_loss": 3.355704332137937, "tokens_seen": 2925133824 }, { "epoch": 0.77, "learning_rate": 0.00011469951055123166, "loss": 0.0651, "theoretical_loss": 3.3556928105149693, "tokens_seen": 2925264896 }, { "epoch": 0.77, "learning_rate": 0.00011465939179972718, "loss": 0.0613, "theoretical_loss": 3.3556812895527797, "tokens_seen": 2925395968 }, { "epoch": 0.77, "learning_rate": 0.00011461927304822274, "loss": 0.066, "theoretical_loss": 3.355669769251301, "tokens_seen": 2925527040 }, { "epoch": 0.77, "learning_rate": 0.0001145791542967183, "loss": 0.0646, "theoretical_loss": 3.355658249610466, "tokens_seen": 2925658112 }, { "epoch": 0.77, "learning_rate": 0.00011453903554521384, "loss": 0.0667, "theoretical_loss": 3.355646730630207, "tokens_seen": 2925789184 }, { "epoch": 0.77, "learning_rate": 0.00011449891679370939, "loss": 0.0631, "theoretical_loss": 3.3556352123104567, "tokens_seen": 2925920256 }, { "epoch": 0.77, "learning_rate": 0.00011445879804220492, "loss": 0.0675, "theoretical_loss": 3.3556236946511473, "tokens_seen": 2926051328 }, { "epoch": 0.77, "learning_rate": 0.00011441867929070047, "loss": 0.0663, "theoretical_loss": 3.355612177652212, "tokens_seen": 2926182400 }, { "epoch": 0.77, "learning_rate": 0.00011437856053919603, "loss": 0.0686, "theoretical_loss": 3.355600661313582, "tokens_seen": 2926313472 }, { "epoch": 0.77, "learning_rate": 0.00011433844178769157, "loss": 0.0665, "theoretical_loss": 3.3555891456351916, "tokens_seen": 2926444544 }, { "epoch": 0.77, "learning_rate": 0.00011429832303618712, "loss": 0.0659, "theoretical_loss": 3.3555776306169722, "tokens_seen": 2926575616 }, { "epoch": 0.77, "learning_rate": 0.00011425820428468265, "loss": 0.0639, "theoretical_loss": 3.355566116258857, "tokens_seen": 2926706688 }, { "epoch": 0.77, "learning_rate": 0.0001142180855331782, "loss": 0.0659, "theoretical_loss": 3.355554602560778, "tokens_seen": 2926837760 }, { "epoch": 0.77, "learning_rate": 0.00011417796678167376, "loss": 0.0657, "theoretical_loss": 3.3555430895226683, "tokens_seen": 2926968832 }, { "epoch": 0.77, "learning_rate": 0.0001141378480301693, "loss": 0.0633, "theoretical_loss": 3.3555315771444603, "tokens_seen": 2927099904 }, { "epoch": 0.77, "learning_rate": 0.00011409772927866486, "loss": 0.0676, "theoretical_loss": 3.3555200654260866, "tokens_seen": 2927230976 }, { "epoch": 0.77, "learning_rate": 0.00011405761052716039, "loss": 0.0652, "theoretical_loss": 3.3555085543674803, "tokens_seen": 2927362048 }, { "epoch": 0.77, "learning_rate": 0.00011401749177565594, "loss": 0.0657, "theoretical_loss": 3.3554970439685734, "tokens_seen": 2927493120 }, { "epoch": 0.77, "learning_rate": 0.0001139773730241515, "loss": 0.0644, "theoretical_loss": 3.355485534229299, "tokens_seen": 2927624192 }, { "epoch": 0.77, "learning_rate": 0.00011393725427264704, "loss": 0.0622, "theoretical_loss": 3.3554740251495896, "tokens_seen": 2927755264 }, { "epoch": 0.77, "objective/train/advantage_avg": 0.000546039140317589, "objective/train/docs_used": 1064246, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3171331882476807, "objective/train/original_loss": 1.3171331882476807, "objective/train/theoretical_loss": 3.3554625167293777, "objective/train/tokens_used": 1298410976, "objective/train/value_avg": -0.00565338134765625, "objective/train/value_loss": 0.00017991142522078007, "objective/train/value_max": -3.451108932495117e-05, "objective/train/value_min": -0.96923828125, "objective/train/value_reward_corr": 0.6964678600700871, "objective/train/value_std": 0.01177978515625, "objective/train/weight_avg": 1.0006215572357178, "objective/train/weighted_lm_loss": 1.3176209926605225, "objective/train/weights_max": 1.281307339668274, "objective/train/weights_min": 0.22952909767627716, "theoretical_loss": 3.3554625167293777, "tokens_seen": 2927886336 }, { "epoch": 0.77, "learning_rate": 0.00011389713552114259, "loss": 0.0676, "theoretical_loss": 3.3554625167293777, "tokens_seen": 2927886336 }, { "epoch": 0.77, "learning_rate": 0.00011385701676963812, "loss": 0.0698, "theoretical_loss": 3.3554510089685965, "tokens_seen": 2928017408 }, { "epoch": 0.77, "learning_rate": 0.00011381689801813367, "loss": 0.0681, "theoretical_loss": 3.355439501867178, "tokens_seen": 2928148480 }, { "epoch": 0.77, "learning_rate": 0.00011377677926662923, "loss": 0.0667, "theoretical_loss": 3.355427995425056, "tokens_seen": 2928279552 }, { "epoch": 0.77, "learning_rate": 0.00011373666051512477, "loss": 0.0631, "theoretical_loss": 3.3554164896421614, "tokens_seen": 2928410624 }, { "epoch": 0.77, "learning_rate": 0.00011369654176362032, "loss": 0.0717, "theoretical_loss": 3.3554049845184286, "tokens_seen": 2928541696 }, { "epoch": 0.77, "learning_rate": 0.00011365642301211585, "loss": 0.0632, "theoretical_loss": 3.35539348005379, "tokens_seen": 2928672768 }, { "epoch": 0.78, "learning_rate": 0.00011361630426061141, "loss": 0.0673, "theoretical_loss": 3.355381976248178, "tokens_seen": 2928803840 }, { "epoch": 0.78, "learning_rate": 0.00011357618550910696, "loss": 0.0692, "theoretical_loss": 3.3553704731015253, "tokens_seen": 2928934912 }, { "epoch": 0.78, "learning_rate": 0.0001135360667576025, "loss": 0.067, "theoretical_loss": 3.355358970613765, "tokens_seen": 2929065984 }, { "epoch": 0.78, "learning_rate": 0.00011349594800609806, "loss": 0.0669, "theoretical_loss": 3.3553474687848297, "tokens_seen": 2929197056 }, { "epoch": 0.78, "learning_rate": 0.0001134558292545936, "loss": 0.0636, "theoretical_loss": 3.355335967614652, "tokens_seen": 2929328128 }, { "epoch": 0.78, "learning_rate": 0.00011341571050308914, "loss": 0.0642, "theoretical_loss": 3.355324467103165, "tokens_seen": 2929459200 }, { "epoch": 0.78, "learning_rate": 0.0001133755917515847, "loss": 0.065, "theoretical_loss": 3.3553129672503017, "tokens_seen": 2929590272 }, { "epoch": 0.78, "learning_rate": 0.00011333547300008024, "loss": 0.064, "theoretical_loss": 3.3553014680559943, "tokens_seen": 2929721344 }, { "epoch": 0.78, "learning_rate": 0.00011329535424857579, "loss": 0.0635, "theoretical_loss": 3.355289969520176, "tokens_seen": 2929852416 }, { "epoch": 0.78, "learning_rate": 0.00011325523549707133, "loss": 0.0665, "theoretical_loss": 3.3552784716427797, "tokens_seen": 2929983488 }, { "epoch": 0.78, "learning_rate": 0.00011321511674556687, "loss": 0.0651, "theoretical_loss": 3.355266974423738, "tokens_seen": 2930114560 }, { "epoch": 0.78, "learning_rate": 0.00011317499799406243, "loss": 0.0678, "theoretical_loss": 3.355255477862984, "tokens_seen": 2930245632 }, { "epoch": 0.78, "learning_rate": 0.00011313487924255797, "loss": 0.0678, "theoretical_loss": 3.3552439819604505, "tokens_seen": 2930376704 }, { "epoch": 0.78, "learning_rate": 0.00011309476049105352, "loss": 0.0667, "theoretical_loss": 3.35523248671607, "tokens_seen": 2930507776 }, { "epoch": 0.78, "learning_rate": 0.00011305464173954908, "loss": 0.0627, "theoretical_loss": 3.355220992129776, "tokens_seen": 2930638848 }, { "epoch": 0.78, "learning_rate": 0.00011301452298804461, "loss": 0.0642, "theoretical_loss": 3.3552094982015013, "tokens_seen": 2930769920 }, { "epoch": 0.78, "learning_rate": 0.00011297440423654016, "loss": 0.066, "theoretical_loss": 3.3551980049311783, "tokens_seen": 2930900992 }, { "epoch": 0.78, "learning_rate": 0.0001129342854850357, "loss": 0.0652, "theoretical_loss": 3.35518651231874, "tokens_seen": 2931032064 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.0003943601914215833, "objective/train/docs_used": 1065435, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4010844230651855, "objective/train/original_loss": 1.4010841846466064, "objective/train/theoretical_loss": 3.35517502036412, "objective/train/tokens_used": 1301687776, "objective/train/value_avg": -0.0067901611328125, "objective/train/value_loss": 0.00016236299416050315, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.453125, "objective/train/value_reward_corr": 0.7453822016831452, "objective/train/value_std": 0.01470947265625, "objective/train/weight_avg": 1.000468373298645, "objective/train/weighted_lm_loss": 1.4012668132781982, "objective/train/weights_max": 1.4077798128128052, "objective/train/weights_min": 0.38067081570625305, "theoretical_loss": 3.35517502036412, "tokens_seen": 2931163136 }, { "epoch": 0.78, "learning_rate": 0.00011289416673353126, "loss": 0.0645, "theoretical_loss": 3.35517502036412, "tokens_seen": 2931163136 }, { "epoch": 0.78, "learning_rate": 0.00011285404798202681, "loss": 0.0652, "theoretical_loss": 3.3551635290672506, "tokens_seen": 2931294208 }, { "epoch": 0.78, "learning_rate": 0.00011281392923052234, "loss": 0.0645, "theoretical_loss": 3.355152038428065, "tokens_seen": 2931425280 }, { "epoch": 0.78, "learning_rate": 0.0001127738104790179, "loss": 0.0667, "theoretical_loss": 3.3551405484464962, "tokens_seen": 2931556352 }, { "epoch": 0.78, "learning_rate": 0.00011273369172751344, "loss": 0.0624, "theoretical_loss": 3.355129059122477, "tokens_seen": 2931687424 }, { "epoch": 0.78, "learning_rate": 0.00011269357297600899, "loss": 0.061, "theoretical_loss": 3.35511757045594, "tokens_seen": 2931818496 }, { "epoch": 0.78, "learning_rate": 0.00011265345422450455, "loss": 0.0653, "theoretical_loss": 3.3551060824468193, "tokens_seen": 2931949568 }, { "epoch": 0.78, "learning_rate": 0.00011261333547300007, "loss": 0.0634, "theoretical_loss": 3.3550945950950473, "tokens_seen": 2932080640 }, { "epoch": 0.78, "learning_rate": 0.00011257321672149563, "loss": 0.0645, "theoretical_loss": 3.355083108400556, "tokens_seen": 2932211712 }, { "epoch": 0.78, "learning_rate": 0.00011253309796999117, "loss": 0.0631, "theoretical_loss": 3.35507162236328, "tokens_seen": 2932342784 }, { "epoch": 0.78, "learning_rate": 0.00011249297921848673, "loss": 0.0654, "theoretical_loss": 3.3550601369831514, "tokens_seen": 2932473856 }, { "epoch": 0.78, "learning_rate": 0.00011245286046698228, "loss": 0.066, "theoretical_loss": 3.3550486522601037, "tokens_seen": 2932604928 }, { "epoch": 0.78, "learning_rate": 0.00011241274171547782, "loss": 0.0665, "theoretical_loss": 3.3550371681940696, "tokens_seen": 2932736000 }, { "epoch": 0.78, "learning_rate": 0.00011237262296397336, "loss": 0.064, "theoretical_loss": 3.355025684784982, "tokens_seen": 2932867072 }, { "epoch": 0.78, "learning_rate": 0.0001123325042124689, "loss": 0.0621, "theoretical_loss": 3.3550142020327747, "tokens_seen": 2932998144 }, { "epoch": 0.78, "learning_rate": 0.00011229238546096446, "loss": 0.0664, "theoretical_loss": 3.35500271993738, "tokens_seen": 2933129216 }, { "epoch": 0.78, "learning_rate": 0.00011225226670946001, "loss": 0.0609, "theoretical_loss": 3.3549912384987315, "tokens_seen": 2933260288 }, { "epoch": 0.78, "learning_rate": 0.00011221214795795556, "loss": 0.0697, "theoretical_loss": 3.354979757716762, "tokens_seen": 2933391360 }, { "epoch": 0.78, "learning_rate": 0.0001121720292064511, "loss": 0.0621, "theoretical_loss": 3.3549682775914045, "tokens_seen": 2933522432 }, { "epoch": 0.78, "learning_rate": 0.00011213191045494664, "loss": 0.0654, "theoretical_loss": 3.3549567981225925, "tokens_seen": 2933653504 }, { "epoch": 0.78, "learning_rate": 0.00011209179170344219, "loss": 0.068, "theoretical_loss": 3.354945319310259, "tokens_seen": 2933784576 }, { "epoch": 0.78, "learning_rate": 0.00011205167295193775, "loss": 0.069, "theoretical_loss": 3.354933841154337, "tokens_seen": 2933915648 }, { "epoch": 0.78, "learning_rate": 0.00011201155420043329, "loss": 0.0652, "theoretical_loss": 3.3549223636547594, "tokens_seen": 2934046720 }, { "epoch": 0.78, "learning_rate": 0.00011197143544892883, "loss": 0.0631, "theoretical_loss": 3.35491088681146, "tokens_seen": 2934177792 }, { "epoch": 0.78, "learning_rate": 0.00011193131669742437, "loss": 0.062, "theoretical_loss": 3.3548994106243715, "tokens_seen": 2934308864 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.00012145860819146037, "objective/train/docs_used": 1066769, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.356713056564331, "objective/train/original_loss": 1.3567129373550415, "objective/train/theoretical_loss": 3.3548879350934273, "objective/train/tokens_used": 1304964576, "objective/train/value_avg": -0.004367828369140625, "objective/train/value_loss": 0.0001049613201757893, "objective/train/value_max": -2.9325485229492188e-05, "objective/train/value_min": -0.2252197265625, "objective/train/value_reward_corr": 0.5708482940828121, "objective/train/value_std": 0.0071563720703125, "objective/train/weight_avg": 1.0001699924468994, "objective/train/weighted_lm_loss": 1.3564629554748535, "objective/train/weights_max": 1.1621463298797607, "objective/train/weights_min": 0.381368488073349, "theoretical_loss": 3.3548879350934273, "tokens_seen": 2934439936 }, { "epoch": 0.78, "learning_rate": 0.00011189119794591993, "loss": 0.067, "theoretical_loss": 3.3548879350934273, "tokens_seen": 2934439936 }, { "epoch": 0.78, "learning_rate": 0.00011185107919441548, "loss": 0.0647, "theoretical_loss": 3.35487646021856, "tokens_seen": 2934571008 }, { "epoch": 0.78, "learning_rate": 0.00011181096044291102, "loss": 0.0596, "theoretical_loss": 3.3548649859997033, "tokens_seen": 2934702080 }, { "epoch": 0.78, "learning_rate": 0.00011177084169140656, "loss": 0.0647, "theoretical_loss": 3.3548535124367906, "tokens_seen": 2934833152 }, { "epoch": 0.78, "learning_rate": 0.0001117307229399021, "loss": 0.0634, "theoretical_loss": 3.354842039529755, "tokens_seen": 2934964224 }, { "epoch": 0.78, "learning_rate": 0.00011169060418839766, "loss": 0.0675, "theoretical_loss": 3.3548305672785292, "tokens_seen": 2935095296 }, { "epoch": 0.78, "learning_rate": 0.00011165048543689321, "loss": 0.0654, "theoretical_loss": 3.3548190956830473, "tokens_seen": 2935226368 }, { "epoch": 0.78, "learning_rate": 0.00011161036668538876, "loss": 0.0651, "theoretical_loss": 3.3548076247432417, "tokens_seen": 2935357440 }, { "epoch": 0.78, "learning_rate": 0.0001115702479338843, "loss": 0.0651, "theoretical_loss": 3.354796154459046, "tokens_seen": 2935488512 }, { "epoch": 0.78, "learning_rate": 0.00011153012918237984, "loss": 0.0647, "theoretical_loss": 3.3547846848303933, "tokens_seen": 2935619584 }, { "epoch": 0.78, "learning_rate": 0.00011149001043087539, "loss": 0.0669, "theoretical_loss": 3.354773215857217, "tokens_seen": 2935750656 }, { "epoch": 0.78, "learning_rate": 0.00011144989167937095, "loss": 0.0641, "theoretical_loss": 3.3547617475394507, "tokens_seen": 2935881728 }, { "epoch": 0.78, "learning_rate": 0.00011140977292786649, "loss": 0.0668, "theoretical_loss": 3.354750279877027, "tokens_seen": 2936012800 }, { "epoch": 0.78, "learning_rate": 0.00011136965417636203, "loss": 0.0618, "theoretical_loss": 3.35473881286988, "tokens_seen": 2936143872 }, { "epoch": 0.78, "learning_rate": 0.00011132953542485757, "loss": 0.0648, "theoretical_loss": 3.3547273465179424, "tokens_seen": 2936274944 }, { "epoch": 0.78, "learning_rate": 0.00011128941667335313, "loss": 0.0638, "theoretical_loss": 3.3547158808211472, "tokens_seen": 2936406016 }, { "epoch": 0.78, "learning_rate": 0.00011124929792184868, "loss": 0.0646, "theoretical_loss": 3.354704415779429, "tokens_seen": 2936537088 }, { "epoch": 0.78, "learning_rate": 0.00011120917917034422, "loss": 0.0611, "theoretical_loss": 3.35469295139272, "tokens_seen": 2936668160 }, { "epoch": 0.78, "learning_rate": 0.00011116906041883978, "loss": 0.0642, "theoretical_loss": 3.3546814876609536, "tokens_seen": 2936799232 }, { "epoch": 0.78, "learning_rate": 0.0001111289416673353, "loss": 0.0625, "theoretical_loss": 3.354670024584064, "tokens_seen": 2936930304 }, { "epoch": 0.78, "learning_rate": 0.00011108882291583086, "loss": 0.064, "theoretical_loss": 3.354658562161984, "tokens_seen": 2937061376 }, { "epoch": 0.78, "learning_rate": 0.00011104870416432641, "loss": 0.0634, "theoretical_loss": 3.3546471003946468, "tokens_seen": 2937192448 }, { "epoch": 0.78, "learning_rate": 0.00011100858541282196, "loss": 0.0632, "theoretical_loss": 3.354635639281986, "tokens_seen": 2937323520 }, { "epoch": 0.78, "learning_rate": 0.00011096846666131751, "loss": 0.0662, "theoretical_loss": 3.354624178823935, "tokens_seen": 2937454592 }, { "epoch": 0.78, "learning_rate": 0.00011092834790981304, "loss": 0.0657, "theoretical_loss": 3.3546127190204276, "tokens_seen": 2937585664 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.0009938053553923965, "objective/train/docs_used": 1068027, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4413843154907227, "objective/train/original_loss": 1.4413840770721436, "objective/train/theoretical_loss": 3.354601259871396, "objective/train/tokens_used": 1308241376, "objective/train/value_avg": -0.00833892822265625, "objective/train/value_loss": 0.00017810236022341996, "objective/train/value_max": -4.947185516357422e-05, "objective/train/value_min": -0.39306640625, "objective/train/value_reward_corr": 0.6739346790890153, "objective/train/value_std": 0.0141448974609375, "objective/train/weight_avg": 1.0010764598846436, "objective/train/weighted_lm_loss": 1.4431509971618652, "objective/train/weights_max": 1.448595643043518, "objective/train/weights_min": 0.3721221685409546, "theoretical_loss": 3.354601259871396, "tokens_seen": 2937716736 }, { "epoch": 0.78, "learning_rate": 0.0001108882291583086, "loss": 0.0642, "theoretical_loss": 3.354601259871396, "tokens_seen": 2937716736 }, { "epoch": 0.78, "learning_rate": 0.00011084811040680415, "loss": 0.0659, "theoretical_loss": 3.354589801376775, "tokens_seen": 2937847808 }, { "epoch": 0.78, "learning_rate": 0.00011080799165529969, "loss": 0.0616, "theoretical_loss": 3.354578343536497, "tokens_seen": 2937978880 }, { "epoch": 0.78, "learning_rate": 0.00011076787290379524, "loss": 0.0655, "theoretical_loss": 3.3545668863504963, "tokens_seen": 2938109952 }, { "epoch": 0.78, "learning_rate": 0.00011072775415229077, "loss": 0.064, "theoretical_loss": 3.354555429818706, "tokens_seen": 2938241024 }, { "epoch": 0.78, "learning_rate": 0.00011068763540078633, "loss": 0.0637, "theoretical_loss": 3.3545439739410594, "tokens_seen": 2938372096 }, { "epoch": 0.78, "learning_rate": 0.00011064751664928188, "loss": 0.0678, "theoretical_loss": 3.35453251871749, "tokens_seen": 2938503168 }, { "epoch": 0.78, "learning_rate": 0.00011060739789777742, "loss": 0.067, "theoretical_loss": 3.3545210641479315, "tokens_seen": 2938634240 }, { "epoch": 0.78, "learning_rate": 0.00011056727914627298, "loss": 0.0637, "theoretical_loss": 3.3545096102323173, "tokens_seen": 2938765312 }, { "epoch": 0.78, "learning_rate": 0.0001105271603947685, "loss": 0.0606, "theoretical_loss": 3.354498156970581, "tokens_seen": 2938896384 }, { "epoch": 0.78, "learning_rate": 0.00011048704164326406, "loss": 0.0668, "theoretical_loss": 3.354486704362656, "tokens_seen": 2939027456 }, { "epoch": 0.78, "learning_rate": 0.00011044692289175962, "loss": 0.0622, "theoretical_loss": 3.3544752524084753, "tokens_seen": 2939158528 }, { "epoch": 0.78, "learning_rate": 0.00011040680414025516, "loss": 0.0639, "theoretical_loss": 3.354463801107973, "tokens_seen": 2939289600 }, { "epoch": 0.78, "learning_rate": 0.00011036668538875071, "loss": 0.0644, "theoretical_loss": 3.3544523504610826, "tokens_seen": 2939420672 }, { "epoch": 0.78, "learning_rate": 0.00011032656663724624, "loss": 0.0636, "theoretical_loss": 3.354440900467738, "tokens_seen": 2939551744 }, { "epoch": 0.78, "learning_rate": 0.0001102864478857418, "loss": 0.0617, "theoretical_loss": 3.354429451127872, "tokens_seen": 2939682816 }, { "epoch": 0.78, "learning_rate": 0.00011024632913423735, "loss": 0.0632, "theoretical_loss": 3.354418002441419, "tokens_seen": 2939813888 }, { "epoch": 0.78, "learning_rate": 0.00011020621038273289, "loss": 0.0654, "theoretical_loss": 3.354406554408312, "tokens_seen": 2939944960 }, { "epoch": 0.78, "learning_rate": 0.00011016609163122845, "loss": 0.0669, "theoretical_loss": 3.3543951070284845, "tokens_seen": 2940076032 }, { "epoch": 0.78, "learning_rate": 0.00011012597287972397, "loss": 0.0666, "theoretical_loss": 3.3543836603018704, "tokens_seen": 2940207104 }, { "epoch": 0.78, "learning_rate": 0.00011008585412821953, "loss": 0.0642, "theoretical_loss": 3.3543722142284036, "tokens_seen": 2940338176 }, { "epoch": 0.78, "learning_rate": 0.00011004573537671508, "loss": 0.0637, "theoretical_loss": 3.354360768808017, "tokens_seen": 2940469248 }, { "epoch": 0.78, "learning_rate": 0.00011000561662521062, "loss": 0.0634, "theoretical_loss": 3.3543493240406446, "tokens_seen": 2940600320 }, { "epoch": 0.78, "learning_rate": 0.00010996549787370618, "loss": 0.069, "theoretical_loss": 3.35433787992622, "tokens_seen": 2940731392 }, { "epoch": 0.78, "learning_rate": 0.0001099253791222017, "loss": 0.0637, "theoretical_loss": 3.3543264364646768, "tokens_seen": 2940862464 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.00016547272389288992, "objective/train/docs_used": 1069310, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3632683753967285, "objective/train/original_loss": 1.3632681369781494, "objective/train/theoretical_loss": 3.354314993655949, "objective/train/tokens_used": 1311518176, "objective/train/value_avg": -0.0090789794921875, "objective/train/value_loss": 0.0001451668213121593, "objective/train/value_max": -2.4497509002685547e-05, "objective/train/value_min": -0.311279296875, "objective/train/value_reward_corr": 0.7400341808515996, "objective/train/value_std": 0.0135650634765625, "objective/train/weight_avg": 1.0002360343933105, "objective/train/weighted_lm_loss": 1.363125205039978, "objective/train/weights_max": 1.2590364217758179, "objective/train/weights_min": 0.6070769429206848, "theoretical_loss": 3.354314993655949, "tokens_seen": 2940993536 }, { "epoch": 0.78, "learning_rate": 0.00010988526037069726, "loss": 0.0645, "theoretical_loss": 3.354314993655949, "tokens_seen": 2940993536 }, { "epoch": 0.78, "learning_rate": 0.00010984514161919282, "loss": 0.0626, "theoretical_loss": 3.35430355149997, "tokens_seen": 2941124608 }, { "epoch": 0.78, "learning_rate": 0.00010980502286768836, "loss": 0.0685, "theoretical_loss": 3.354292109996673, "tokens_seen": 2941255680 }, { "epoch": 0.78, "learning_rate": 0.00010976490411618391, "loss": 0.0651, "theoretical_loss": 3.3542806691459925, "tokens_seen": 2941386752 }, { "epoch": 0.78, "learning_rate": 0.00010972478536467945, "loss": 0.0625, "theoretical_loss": 3.354269228947862, "tokens_seen": 2941517824 }, { "epoch": 0.78, "learning_rate": 0.000109684666613175, "loss": 0.0672, "theoretical_loss": 3.3542577894022148, "tokens_seen": 2941648896 }, { "epoch": 0.78, "learning_rate": 0.00010964454786167055, "loss": 0.0636, "theoretical_loss": 3.354246350508985, "tokens_seen": 2941779968 }, { "epoch": 0.78, "learning_rate": 0.00010960442911016609, "loss": 0.0638, "theoretical_loss": 3.354234912268106, "tokens_seen": 2941911040 }, { "epoch": 0.78, "learning_rate": 0.00010956431035866165, "loss": 0.0637, "theoretical_loss": 3.354223474679512, "tokens_seen": 2942042112 }, { "epoch": 0.78, "learning_rate": 0.00010952419160715719, "loss": 0.0616, "theoretical_loss": 3.354212037743136, "tokens_seen": 2942173184 }, { "epoch": 0.78, "learning_rate": 0.00010948407285565273, "loss": 0.0652, "theoretical_loss": 3.354200601458913, "tokens_seen": 2942304256 }, { "epoch": 0.78, "learning_rate": 0.00010944395410414828, "loss": 0.0666, "theoretical_loss": 3.3541891658267753, "tokens_seen": 2942435328 }, { "epoch": 0.78, "learning_rate": 0.00010940383535264382, "loss": 0.0643, "theoretical_loss": 3.354177730846658, "tokens_seen": 2942566400 }, { "epoch": 0.78, "learning_rate": 0.00010936371660113938, "loss": 0.0625, "theoretical_loss": 3.3541662965184935, "tokens_seen": 2942697472 }, { "epoch": 0.78, "learning_rate": 0.00010932359784963492, "loss": 0.0671, "theoretical_loss": 3.354154862842217, "tokens_seen": 2942828544 }, { "epoch": 0.78, "learning_rate": 0.00010928347909813046, "loss": 0.0635, "theoretical_loss": 3.3541434298177615, "tokens_seen": 2942959616 }, { "epoch": 0.78, "learning_rate": 0.00010924336034662602, "loss": 0.0654, "theoretical_loss": 3.354131997445061, "tokens_seen": 2943090688 }, { "epoch": 0.78, "learning_rate": 0.00010920324159512156, "loss": 0.0653, "theoretical_loss": 3.354120565724049, "tokens_seen": 2943221760 }, { "epoch": 0.78, "learning_rate": 0.00010916312284361711, "loss": 0.0645, "theoretical_loss": 3.3541091346546597, "tokens_seen": 2943352832 }, { "epoch": 0.78, "learning_rate": 0.00010912300409211267, "loss": 0.0645, "theoretical_loss": 3.354097704236827, "tokens_seen": 2943483904 }, { "epoch": 0.78, "learning_rate": 0.0001090828853406082, "loss": 0.0627, "theoretical_loss": 3.3540862744704842, "tokens_seen": 2943614976 }, { "epoch": 0.78, "learning_rate": 0.00010904276658910375, "loss": 0.0662, "theoretical_loss": 3.354074845355566, "tokens_seen": 2943746048 }, { "epoch": 0.78, "learning_rate": 0.00010900264783759929, "loss": 0.0692, "theoretical_loss": 3.3540634168920063, "tokens_seen": 2943877120 }, { "epoch": 0.78, "learning_rate": 0.00010896252908609485, "loss": 0.0659, "theoretical_loss": 3.354051989079738, "tokens_seen": 2944008192 }, { "epoch": 0.78, "learning_rate": 0.0001089224103345904, "loss": 0.0647, "theoretical_loss": 3.354040561918695, "tokens_seen": 2944139264 }, { "epoch": 0.78, "objective/train/advantage_avg": 0.00018057593842968345, "objective/train/docs_used": 1070538, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.196791648864746, "objective/train/original_loss": 1.1967915296554565, "objective/train/theoretical_loss": 3.3540291354088123, "objective/train/tokens_used": 1314794976, "objective/train/value_avg": -0.00799560546875, "objective/train/value_loss": 0.0002412875328445807, "objective/train/value_max": -2.2470951080322266e-05, "objective/train/value_min": -0.568359375, "objective/train/value_reward_corr": 0.7857869043217764, "objective/train/value_std": 0.01910400390625, "objective/train/weight_avg": 1.0002779960632324, "objective/train/weighted_lm_loss": 1.1968539953231812, "objective/train/weights_max": 1.1173384189605713, "objective/train/weights_min": 0.22658516466617584, "theoretical_loss": 3.3540291354088123, "tokens_seen": 2944270336 }, { "epoch": 0.78, "learning_rate": 0.00010888229158308593, "loss": 0.0602, "theoretical_loss": 3.3540291354088123, "tokens_seen": 2944270336 }, { "epoch": 0.78, "learning_rate": 0.00010884217283158148, "loss": 0.065, "theoretical_loss": 3.354017709550023, "tokens_seen": 2944401408 }, { "epoch": 0.78, "learning_rate": 0.00010880205408007702, "loss": 0.0635, "theoretical_loss": 3.354006284342262, "tokens_seen": 2944532480 }, { "epoch": 0.78, "learning_rate": 0.00010876193532857258, "loss": 0.065, "theoretical_loss": 3.353994859785461, "tokens_seen": 2944663552 }, { "epoch": 0.78, "learning_rate": 0.00010872181657706813, "loss": 0.0655, "theoretical_loss": 3.353983435879557, "tokens_seen": 2944794624 }, { "epoch": 0.78, "learning_rate": 0.00010868169782556366, "loss": 0.0643, "theoretical_loss": 3.353972012624481, "tokens_seen": 2944925696 }, { "epoch": 0.78, "learning_rate": 0.00010864157907405922, "loss": 0.0653, "theoretical_loss": 3.353960590020169, "tokens_seen": 2945056768 }, { "epoch": 0.78, "learning_rate": 0.00010860146032255476, "loss": 0.064, "theoretical_loss": 3.3539491680665545, "tokens_seen": 2945187840 }, { "epoch": 0.79, "learning_rate": 0.00010856134157105031, "loss": 0.0659, "theoretical_loss": 3.3539377467635707, "tokens_seen": 2945318912 }, { "epoch": 0.79, "learning_rate": 0.00010852122281954587, "loss": 0.062, "theoretical_loss": 3.3539263261111523, "tokens_seen": 2945449984 }, { "epoch": 0.79, "learning_rate": 0.00010848110406804141, "loss": 0.0627, "theoretical_loss": 3.353914906109233, "tokens_seen": 2945581056 }, { "epoch": 0.79, "learning_rate": 0.00010844098531653695, "loss": 0.0686, "theoretical_loss": 3.3539034867577473, "tokens_seen": 2945712128 }, { "epoch": 0.79, "learning_rate": 0.00010840086656503249, "loss": 0.0635, "theoretical_loss": 3.353892068056629, "tokens_seen": 2945843200 }, { "epoch": 0.79, "learning_rate": 0.00010836074781352805, "loss": 0.0621, "theoretical_loss": 3.353880650005811, "tokens_seen": 2945974272 }, { "epoch": 0.79, "learning_rate": 0.0001083206290620236, "loss": 0.0627, "theoretical_loss": 3.3538692326052293, "tokens_seen": 2946105344 }, { "epoch": 0.79, "learning_rate": 0.00010828051031051914, "loss": 0.0643, "theoretical_loss": 3.3538578158548167, "tokens_seen": 2946236416 }, { "epoch": 0.79, "learning_rate": 0.00010824039155901468, "loss": 0.063, "theoretical_loss": 3.3538463997545076, "tokens_seen": 2946367488 }, { "epoch": 0.79, "learning_rate": 0.00010820027280751023, "loss": 0.0614, "theoretical_loss": 3.3538349843042354, "tokens_seen": 2946498560 }, { "epoch": 0.79, "learning_rate": 0.00010816015405600578, "loss": 0.0706, "theoretical_loss": 3.3538235695039353, "tokens_seen": 2946629632 }, { "epoch": 0.79, "learning_rate": 0.00010812003530450134, "loss": 0.0624, "theoretical_loss": 3.3538121553535407, "tokens_seen": 2946760704 }, { "epoch": 0.79, "learning_rate": 0.00010807991655299688, "loss": 0.0659, "theoretical_loss": 3.353800741852986, "tokens_seen": 2946891776 }, { "epoch": 0.79, "learning_rate": 0.00010803979780149242, "loss": 0.068, "theoretical_loss": 3.353789329002205, "tokens_seen": 2947022848 }, { "epoch": 0.79, "learning_rate": 0.00010799967904998796, "loss": 0.0668, "theoretical_loss": 3.353777916801132, "tokens_seen": 2947153920 }, { "epoch": 0.79, "learning_rate": 0.00010795956029848351, "loss": 0.066, "theoretical_loss": 3.3537665052497005, "tokens_seen": 2947284992 }, { "epoch": 0.79, "learning_rate": 0.00010791944154697907, "loss": 0.0638, "theoretical_loss": 3.3537550943478456, "tokens_seen": 2947416064 }, { "epoch": 0.79, "objective/train/advantage_avg": -0.0001552351750433445, "objective/train/docs_used": 1071710, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3176791667938232, "objective/train/original_loss": 1.3176791667938232, "objective/train/theoretical_loss": 3.353743684095501, "objective/train/tokens_used": 1318071776, "objective/train/value_avg": -0.008697509765625, "objective/train/value_loss": 0.00048761151265352964, "objective/train/value_max": -2.6285648345947266e-05, "objective/train/value_min": -0.8115234375, "objective/train/value_reward_corr": 0.6224587788563438, "objective/train/value_std": 0.0175323486328125, "objective/train/weight_avg": 1.0000568628311157, "objective/train/weighted_lm_loss": 1.3177754878997803, "objective/train/weights_max": 2.1628236770629883, "objective/train/weights_min": 0.22375836968421936, "theoretical_loss": 3.353743684095501, "tokens_seen": 2947547136 }, { "epoch": 0.79, "learning_rate": 0.00010787932279547461, "loss": 0.0641, "theoretical_loss": 3.353743684095501, "tokens_seen": 2947547136 }, { "epoch": 0.79, "learning_rate": 0.00010783920404397015, "loss": 0.0667, "theoretical_loss": 3.353732274492601, "tokens_seen": 2947678208 }, { "epoch": 0.79, "learning_rate": 0.00010779908529246569, "loss": 0.0664, "theoretical_loss": 3.3537208655390796, "tokens_seen": 2947809280 }, { "epoch": 0.79, "learning_rate": 0.00010775896654096125, "loss": 0.0642, "theoretical_loss": 3.3537094572348707, "tokens_seen": 2947940352 }, { "epoch": 0.79, "learning_rate": 0.0001077188477894568, "loss": 0.0634, "theoretical_loss": 3.353698049579909, "tokens_seen": 2948071424 }, { "epoch": 0.79, "learning_rate": 0.00010767872903795234, "loss": 0.0646, "theoretical_loss": 3.3536866425741287, "tokens_seen": 2948202496 }, { "epoch": 0.79, "learning_rate": 0.00010763861028644788, "loss": 0.0608, "theoretical_loss": 3.3536752362174633, "tokens_seen": 2948333568 }, { "epoch": 0.79, "learning_rate": 0.00010759849153494343, "loss": 0.0629, "theoretical_loss": 3.353663830509848, "tokens_seen": 2948464640 }, { "epoch": 0.79, "learning_rate": 0.00010755837278343898, "loss": 0.0606, "theoretical_loss": 3.3536524254512163, "tokens_seen": 2948595712 }, { "epoch": 0.79, "learning_rate": 0.00010751825403193454, "loss": 0.062, "theoretical_loss": 3.3536410210415024, "tokens_seen": 2948726784 }, { "epoch": 0.79, "learning_rate": 0.00010747813528043008, "loss": 0.0622, "theoretical_loss": 3.353629617280641, "tokens_seen": 2948857856 }, { "epoch": 0.79, "learning_rate": 0.00010743801652892563, "loss": 0.0653, "theoretical_loss": 3.353618214168566, "tokens_seen": 2948988928 }, { "epoch": 0.79, "learning_rate": 0.00010739789777742116, "loss": 0.0597, "theoretical_loss": 3.3536068117052116, "tokens_seen": 2949120000 }, { "epoch": 0.79, "learning_rate": 0.00010735777902591671, "loss": 0.0655, "theoretical_loss": 3.3535954098905125, "tokens_seen": 2949251072 }, { "epoch": 0.79, "learning_rate": 0.00010731766027441227, "loss": 0.0599, "theoretical_loss": 3.3535840087244027, "tokens_seen": 2949382144 }, { "epoch": 0.79, "learning_rate": 0.00010727754152290781, "loss": 0.0678, "theoretical_loss": 3.3535726082068162, "tokens_seen": 2949513216 }, { "epoch": 0.79, "learning_rate": 0.00010723742277140337, "loss": 0.0652, "theoretical_loss": 3.3535612083376876, "tokens_seen": 2949644288 }, { "epoch": 0.79, "learning_rate": 0.00010719730401989889, "loss": 0.066, "theoretical_loss": 3.3535498091169513, "tokens_seen": 2949775360 }, { "epoch": 0.79, "learning_rate": 0.00010715718526839445, "loss": 0.0647, "theoretical_loss": 3.353538410544542, "tokens_seen": 2949906432 }, { "epoch": 0.79, "learning_rate": 0.00010711706651689, "loss": 0.0651, "theoretical_loss": 3.353527012620393, "tokens_seen": 2950037504 }, { "epoch": 0.79, "learning_rate": 0.00010707694776538554, "loss": 0.0631, "theoretical_loss": 3.3535156153444388, "tokens_seen": 2950168576 }, { "epoch": 0.79, "learning_rate": 0.0001070368290138811, "loss": 0.063, "theoretical_loss": 3.3535042187166146, "tokens_seen": 2950299648 }, { "epoch": 0.79, "learning_rate": 0.00010699671026237663, "loss": 0.0662, "theoretical_loss": 3.353492822736854, "tokens_seen": 2950430720 }, { "epoch": 0.79, "learning_rate": 0.00010695659151087218, "loss": 0.0619, "theoretical_loss": 3.3534814274050917, "tokens_seen": 2950561792 }, { "epoch": 0.79, "learning_rate": 0.00010691647275936774, "loss": 0.065, "theoretical_loss": 3.353470032721262, "tokens_seen": 2950692864 }, { "epoch": 0.79, "objective/train/advantage_avg": -0.0012217030161991715, "objective/train/docs_used": 1072835, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1874449253082275, "objective/train/original_loss": 1.1874449253082275, "objective/train/theoretical_loss": 3.353458638685299, "objective/train/tokens_used": 1321348576, "objective/train/value_avg": -0.00634765625, "objective/train/value_loss": 0.0002577314735390246, "objective/train/value_max": -2.390146255493164e-05, "objective/train/value_min": -0.5322265625, "objective/train/value_reward_corr": 0.5944878394853538, "objective/train/value_std": 0.01140594482421875, "objective/train/weight_avg": 0.9988889098167419, "objective/train/weighted_lm_loss": 1.186848759651184, "objective/train/weights_max": 1.3819372653961182, "objective/train/weights_min": 0.3679875433444977, "theoretical_loss": 3.353458638685299, "tokens_seen": 2950823936 }, { "epoch": 0.79, "learning_rate": 0.00010687635400786328, "loss": 0.0623, "theoretical_loss": 3.353458638685299, "tokens_seen": 2950823936 }, { "epoch": 0.79, "learning_rate": 0.00010683623525635883, "loss": 0.0635, "theoretical_loss": 3.3534472452971373, "tokens_seen": 2950955008 }, { "epoch": 0.79, "learning_rate": 0.00010679611650485436, "loss": 0.0652, "theoretical_loss": 3.3534358525567116, "tokens_seen": 2951086080 }, { "epoch": 0.79, "learning_rate": 0.00010675599775334991, "loss": 0.0637, "theoretical_loss": 3.3534244604639563, "tokens_seen": 2951217152 }, { "epoch": 0.79, "learning_rate": 0.00010671587900184547, "loss": 0.063, "theoretical_loss": 3.353413069018805, "tokens_seen": 2951348224 }, { "epoch": 0.79, "learning_rate": 0.00010667576025034101, "loss": 0.0655, "theoretical_loss": 3.353401678221193, "tokens_seen": 2951479296 }, { "epoch": 0.79, "learning_rate": 0.00010663564149883657, "loss": 0.0691, "theoretical_loss": 3.353390288071054, "tokens_seen": 2951610368 }, { "epoch": 0.79, "learning_rate": 0.0001065955227473321, "loss": 0.0641, "theoretical_loss": 3.3533788985683235, "tokens_seen": 2951741440 }, { "epoch": 0.79, "learning_rate": 0.00010655540399582765, "loss": 0.0666, "theoretical_loss": 3.3533675097129354, "tokens_seen": 2951872512 }, { "epoch": 0.79, "learning_rate": 0.0001065152852443232, "loss": 0.0635, "theoretical_loss": 3.3533561215048233, "tokens_seen": 2952003584 }, { "epoch": 0.79, "learning_rate": 0.00010647516649281874, "loss": 0.0705, "theoretical_loss": 3.3533447339439233, "tokens_seen": 2952134656 }, { "epoch": 0.79, "learning_rate": 0.0001064350477413143, "loss": 0.067, "theoretical_loss": 3.3533333470301683, "tokens_seen": 2952265728 }, { "epoch": 0.79, "learning_rate": 0.00010639492898980983, "loss": 0.0676, "theoretical_loss": 3.353321960763494, "tokens_seen": 2952396800 }, { "epoch": 0.79, "learning_rate": 0.00010635481023830538, "loss": 0.0666, "theoretical_loss": 3.3533105751438343, "tokens_seen": 2952527872 }, { "epoch": 0.79, "learning_rate": 0.00010631469148680094, "loss": 0.062, "theoretical_loss": 3.353299190171124, "tokens_seen": 2952658944 }, { "epoch": 0.79, "learning_rate": 0.00010627457273529648, "loss": 0.0646, "theoretical_loss": 3.3532878058452975, "tokens_seen": 2952790016 }, { "epoch": 0.79, "learning_rate": 0.00010623445398379203, "loss": 0.0647, "theoretical_loss": 3.353276422166289, "tokens_seen": 2952921088 }, { "epoch": 0.79, "learning_rate": 0.00010619433523228756, "loss": 0.0662, "theoretical_loss": 3.3532650391340337, "tokens_seen": 2953052160 }, { "epoch": 0.79, "learning_rate": 0.00010615421648078312, "loss": 0.0657, "theoretical_loss": 3.353253656748466, "tokens_seen": 2953183232 }, { "epoch": 0.79, "learning_rate": 0.00010611409772927867, "loss": 0.059, "theoretical_loss": 3.35324227500952, "tokens_seen": 2953314304 }, { "epoch": 0.79, "learning_rate": 0.00010607397897777421, "loss": 0.0645, "theoretical_loss": 3.3532308939171305, "tokens_seen": 2953445376 }, { "epoch": 0.79, "learning_rate": 0.00010603386022626977, "loss": 0.0657, "theoretical_loss": 3.353219513471232, "tokens_seen": 2953576448 }, { "epoch": 0.79, "learning_rate": 0.00010599374147476531, "loss": 0.0661, "theoretical_loss": 3.3532081336717594, "tokens_seen": 2953707520 }, { "epoch": 0.79, "learning_rate": 0.00010595362272326085, "loss": 0.0652, "theoretical_loss": 3.3531967545186467, "tokens_seen": 2953838592 }, { "epoch": 0.79, "learning_rate": 0.0001059135039717564, "loss": 0.0651, "theoretical_loss": 3.3531853760118295, "tokens_seen": 2953969664 }, { "epoch": 0.79, "objective/train/advantage_avg": 7.58013193262741e-05, "objective/train/docs_used": 1074062, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3037848472595215, "objective/train/original_loss": 1.3037848472595215, "objective/train/theoretical_loss": 3.3531739981512416, "objective/train/tokens_used": 1324625376, "objective/train/value_avg": -0.004360198974609375, "objective/train/value_loss": 9.564879292156547e-05, "objective/train/value_max": -4.13060188293457e-05, "objective/train/value_min": -0.2142333984375, "objective/train/value_reward_corr": 0.5884070371111936, "objective/train/value_std": 0.007732391357421875, "objective/train/weight_avg": 1.0001190900802612, "objective/train/weighted_lm_loss": 1.303444266319275, "objective/train/weights_max": 1.1411995887756348, "objective/train/weights_min": 0.36930587887763977, "theoretical_loss": 3.3531739981512416, "tokens_seen": 2954100736 }, { "epoch": 0.79, "learning_rate": 0.00010587338522025195, "loss": 0.068, "theoretical_loss": 3.3531739981512416, "tokens_seen": 2954100736 }, { "epoch": 0.79, "learning_rate": 0.0001058332664687475, "loss": 0.062, "theoretical_loss": 3.3531626209368177, "tokens_seen": 2954231808 }, { "epoch": 0.79, "learning_rate": 0.00010579314771724304, "loss": 0.068, "theoretical_loss": 3.353151244368493, "tokens_seen": 2954362880 }, { "epoch": 0.79, "learning_rate": 0.00010575302896573858, "loss": 0.0654, "theoretical_loss": 3.3531398684462017, "tokens_seen": 2954493952 }, { "epoch": 0.79, "learning_rate": 0.00010571291021423414, "loss": 0.0673, "theoretical_loss": 3.353128493169878, "tokens_seen": 2954625024 }, { "epoch": 0.79, "learning_rate": 0.00010567279146272968, "loss": 0.0672, "theoretical_loss": 3.3531171185394575, "tokens_seen": 2954756096 }, { "epoch": 0.79, "learning_rate": 0.00010563267271122523, "loss": 0.0654, "theoretical_loss": 3.353105744554875, "tokens_seen": 2954887168 }, { "epoch": 0.79, "learning_rate": 0.00010559255395972077, "loss": 0.066, "theoretical_loss": 3.3530943712160637, "tokens_seen": 2955018240 }, { "epoch": 0.79, "learning_rate": 0.00010555243520821632, "loss": 0.0682, "theoretical_loss": 3.3530829985229595, "tokens_seen": 2955149312 }, { "epoch": 0.79, "learning_rate": 0.00010551231645671187, "loss": 0.0683, "theoretical_loss": 3.3530716264754967, "tokens_seen": 2955280384 }, { "epoch": 0.79, "learning_rate": 0.00010547219770520741, "loss": 0.0682, "theoretical_loss": 3.3530602550736104, "tokens_seen": 2955411456 }, { "epoch": 0.79, "learning_rate": 0.00010543207895370297, "loss": 0.0657, "theoretical_loss": 3.3530488843172352, "tokens_seen": 2955542528 }, { "epoch": 0.79, "learning_rate": 0.00010539196020219851, "loss": 0.0577, "theoretical_loss": 3.3530375142063056, "tokens_seen": 2955673600 }, { "epoch": 0.79, "learning_rate": 0.00010535184145069405, "loss": 0.0649, "theoretical_loss": 3.353026144740756, "tokens_seen": 2955804672 }, { "epoch": 0.79, "learning_rate": 0.0001053117226991896, "loss": 0.0623, "theoretical_loss": 3.353014775920522, "tokens_seen": 2955935744 }, { "epoch": 0.79, "learning_rate": 0.00010527160394768515, "loss": 0.0658, "theoretical_loss": 3.353003407745538, "tokens_seen": 2956066816 }, { "epoch": 0.79, "learning_rate": 0.0001052314851961807, "loss": 0.0661, "theoretical_loss": 3.3529920402157387, "tokens_seen": 2956197888 }, { "epoch": 0.79, "learning_rate": 0.00010519136644467624, "loss": 0.0656, "theoretical_loss": 3.3529806733310585, "tokens_seen": 2956328960 }, { "epoch": 0.79, "learning_rate": 0.00010515124769317178, "loss": 0.0641, "theoretical_loss": 3.352969307091433, "tokens_seen": 2956460032 }, { "epoch": 0.79, "learning_rate": 0.00010511112894166734, "loss": 0.0674, "theoretical_loss": 3.3529579414967965, "tokens_seen": 2956591104 }, { "epoch": 0.79, "learning_rate": 0.00010507101019016288, "loss": 0.0601, "theoretical_loss": 3.352946576547084, "tokens_seen": 2956722176 }, { "epoch": 0.79, "learning_rate": 0.00010503089143865843, "loss": 0.0618, "theoretical_loss": 3.35293521224223, "tokens_seen": 2956853248 }, { "epoch": 0.79, "learning_rate": 0.00010499077268715399, "loss": 0.0625, "theoretical_loss": 3.3529238485821695, "tokens_seen": 2956984320 }, { "epoch": 0.79, "learning_rate": 0.00010495065393564952, "loss": 0.0658, "theoretical_loss": 3.3529124855668377, "tokens_seen": 2957115392 }, { "epoch": 0.79, "learning_rate": 0.00010491053518414507, "loss": 0.0668, "theoretical_loss": 3.352901123196169, "tokens_seen": 2957246464 }, { "epoch": 0.79, "objective/train/advantage_avg": 0.00029492005705833435, "objective/train/docs_used": 1075270, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.306082844734192, "objective/train/original_loss": 1.3060829639434814, "objective/train/theoretical_loss": 3.3528897614700983, "objective/train/tokens_used": 1327902176, "objective/train/value_avg": -0.006473541259765625, "objective/train/value_loss": 0.00017134232621174306, "objective/train/value_max": -1.7940998077392578e-05, "objective/train/value_min": -0.6865234375, "objective/train/value_reward_corr": 0.7451941069902409, "objective/train/value_std": 0.0160369873046875, "objective/train/weight_avg": 1.0003772974014282, "objective/train/weighted_lm_loss": 1.3055616617202759, "objective/train/weights_max": 1.5356355905532837, "objective/train/weights_min": 0.5516515374183655, "theoretical_loss": 3.3528897614700983, "tokens_seen": 2957377536 }, { "epoch": 0.79, "learning_rate": 0.00010487041643264061, "loss": 0.0655, "theoretical_loss": 3.3528897614700983, "tokens_seen": 2957377536 }, { "epoch": 0.79, "learning_rate": 0.00010483029768113617, "loss": 0.0648, "theoretical_loss": 3.3528784003885606, "tokens_seen": 2957508608 }, { "epoch": 0.79, "learning_rate": 0.00010479017892963172, "loss": 0.0629, "theoretical_loss": 3.352867039951491, "tokens_seen": 2957639680 }, { "epoch": 0.79, "learning_rate": 0.00010475006017812726, "loss": 0.0645, "theoretical_loss": 3.3528556801588234, "tokens_seen": 2957770752 }, { "epoch": 0.79, "learning_rate": 0.0001047099414266228, "loss": 0.0646, "theoretical_loss": 3.352844321010494, "tokens_seen": 2957901824 }, { "epoch": 0.79, "learning_rate": 0.00010466982267511835, "loss": 0.0629, "theoretical_loss": 3.3528329625064366, "tokens_seen": 2958032896 }, { "epoch": 0.79, "learning_rate": 0.0001046297039236139, "loss": 0.064, "theoretical_loss": 3.352821604646587, "tokens_seen": 2958163968 }, { "epoch": 0.79, "learning_rate": 0.00010458958517210946, "loss": 0.0643, "theoretical_loss": 3.3528102474308796, "tokens_seen": 2958295040 }, { "epoch": 0.79, "learning_rate": 0.000104549466420605, "loss": 0.0648, "theoretical_loss": 3.3527988908592494, "tokens_seen": 2958426112 }, { "epoch": 0.79, "learning_rate": 0.00010450934766910054, "loss": 0.0646, "theoretical_loss": 3.3527875349316316, "tokens_seen": 2958557184 }, { "epoch": 0.79, "learning_rate": 0.00010446922891759608, "loss": 0.0633, "theoretical_loss": 3.352776179647961, "tokens_seen": 2958688256 }, { "epoch": 0.79, "learning_rate": 0.00010442911016609163, "loss": 0.0637, "theoretical_loss": 3.3527648250081725, "tokens_seen": 2958819328 }, { "epoch": 0.79, "learning_rate": 0.00010438899141458719, "loss": 0.0666, "theoretical_loss": 3.3527534710122007, "tokens_seen": 2958950400 }, { "epoch": 0.79, "learning_rate": 0.00010434887266308273, "loss": 0.0665, "theoretical_loss": 3.3527421176599814, "tokens_seen": 2959081472 }, { "epoch": 0.79, "learning_rate": 0.00010430875391157827, "loss": 0.0627, "theoretical_loss": 3.352730764951449, "tokens_seen": 2959212544 }, { "epoch": 0.79, "learning_rate": 0.00010426863516007381, "loss": 0.0661, "theoretical_loss": 3.3527194128865387, "tokens_seen": 2959343616 }, { "epoch": 0.79, "learning_rate": 0.00010422851640856937, "loss": 0.065, "theoretical_loss": 3.3527080614651856, "tokens_seen": 2959474688 }, { "epoch": 0.79, "learning_rate": 0.00010418839765706492, "loss": 0.0642, "theoretical_loss": 3.352696710687324, "tokens_seen": 2959605760 }, { "epoch": 0.79, "learning_rate": 0.00010414827890556046, "loss": 0.0648, "theoretical_loss": 3.35268536055289, "tokens_seen": 2959736832 }, { "epoch": 0.79, "learning_rate": 0.000104108160154056, "loss": 0.0648, "theoretical_loss": 3.352674011061818, "tokens_seen": 2959867904 }, { "epoch": 0.79, "learning_rate": 0.00010406804140255155, "loss": 0.0678, "theoretical_loss": 3.352662662214043, "tokens_seen": 2959998976 }, { "epoch": 0.79, "learning_rate": 0.0001040279226510471, "loss": 0.0673, "theoretical_loss": 3.352651314009501, "tokens_seen": 2960130048 }, { "epoch": 0.79, "learning_rate": 0.00010398780389954266, "loss": 0.0637, "theoretical_loss": 3.3526399664481255, "tokens_seen": 2960261120 }, { "epoch": 0.79, "learning_rate": 0.0001039476851480382, "loss": 0.0651, "theoretical_loss": 3.3526286195298525, "tokens_seen": 2960392192 }, { "epoch": 0.79, "learning_rate": 0.00010390756639653374, "loss": 0.0649, "theoretical_loss": 3.352617273254617, "tokens_seen": 2960523264 }, { "debugging/Compilability": 1.0, "debugging/distinct-1-grams": 0.7453161420790304, "debugging/entropy-1-grams": 5.098414579318759, "debugging/length": 436.125, "debugging/num_segments": 8, "debugging/raw_token_scores_avg": 0.004281158559024334, "debugging/raw_token_scores_std": 0.014509525150060654, "debugging/score": 0.008938875309987073, "debugging/score_std": 0.00792166152187058, "epoch": 0.79, "objective/train/advantage_avg": -9.591884008841589e-05, "objective/train/docs_used": 1076444, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3262734413146973, "objective/train/original_loss": 1.3262732028961182, "objective/train/theoretical_loss": 3.352605927622354, "objective/train/tokens_used": 1331178976, "objective/train/value_avg": -0.004180908203125, "objective/train/value_loss": 0.00014841899974271655, "objective/train/value_max": -9.298324584960938e-06, "objective/train/value_min": -0.196044921875, "objective/train/value_reward_corr": 0.5455001637918214, "objective/train/value_std": 0.007106781005859375, "objective/train/weight_avg": 0.9999665021896362, "objective/train/weighted_lm_loss": 1.326359510421753, "objective/train/weights_max": 1.0618524551391602, "objective/train/weights_min": 0.3683614730834961, "theoretical_loss": 3.352605927622354, "tokens_seen": 2960654336 }, { "epoch": 0.79, "learning_rate": 0.00010386744764502928, "loss": 0.0684, "theoretical_loss": 3.352605927622354, "tokens_seen": 2960654336 }, { "epoch": 0.79, "learning_rate": 0.00010382732889352484, "loss": 0.0621, "theoretical_loss": 3.3525945826329986, "tokens_seen": 2960785408 }, { "epoch": 0.79, "learning_rate": 0.00010378721014202039, "loss": 0.0621, "theoretical_loss": 3.352583238286486, "tokens_seen": 2960916480 }, { "epoch": 0.79, "learning_rate": 0.00010374709139051593, "loss": 0.0639, "theoretical_loss": 3.3525718945827516, "tokens_seen": 2961047552 }, { "epoch": 0.79, "learning_rate": 0.00010370697263901147, "loss": 0.0631, "theoretical_loss": 3.35256055152173, "tokens_seen": 2961178624 }, { "epoch": 0.79, "learning_rate": 0.00010366685388750701, "loss": 0.0597, "theoretical_loss": 3.3525492091033566, "tokens_seen": 2961309696 }, { "epoch": 0.79, "learning_rate": 0.00010362673513600257, "loss": 0.0672, "theoretical_loss": 3.352537867327566, "tokens_seen": 2961440768 }, { "epoch": 0.79, "learning_rate": 0.00010358661638449812, "loss": 0.0651, "theoretical_loss": 3.3525265261942945, "tokens_seen": 2961571840 }, { "epoch": 0.79, "learning_rate": 0.00010354649763299366, "loss": 0.0664, "theoretical_loss": 3.3525151857034765, "tokens_seen": 2961702912 }, { "epoch": 0.8, "learning_rate": 0.00010350637888148922, "loss": 0.0642, "theoretical_loss": 3.352503845855047, "tokens_seen": 2961833984 }, { "epoch": 0.8, "learning_rate": 0.00010346626012998475, "loss": 0.0647, "theoretical_loss": 3.3524925066489417, "tokens_seen": 2961965056 }, { "epoch": 0.8, "learning_rate": 0.0001034261413784803, "loss": 0.0607, "theoretical_loss": 3.352481168085095, "tokens_seen": 2962096128 }, { "epoch": 0.8, "learning_rate": 0.00010338602262697586, "loss": 0.0684, "theoretical_loss": 3.3524698301634435, "tokens_seen": 2962227200 }, { "epoch": 0.8, "learning_rate": 0.0001033459038754714, "loss": 0.0649, "theoretical_loss": 3.352458492883921, "tokens_seen": 2962358272 }, { "epoch": 0.8, "learning_rate": 0.00010330578512396695, "loss": 0.0642, "theoretical_loss": 3.352447156246464, "tokens_seen": 2962489344 }, { "epoch": 0.8, "learning_rate": 0.00010326566637246248, "loss": 0.0647, "theoretical_loss": 3.3524358202510065, "tokens_seen": 2962620416 }, { "epoch": 0.8, "learning_rate": 0.00010322554762095804, "loss": 0.0656, "theoretical_loss": 3.3524244848974845, "tokens_seen": 2962751488 }, { "epoch": 0.8, "learning_rate": 0.00010318542886945359, "loss": 0.0682, "theoretical_loss": 3.352413150185833, "tokens_seen": 2962882560 }, { "epoch": 0.8, "learning_rate": 0.00010314531011794913, "loss": 0.0629, "theoretical_loss": 3.352401816115987, "tokens_seen": 2963013632 }, { "epoch": 0.8, "learning_rate": 0.00010310519136644469, "loss": 0.0637, "theoretical_loss": 3.3523904826878823, "tokens_seen": 2963144704 }, { "epoch": 0.8, "learning_rate": 0.00010306507261494021, "loss": 0.0626, "theoretical_loss": 3.352379149901454, "tokens_seen": 2963275776 }, { "epoch": 0.8, "learning_rate": 0.00010302495386343577, "loss": 0.0654, "theoretical_loss": 3.3523678177566376, "tokens_seen": 2963406848 }, { "epoch": 0.8, "learning_rate": 0.00010298483511193132, "loss": 0.0619, "theoretical_loss": 3.3523564862533677, "tokens_seen": 2963537920 }, { "epoch": 0.8, "learning_rate": 0.00010294471636042687, "loss": 0.0625, "theoretical_loss": 3.35234515539158, "tokens_seen": 2963668992 }, { "epoch": 0.8, "learning_rate": 0.00010290459760892242, "loss": 0.0613, "theoretical_loss": 3.3523338251712103, "tokens_seen": 2963800064 }, { "epoch": 0.8, "objective/train/advantage_avg": -0.0002942625433206558, "objective/train/docs_used": 1077722, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2416437864303589, "objective/train/original_loss": 1.2416436672210693, "objective/train/theoretical_loss": 3.352322495592193, "objective/train/tokens_used": 1334455776, "objective/train/value_avg": -0.007526397705078125, "objective/train/value_loss": 0.00015823439753148705, "objective/train/value_max": -5.692243576049805e-05, "objective/train/value_min": -0.449462890625, "objective/train/value_reward_corr": 0.7936034262500276, "objective/train/value_std": 0.0145263671875, "objective/train/weight_avg": 0.9997801780700684, "objective/train/weighted_lm_loss": 1.240529179573059, "objective/train/weights_max": 1.139668345451355, "objective/train/weights_min": 0.39198824763298035, "theoretical_loss": 3.352322495592193, "tokens_seen": 2963931136 }, { "epoch": 0.8, "learning_rate": 0.00010286447885741795, "loss": 0.0618, "theoretical_loss": 3.352322495592193, "tokens_seen": 2963931136 }, { "epoch": 0.8, "learning_rate": 0.0001028243601059135, "loss": 0.0674, "theoretical_loss": 3.3523111666544643, "tokens_seen": 2964062208 }, { "epoch": 0.8, "learning_rate": 0.00010278424135440906, "loss": 0.0656, "theoretical_loss": 3.3522998383579594, "tokens_seen": 2964193280 }, { "epoch": 0.8, "learning_rate": 0.0001027441226029046, "loss": 0.0663, "theoretical_loss": 3.352288510702613, "tokens_seen": 2964324352 }, { "epoch": 0.8, "learning_rate": 0.00010270400385140015, "loss": 0.0631, "theoretical_loss": 3.352277183688361, "tokens_seen": 2964455424 }, { "epoch": 0.8, "learning_rate": 0.00010266388509989568, "loss": 0.0638, "theoretical_loss": 3.3522658573151385, "tokens_seen": 2964586496 }, { "epoch": 0.8, "learning_rate": 0.00010262376634839124, "loss": 0.0663, "theoretical_loss": 3.352254531582881, "tokens_seen": 2964717568 }, { "epoch": 0.8, "learning_rate": 0.00010258364759688679, "loss": 0.0639, "theoretical_loss": 3.3522432064915244, "tokens_seen": 2964848640 }, { "epoch": 0.8, "learning_rate": 0.00010254352884538233, "loss": 0.0642, "theoretical_loss": 3.352231882041003, "tokens_seen": 2964979712 }, { "epoch": 0.8, "learning_rate": 0.00010250341009387789, "loss": 0.0647, "theoretical_loss": 3.3522205582312536, "tokens_seen": 2965110784 }, { "epoch": 0.8, "learning_rate": 0.00010246329134237341, "loss": 0.0655, "theoretical_loss": 3.3522092350622104, "tokens_seen": 2965241856 }, { "epoch": 0.8, "learning_rate": 0.00010242317259086897, "loss": 0.0638, "theoretical_loss": 3.3521979125338097, "tokens_seen": 2965372928 }, { "epoch": 0.8, "learning_rate": 0.00010238305383936452, "loss": 0.0647, "theoretical_loss": 3.352186590645986, "tokens_seen": 2965504000 }, { "epoch": 0.8, "learning_rate": 0.00010234293508786007, "loss": 0.0619, "theoretical_loss": 3.3521752693986757, "tokens_seen": 2965635072 }, { "epoch": 0.8, "learning_rate": 0.00010230281633635562, "loss": 0.0617, "theoretical_loss": 3.3521639487918136, "tokens_seen": 2965766144 }, { "epoch": 0.8, "learning_rate": 0.00010226269758485116, "loss": 0.0631, "theoretical_loss": 3.3521526288253356, "tokens_seen": 2965897216 }, { "epoch": 0.8, "learning_rate": 0.0001022225788333467, "loss": 0.0636, "theoretical_loss": 3.352141309499177, "tokens_seen": 2966028288 }, { "epoch": 0.8, "learning_rate": 0.00010218246008184226, "loss": 0.0614, "theoretical_loss": 3.352129990813273, "tokens_seen": 2966159360 }, { "epoch": 0.8, "learning_rate": 0.0001021423413303378, "loss": 0.0688, "theoretical_loss": 3.35211867276756, "tokens_seen": 2966290432 }, { "epoch": 0.8, "learning_rate": 0.00010210222257883335, "loss": 0.0637, "theoretical_loss": 3.3521073553619725, "tokens_seen": 2966421504 }, { "epoch": 0.8, "learning_rate": 0.0001020621038273289, "loss": 0.0625, "theoretical_loss": 3.352096038596446, "tokens_seen": 2966552576 }, { "epoch": 0.8, "learning_rate": 0.00010202198507582444, "loss": 0.0635, "theoretical_loss": 3.352084722470917, "tokens_seen": 2966683648 }, { "epoch": 0.8, "learning_rate": 0.00010198186632431999, "loss": 0.0651, "theoretical_loss": 3.35207340698532, "tokens_seen": 2966814720 }, { "epoch": 0.8, "learning_rate": 0.00010194174757281553, "loss": 0.0662, "theoretical_loss": 3.352062092139591, "tokens_seen": 2966945792 }, { "epoch": 0.8, "learning_rate": 0.00010190162882131109, "loss": 0.0628, "theoretical_loss": 3.352050777933666, "tokens_seen": 2967076864 }, { "epoch": 0.8, "objective/train/advantage_avg": -1.2291126950003672e-05, "objective/train/docs_used": 1078546, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3113611936569214, "objective/train/original_loss": 1.3113611936569214, "objective/train/theoretical_loss": 3.35203946436748, "objective/train/tokens_used": 1337732576, "objective/train/value_avg": -0.00872039794921875, "objective/train/value_loss": 0.00021087206550873816, "objective/train/value_max": -2.6881694793701172e-05, "objective/train/value_min": -0.335693359375, "objective/train/value_reward_corr": 0.6867047895110352, "objective/train/value_std": 0.0140380859375, "objective/train/weight_avg": 1.0000865459442139, "objective/train/weighted_lm_loss": 1.311224102973938, "objective/train/weights_max": 1.129938006401062, "objective/train/weights_min": 0.38186925649642944, "theoretical_loss": 3.35203946436748, "tokens_seen": 2967207936 }, { "epoch": 0.8, "learning_rate": 0.00010186151006980663, "loss": 0.0616, "theoretical_loss": 3.35203946436748, "tokens_seen": 2967207936 }, { "epoch": 0.8, "learning_rate": 0.00010182139131830217, "loss": 0.0658, "theoretical_loss": 3.352028151440969, "tokens_seen": 2967339008 }, { "epoch": 0.8, "learning_rate": 0.00010178127256679773, "loss": 0.064, "theoretical_loss": 3.3520168391540675, "tokens_seen": 2967470080 }, { "epoch": 0.8, "learning_rate": 0.00010174115381529327, "loss": 0.0665, "theoretical_loss": 3.352005527506712, "tokens_seen": 2967601152 }, { "epoch": 0.8, "learning_rate": 0.00010170103506378882, "loss": 0.0626, "theoretical_loss": 3.3519942164988383, "tokens_seen": 2967732224 }, { "epoch": 0.8, "learning_rate": 0.00010166091631228436, "loss": 0.0631, "theoretical_loss": 3.3519829061303814, "tokens_seen": 2967863296 }, { "epoch": 0.8, "learning_rate": 0.0001016207975607799, "loss": 0.0624, "theoretical_loss": 3.3519715964012775, "tokens_seen": 2967994368 }, { "epoch": 0.8, "learning_rate": 0.00010158067880927546, "loss": 0.0648, "theoretical_loss": 3.3519602873114613, "tokens_seen": 2968125440 }, { "epoch": 0.8, "learning_rate": 0.000101540560057771, "loss": 0.0625, "theoretical_loss": 3.3519489788608694, "tokens_seen": 2968256512 }, { "epoch": 0.8, "learning_rate": 0.00010150044130626655, "loss": 0.0596, "theoretical_loss": 3.351937671049437, "tokens_seen": 2968387584 }, { "epoch": 0.8, "learning_rate": 0.0001014603225547621, "loss": 0.064, "theoretical_loss": 3.3519263638770997, "tokens_seen": 2968518656 }, { "epoch": 0.8, "learning_rate": 0.00010142020380325764, "loss": 0.0618, "theoretical_loss": 3.3519150573437937, "tokens_seen": 2968649728 }, { "epoch": 0.8, "learning_rate": 0.00010138008505175319, "loss": 0.0606, "theoretical_loss": 3.351903751449454, "tokens_seen": 2968780800 }, { "epoch": 0.8, "learning_rate": 0.00010133996630024873, "loss": 0.0631, "theoretical_loss": 3.3518924461940167, "tokens_seen": 2968911872 }, { "epoch": 0.8, "learning_rate": 0.00010129984754874429, "loss": 0.0619, "theoretical_loss": 3.351881141577417, "tokens_seen": 2969042944 }, { "epoch": 0.8, "learning_rate": 0.00010125972879723983, "loss": 0.0642, "theoretical_loss": 3.3518698375995912, "tokens_seen": 2969174016 }, { "epoch": 0.8, "learning_rate": 0.00010121961004573537, "loss": 0.065, "theoretical_loss": 3.3518585342604745, "tokens_seen": 2969305088 }, { "epoch": 0.8, "learning_rate": 0.00010117949129423093, "loss": 0.0616, "theoretical_loss": 3.3518472315600034, "tokens_seen": 2969436160 }, { "epoch": 0.8, "learning_rate": 0.00010113937254272647, "loss": 0.0593, "theoretical_loss": 3.3518359294981126, "tokens_seen": 2969567232 }, { "epoch": 0.8, "learning_rate": 0.00010109925379122202, "loss": 0.0629, "theoretical_loss": 3.3518246280747386, "tokens_seen": 2969698304 }, { "epoch": 0.8, "learning_rate": 0.00010105913503971758, "loss": 0.0603, "theoretical_loss": 3.3518133272898165, "tokens_seen": 2969829376 }, { "epoch": 0.8, "learning_rate": 0.00010101901628821312, "loss": 0.064, "theoretical_loss": 3.351802027143283, "tokens_seen": 2969960448 }, { "epoch": 0.8, "learning_rate": 0.00010097889753670866, "loss": 0.0618, "theoretical_loss": 3.3517907276350725, "tokens_seen": 2970091520 }, { "epoch": 0.8, "learning_rate": 0.0001009387787852042, "loss": 0.0655, "theoretical_loss": 3.351779428765122, "tokens_seen": 2970222592 }, { "epoch": 0.8, "learning_rate": 0.00010089866003369976, "loss": 0.0638, "theoretical_loss": 3.351768130533367, "tokens_seen": 2970353664 }, { "epoch": 0.8, "objective/train/advantage_avg": -2.1982141333865002e-05, "objective/train/docs_used": 1080058, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3492704629898071, "objective/train/original_loss": 1.3492704629898071, "objective/train/theoretical_loss": 3.3517568329397434, "objective/train/tokens_used": 1341009376, "objective/train/value_avg": -0.008514404296875, "objective/train/value_loss": 0.00020766546367667615, "objective/train/value_max": -4.684925079345703e-05, "objective/train/value_min": -0.873046875, "objective/train/value_reward_corr": 0.81526487906684, "objective/train/value_std": 0.0203857421875, "objective/train/weight_avg": 1.0000710487365723, "objective/train/weighted_lm_loss": 1.3492146730422974, "objective/train/weights_max": 1.3413362503051758, "objective/train/weights_min": 0.3714556097984314, "theoretical_loss": 3.3517568329397434, "tokens_seen": 2970484736 }, { "epoch": 0.8, "learning_rate": 0.00010085854128219531, "loss": 0.0658, "theoretical_loss": 3.3517568329397434, "tokens_seen": 2970484736 }, { "epoch": 0.8, "learning_rate": 0.00010081842253069085, "loss": 0.0621, "theoretical_loss": 3.351745535984186, "tokens_seen": 2970615808 }, { "epoch": 0.8, "learning_rate": 0.00010077830377918639, "loss": 0.0619, "theoretical_loss": 3.3517342396666323, "tokens_seen": 2970746880 }, { "epoch": 0.8, "learning_rate": 0.00010073818502768193, "loss": 0.0658, "theoretical_loss": 3.3517229439870166, "tokens_seen": 2970877952 }, { "epoch": 0.8, "learning_rate": 0.00010069806627617749, "loss": 0.0625, "theoretical_loss": 3.351711648945275, "tokens_seen": 2971009024 }, { "epoch": 0.8, "learning_rate": 0.00010065794752467304, "loss": 0.0674, "theoretical_loss": 3.3517003545413444, "tokens_seen": 2971140096 }, { "epoch": 0.8, "learning_rate": 0.00010061782877316859, "loss": 0.0667, "theoretical_loss": 3.3516890607751595, "tokens_seen": 2971271168 }, { "epoch": 0.8, "learning_rate": 0.00010057771002166413, "loss": 0.0685, "theoretical_loss": 3.351677767646657, "tokens_seen": 2971402240 }, { "epoch": 0.8, "learning_rate": 0.00010053759127015967, "loss": 0.0625, "theoretical_loss": 3.351666475155772, "tokens_seen": 2971533312 }, { "epoch": 0.8, "learning_rate": 0.00010049747251865522, "loss": 0.0622, "theoretical_loss": 3.351655183302441, "tokens_seen": 2971664384 }, { "epoch": 0.8, "learning_rate": 0.00010045735376715078, "loss": 0.0695, "theoretical_loss": 3.351643892086599, "tokens_seen": 2971795456 }, { "epoch": 0.8, "learning_rate": 0.00010041723501564632, "loss": 0.0581, "theoretical_loss": 3.3516326015081828, "tokens_seen": 2971926528 }, { "epoch": 0.8, "learning_rate": 0.00010037711626414186, "loss": 0.0645, "theoretical_loss": 3.3516213115671283, "tokens_seen": 2972057600 }, { "epoch": 0.8, "learning_rate": 0.0001003369975126374, "loss": 0.0645, "theoretical_loss": 3.3516100222633707, "tokens_seen": 2972188672 }, { "epoch": 0.8, "learning_rate": 0.00010029687876113296, "loss": 0.0617, "theoretical_loss": 3.3515987335968465, "tokens_seen": 2972319744 }, { "epoch": 0.8, "learning_rate": 0.00010025676000962851, "loss": 0.0602, "theoretical_loss": 3.351587445567491, "tokens_seen": 2972450816 }, { "epoch": 0.8, "learning_rate": 0.00010021664125812405, "loss": 0.0632, "theoretical_loss": 3.3515761581752415, "tokens_seen": 2972581888 }, { "epoch": 0.8, "learning_rate": 0.0001001765225066196, "loss": 0.0664, "theoretical_loss": 3.351564871420033, "tokens_seen": 2972712960 }, { "epoch": 0.8, "learning_rate": 0.00010013640375511513, "loss": 0.0601, "theoretical_loss": 3.351553585301801, "tokens_seen": 2972844032 }, { "epoch": 0.8, "learning_rate": 0.00010009628500361069, "loss": 0.0632, "theoretical_loss": 3.351542299820482, "tokens_seen": 2972975104 }, { "epoch": 0.8, "learning_rate": 0.00010005616625210624, "loss": 0.0624, "theoretical_loss": 3.351531014976012, "tokens_seen": 2973106176 }, { "epoch": 0.8, "learning_rate": 0.00010001604750060179, "loss": 0.0657, "theoretical_loss": 3.3515197307683273, "tokens_seen": 2973237248 }, { "epoch": 0.8, "learning_rate": 9.997592874909733e-05, "loss": 0.0606, "theoretical_loss": 3.3515084471973635, "tokens_seen": 2973368320 }, { "epoch": 0.8, "learning_rate": 9.993580999759287e-05, "loss": 0.0643, "theoretical_loss": 3.3514971642630567, "tokens_seen": 2973499392 }, { "epoch": 0.8, "learning_rate": 9.989569124608842e-05, "loss": 0.0652, "theoretical_loss": 3.3514858819653424, "tokens_seen": 2973630464 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.0002827980206348002, "objective/train/docs_used": 1081215, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3985306024551392, "objective/train/original_loss": 1.3985306024551392, "objective/train/theoretical_loss": 3.3514746003041576, "objective/train/tokens_used": 1344286176, "objective/train/value_avg": -0.006778717041015625, "objective/train/value_loss": 0.00013371894601732492, "objective/train/value_max": -2.8192996978759766e-05, "objective/train/value_min": -0.239013671875, "objective/train/value_reward_corr": 0.8456003646695109, "objective/train/value_std": 0.0172576904296875, "objective/train/weight_avg": 1.0003451108932495, "objective/train/weighted_lm_loss": 1.3987102508544922, "objective/train/weights_max": 1.1895605325698853, "objective/train/weights_min": 0.37335923314094543, "theoretical_loss": 3.3514746003041576, "tokens_seen": 2973761536 }, { "epoch": 0.8, "learning_rate": 9.985557249458398e-05, "loss": 0.0653, "theoretical_loss": 3.3514746003041576, "tokens_seen": 2973761536 }, { "epoch": 0.8, "learning_rate": 9.981545374307952e-05, "loss": 0.0643, "theoretical_loss": 3.3514633192794374, "tokens_seen": 2973892608 }, { "epoch": 0.8, "learning_rate": 9.977533499157507e-05, "loss": 0.0635, "theoretical_loss": 3.351452038891119, "tokens_seen": 2974023680 }, { "epoch": 0.8, "learning_rate": 9.97352162400706e-05, "loss": 0.0634, "theoretical_loss": 3.351440759139137, "tokens_seen": 2974154752 }, { "epoch": 0.8, "learning_rate": 9.969509748856616e-05, "loss": 0.0655, "theoretical_loss": 3.3514294800234286, "tokens_seen": 2974285824 }, { "epoch": 0.8, "learning_rate": 9.965497873706171e-05, "loss": 0.0629, "theoretical_loss": 3.3514182015439293, "tokens_seen": 2974416896 }, { "epoch": 0.8, "learning_rate": 9.961485998555725e-05, "loss": 0.0637, "theoretical_loss": 3.3514069237005755, "tokens_seen": 2974547968 }, { "epoch": 0.8, "learning_rate": 9.957474123405281e-05, "loss": 0.0682, "theoretical_loss": 3.3513956464933035, "tokens_seen": 2974679040 }, { "epoch": 0.8, "learning_rate": 9.953462248254834e-05, "loss": 0.0685, "theoretical_loss": 3.3513843699220485, "tokens_seen": 2974810112 }, { "epoch": 0.8, "learning_rate": 9.949450373104389e-05, "loss": 0.0662, "theoretical_loss": 3.3513730939867474, "tokens_seen": 2974941184 }, { "epoch": 0.8, "learning_rate": 9.945438497953944e-05, "loss": 0.0644, "theoretical_loss": 3.351361818687336, "tokens_seen": 2975072256 }, { "epoch": 0.8, "learning_rate": 9.941426622803499e-05, "loss": 0.0634, "theoretical_loss": 3.3513505440237505, "tokens_seen": 2975203328 }, { "epoch": 0.8, "learning_rate": 9.937414747653054e-05, "loss": 0.0607, "theoretical_loss": 3.3513392699959272, "tokens_seen": 2975334400 }, { "epoch": 0.8, "learning_rate": 9.933402872502607e-05, "loss": 0.0679, "theoretical_loss": 3.3513279966038017, "tokens_seen": 2975465472 }, { "epoch": 0.8, "learning_rate": 9.929390997352162e-05, "loss": 0.0631, "theoretical_loss": 3.3513167238473107, "tokens_seen": 2975596544 }, { "epoch": 0.8, "learning_rate": 9.925379122201718e-05, "loss": 0.0663, "theoretical_loss": 3.3513054517263905, "tokens_seen": 2975727616 }, { "epoch": 0.8, "learning_rate": 9.921367247051272e-05, "loss": 0.0623, "theoretical_loss": 3.3512941802409766, "tokens_seen": 2975858688 }, { "epoch": 0.8, "learning_rate": 9.917355371900827e-05, "loss": 0.0667, "theoretical_loss": 3.351282909391006, "tokens_seen": 2975989760 }, { "epoch": 0.8, "learning_rate": 9.91334349675038e-05, "loss": 0.0675, "theoretical_loss": 3.3512716391764137, "tokens_seen": 2976120832 }, { "epoch": 0.8, "learning_rate": 9.909331621599936e-05, "loss": 0.0611, "theoretical_loss": 3.3512603695971372, "tokens_seen": 2976251904 }, { "epoch": 0.8, "learning_rate": 9.905319746449491e-05, "loss": 0.0648, "theoretical_loss": 3.351249100653112, "tokens_seen": 2976382976 }, { "epoch": 0.8, "learning_rate": 9.901307871299045e-05, "loss": 0.0664, "theoretical_loss": 3.3512378323442746, "tokens_seen": 2976514048 }, { "epoch": 0.8, "learning_rate": 9.897295996148601e-05, "loss": 0.0627, "theoretical_loss": 3.3512265646705615, "tokens_seen": 2976645120 }, { "epoch": 0.8, "learning_rate": 9.893284120998154e-05, "loss": 0.0633, "theoretical_loss": 3.351215297631908, "tokens_seen": 2976776192 }, { "epoch": 0.8, "learning_rate": 9.889272245847709e-05, "loss": 0.0606, "theoretical_loss": 3.3512040312282507, "tokens_seen": 2976907264 }, { "epoch": 0.8, "objective/train/advantage_avg": 0.0001903286320157349, "objective/train/docs_used": 1082519, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3712365627288818, "objective/train/original_loss": 1.3712363243103027, "objective/train/theoretical_loss": 3.3511927654595266, "objective/train/tokens_used": 1347562976, "objective/train/value_avg": -0.008636474609375, "objective/train/value_loss": 0.0005604121834039688, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.9541015625, "objective/train/value_reward_corr": 0.6208301250481264, "objective/train/value_std": 0.0225830078125, "objective/train/weight_avg": 1.0004498958587646, "objective/train/weighted_lm_loss": 1.371800422668457, "objective/train/weights_max": 2.272244930267334, "objective/train/weights_min": 0.3694460988044739, "theoretical_loss": 3.3511927654595266, "tokens_seen": 2977038336 }, { "epoch": 0.8, "learning_rate": 9.885260370697265e-05, "loss": 0.0643, "theoretical_loss": 3.3511927654595266, "tokens_seen": 2977038336 }, { "epoch": 0.8, "learning_rate": 9.881248495546819e-05, "loss": 0.0641, "theoretical_loss": 3.351181500325671, "tokens_seen": 2977169408 }, { "epoch": 0.8, "learning_rate": 9.877236620396374e-05, "loss": 0.0676, "theoretical_loss": 3.3511702358266207, "tokens_seen": 2977300480 }, { "epoch": 0.8, "learning_rate": 9.873224745245927e-05, "loss": 0.0652, "theoretical_loss": 3.351158971962312, "tokens_seen": 2977431552 }, { "epoch": 0.8, "learning_rate": 9.869212870095482e-05, "loss": 0.0654, "theoretical_loss": 3.351147708732681, "tokens_seen": 2977562624 }, { "epoch": 0.8, "learning_rate": 9.865200994945038e-05, "loss": 0.0655, "theoretical_loss": 3.351136446137664, "tokens_seen": 2977693696 }, { "epoch": 0.8, "learning_rate": 9.861189119794592e-05, "loss": 0.066, "theoretical_loss": 3.3511251841771976, "tokens_seen": 2977824768 }, { "epoch": 0.8, "learning_rate": 9.857177244644148e-05, "loss": 0.0627, "theoretical_loss": 3.3511139228512175, "tokens_seen": 2977955840 }, { "epoch": 0.8, "learning_rate": 9.853165369493702e-05, "loss": 0.0634, "theoretical_loss": 3.351102662159661, "tokens_seen": 2978086912 }, { "epoch": 0.8, "learning_rate": 9.849153494343256e-05, "loss": 0.0618, "theoretical_loss": 3.3510914021024636, "tokens_seen": 2978217984 }, { "epoch": 0.81, "learning_rate": 9.845141619192811e-05, "loss": 0.0648, "theoretical_loss": 3.3510801426795616, "tokens_seen": 2978349056 }, { "epoch": 0.81, "learning_rate": 9.841129744042365e-05, "loss": 0.067, "theoretical_loss": 3.3510688838908917, "tokens_seen": 2978480128 }, { "epoch": 0.81, "learning_rate": 9.837117868891921e-05, "loss": 0.0637, "theoretical_loss": 3.3510576257363907, "tokens_seen": 2978611200 }, { "epoch": 0.81, "learning_rate": 9.833105993741475e-05, "loss": 0.0619, "theoretical_loss": 3.3510463682159943, "tokens_seen": 2978742272 }, { "epoch": 0.81, "learning_rate": 9.829094118591029e-05, "loss": 0.0648, "theoretical_loss": 3.3510351113296393, "tokens_seen": 2978873344 }, { "epoch": 0.81, "learning_rate": 9.825082243440585e-05, "loss": 0.0649, "theoretical_loss": 3.3510238550772615, "tokens_seen": 2979004416 }, { "epoch": 0.81, "learning_rate": 9.821070368290139e-05, "loss": 0.0646, "theoretical_loss": 3.351012599458798, "tokens_seen": 2979135488 }, { "epoch": 0.81, "learning_rate": 9.817058493139694e-05, "loss": 0.064, "theoretical_loss": 3.3510013444741844, "tokens_seen": 2979266560 }, { "epoch": 0.81, "learning_rate": 9.813046617989248e-05, "loss": 0.067, "theoretical_loss": 3.350990090123358, "tokens_seen": 2979397632 }, { "epoch": 0.81, "learning_rate": 9.809034742838802e-05, "loss": 0.0654, "theoretical_loss": 3.350978836406255, "tokens_seen": 2979528704 }, { "epoch": 0.81, "learning_rate": 9.805022867688358e-05, "loss": 0.0701, "theoretical_loss": 3.3509675833228116, "tokens_seen": 2979659776 }, { "epoch": 0.81, "learning_rate": 9.801010992537912e-05, "loss": 0.0663, "theoretical_loss": 3.3509563308729646, "tokens_seen": 2979790848 }, { "epoch": 0.81, "learning_rate": 9.796999117387468e-05, "loss": 0.0667, "theoretical_loss": 3.3509450790566495, "tokens_seen": 2979921920 }, { "epoch": 0.81, "learning_rate": 9.792987242237022e-05, "loss": 0.0616, "theoretical_loss": 3.350933827873804, "tokens_seen": 2980052992 }, { "epoch": 0.81, "learning_rate": 9.788975367086576e-05, "loss": 0.0595, "theoretical_loss": 3.350922577324364, "tokens_seen": 2980184064 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.0008086652960628271, "objective/train/docs_used": 1083727, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3861056566238403, "objective/train/original_loss": 1.3861057758331299, "objective/train/theoretical_loss": 3.3509113274082662, "objective/train/tokens_used": 1350839776, "objective/train/value_avg": -0.00913238525390625, "objective/train/value_loss": 0.00026631756918504834, "objective/train/value_max": -3.349781036376953e-05, "objective/train/value_min": -0.580078125, "objective/train/value_reward_corr": 0.686456631569798, "objective/train/value_std": 0.0177459716796875, "objective/train/weight_avg": 1.0009338855743408, "objective/train/weighted_lm_loss": 1.3878506422042847, "objective/train/weights_max": 1.507053256034851, "objective/train/weights_min": 0.3768506348133087, "theoretical_loss": 3.3509113274082662, "tokens_seen": 2980315136 }, { "epoch": 0.81, "learning_rate": 9.784963491936131e-05, "loss": 0.0693, "theoretical_loss": 3.3509113274082662, "tokens_seen": 2980315136 }, { "epoch": 0.81, "learning_rate": 9.780951616785685e-05, "loss": 0.0648, "theoretical_loss": 3.3509000781254468, "tokens_seen": 2980446208 }, { "epoch": 0.81, "learning_rate": 9.776939741635241e-05, "loss": 0.0671, "theoretical_loss": 3.350888829475842, "tokens_seen": 2980577280 }, { "epoch": 0.81, "learning_rate": 9.772927866484795e-05, "loss": 0.0661, "theoretical_loss": 3.3508775814593896, "tokens_seen": 2980708352 }, { "epoch": 0.81, "learning_rate": 9.768915991334349e-05, "loss": 0.0647, "theoretical_loss": 3.3508663340760245, "tokens_seen": 2980839424 }, { "epoch": 0.81, "learning_rate": 9.764904116183905e-05, "loss": 0.0658, "theoretical_loss": 3.3508550873256846, "tokens_seen": 2980970496 }, { "epoch": 0.81, "learning_rate": 9.760892241033459e-05, "loss": 0.064, "theoretical_loss": 3.3508438412083055, "tokens_seen": 2981101568 }, { "epoch": 0.81, "learning_rate": 9.756880365883014e-05, "loss": 0.0639, "theoretical_loss": 3.350832595723824, "tokens_seen": 2981232640 }, { "epoch": 0.81, "learning_rate": 9.752868490732568e-05, "loss": 0.065, "theoretical_loss": 3.350821350872177, "tokens_seen": 2981363712 }, { "epoch": 0.81, "learning_rate": 9.748856615582123e-05, "loss": 0.0652, "theoretical_loss": 3.3508101066533005, "tokens_seen": 2981494784 }, { "epoch": 0.81, "learning_rate": 9.744844740431678e-05, "loss": 0.0664, "theoretical_loss": 3.3507988630671317, "tokens_seen": 2981625856 }, { "epoch": 0.81, "learning_rate": 9.740832865281232e-05, "loss": 0.0649, "theoretical_loss": 3.350787620113607, "tokens_seen": 2981756928 }, { "epoch": 0.81, "learning_rate": 9.736820990130788e-05, "loss": 0.0634, "theoretical_loss": 3.3507763777926627, "tokens_seen": 2981888000 }, { "epoch": 0.81, "learning_rate": 9.732809114980342e-05, "loss": 0.0665, "theoretical_loss": 3.3507651361042354, "tokens_seen": 2982019072 }, { "epoch": 0.81, "learning_rate": 9.728797239829897e-05, "loss": 0.0662, "theoretical_loss": 3.3507538950482623, "tokens_seen": 2982150144 }, { "epoch": 0.81, "learning_rate": 9.724785364679451e-05, "loss": 0.0662, "theoretical_loss": 3.3507426546246792, "tokens_seen": 2982281216 }, { "epoch": 0.81, "learning_rate": 9.720773489529005e-05, "loss": 0.0638, "theoretical_loss": 3.3507314148334233, "tokens_seen": 2982412288 }, { "epoch": 0.81, "learning_rate": 9.716761614378561e-05, "loss": 0.0654, "theoretical_loss": 3.3507201756744314, "tokens_seen": 2982543360 }, { "epoch": 0.81, "learning_rate": 9.712749739228116e-05, "loss": 0.063, "theoretical_loss": 3.3507089371476395, "tokens_seen": 2982674432 }, { "epoch": 0.81, "learning_rate": 9.70873786407767e-05, "loss": 0.0644, "theoretical_loss": 3.3506976992529847, "tokens_seen": 2982805504 }, { "epoch": 0.81, "learning_rate": 9.704725988927225e-05, "loss": 0.0663, "theoretical_loss": 3.3506864619904038, "tokens_seen": 2982936576 }, { "epoch": 0.81, "learning_rate": 9.700714113776779e-05, "loss": 0.0677, "theoretical_loss": 3.350675225359833, "tokens_seen": 2983067648 }, { "epoch": 0.81, "learning_rate": 9.696702238626334e-05, "loss": 0.0613, "theoretical_loss": 3.3506639893612093, "tokens_seen": 2983198720 }, { "epoch": 0.81, "learning_rate": 9.69269036347589e-05, "loss": 0.0617, "theoretical_loss": 3.3506527539944693, "tokens_seen": 2983329792 }, { "epoch": 0.81, "learning_rate": 9.688678488325444e-05, "loss": 0.0658, "theoretical_loss": 3.3506415192595496, "tokens_seen": 2983460864 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.0008193785906769335, "objective/train/docs_used": 1084846, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.207245111465454, "objective/train/original_loss": 1.207244873046875, "objective/train/theoretical_loss": 3.350630285156387, "objective/train/tokens_used": 1354116576, "objective/train/value_avg": -0.01232147216796875, "objective/train/value_loss": 0.0005648218211717904, "objective/train/value_max": -4.297494888305664e-05, "objective/train/value_min": -0.70849609375, "objective/train/value_reward_corr": 0.7514797669483622, "objective/train/value_std": 0.0288543701171875, "objective/train/weight_avg": 1.001079797744751, "objective/train/weighted_lm_loss": 1.207803726196289, "objective/train/weights_max": 1.7537949085235596, "objective/train/weights_min": 0.38588747382164, "theoretical_loss": 3.350630285156387, "tokens_seen": 2983591936 }, { "epoch": 0.81, "learning_rate": 9.684666613174998e-05, "loss": 0.0594, "theoretical_loss": 3.350630285156387, "tokens_seen": 2983591936 }, { "epoch": 0.81, "learning_rate": 9.680654738024552e-05, "loss": 0.0628, "theoretical_loss": 3.350619051684919, "tokens_seen": 2983723008 }, { "epoch": 0.81, "learning_rate": 9.676642862874108e-05, "loss": 0.0642, "theoretical_loss": 3.3506078188450807, "tokens_seen": 2983854080 }, { "epoch": 0.81, "learning_rate": 9.672630987723663e-05, "loss": 0.0585, "theoretical_loss": 3.3505965866368106, "tokens_seen": 2983985152 }, { "epoch": 0.81, "learning_rate": 9.668619112573217e-05, "loss": 0.0626, "theoretical_loss": 3.350585355060044, "tokens_seen": 2984116224 }, { "epoch": 0.81, "learning_rate": 9.664607237422771e-05, "loss": 0.0625, "theoretical_loss": 3.3505741241147184, "tokens_seen": 2984247296 }, { "epoch": 0.81, "learning_rate": 9.660595362272326e-05, "loss": 0.0597, "theoretical_loss": 3.3505628938007703, "tokens_seen": 2984378368 }, { "epoch": 0.81, "learning_rate": 9.656583487121881e-05, "loss": 0.0647, "theoretical_loss": 3.350551664118137, "tokens_seen": 2984509440 }, { "epoch": 0.81, "learning_rate": 9.652571611971437e-05, "loss": 0.0637, "theoretical_loss": 3.3505404350667547, "tokens_seen": 2984640512 }, { "epoch": 0.81, "learning_rate": 9.64855973682099e-05, "loss": 0.066, "theoretical_loss": 3.35052920664656, "tokens_seen": 2984771584 }, { "epoch": 0.81, "learning_rate": 9.644547861670545e-05, "loss": 0.0613, "theoretical_loss": 3.350517978857491, "tokens_seen": 2984902656 }, { "epoch": 0.81, "learning_rate": 9.640535986520099e-05, "loss": 0.0678, "theoretical_loss": 3.350506751699483, "tokens_seen": 2985033728 }, { "epoch": 0.81, "learning_rate": 9.636524111369654e-05, "loss": 0.0625, "theoretical_loss": 3.3504955251724735, "tokens_seen": 2985164800 }, { "epoch": 0.81, "learning_rate": 9.63251223621921e-05, "loss": 0.0635, "theoretical_loss": 3.3504842992763995, "tokens_seen": 2985295872 }, { "epoch": 0.81, "learning_rate": 9.628500361068764e-05, "loss": 0.0647, "theoretical_loss": 3.350473074011197, "tokens_seen": 2985426944 }, { "epoch": 0.81, "learning_rate": 9.624488485918318e-05, "loss": 0.065, "theoretical_loss": 3.350461849376804, "tokens_seen": 2985558016 }, { "epoch": 0.81, "learning_rate": 9.620476610767872e-05, "loss": 0.0663, "theoretical_loss": 3.3504506253731567, "tokens_seen": 2985689088 }, { "epoch": 0.81, "learning_rate": 9.616464735617428e-05, "loss": 0.0648, "theoretical_loss": 3.350439402000192, "tokens_seen": 2985820160 }, { "epoch": 0.81, "learning_rate": 9.612452860466983e-05, "loss": 0.0642, "theoretical_loss": 3.350428179257847, "tokens_seen": 2985951232 }, { "epoch": 0.81, "learning_rate": 9.608440985316537e-05, "loss": 0.0618, "theoretical_loss": 3.3504169571460585, "tokens_seen": 2986082304 }, { "epoch": 0.81, "learning_rate": 9.604429110166093e-05, "loss": 0.0651, "theoretical_loss": 3.350405735664763, "tokens_seen": 2986213376 }, { "epoch": 0.81, "learning_rate": 9.600417235015646e-05, "loss": 0.0617, "theoretical_loss": 3.3503945148138983, "tokens_seen": 2986344448 }, { "epoch": 0.81, "learning_rate": 9.596405359865201e-05, "loss": 0.068, "theoretical_loss": 3.3503832945934002, "tokens_seen": 2986475520 }, { "epoch": 0.81, "learning_rate": 9.592393484714757e-05, "loss": 0.0621, "theoretical_loss": 3.350372075003207, "tokens_seen": 2986606592 }, { "epoch": 0.81, "learning_rate": 9.588381609564311e-05, "loss": 0.0605, "theoretical_loss": 3.3503608560432543, "tokens_seen": 2986737664 }, { "epoch": 0.81, "objective/train/advantage_avg": 6.757098890375346e-05, "objective/train/docs_used": 1086010, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3565868139266968, "objective/train/original_loss": 1.3565869331359863, "objective/train/theoretical_loss": 3.3503496377134794, "objective/train/tokens_used": 1357393376, "objective/train/value_avg": -0.00872039794921875, "objective/train/value_loss": 0.00036822157562710345, "objective/train/value_max": -1.4603137969970703e-05, "objective/train/value_min": -0.83837890625, "objective/train/value_reward_corr": 0.738295699357484, "objective/train/value_std": 0.020538330078125, "objective/train/weight_avg": 1.0002326965332031, "objective/train/weighted_lm_loss": 1.355155348777771, "objective/train/weights_max": 1.7405465841293335, "objective/train/weights_min": 0.3803688883781433, "theoretical_loss": 3.3503496377134794, "tokens_seen": 2986868736 }, { "epoch": 0.81, "learning_rate": 9.584369734413866e-05, "loss": 0.0603, "theoretical_loss": 3.3503496377134794, "tokens_seen": 2986868736 }, { "epoch": 0.81, "learning_rate": 9.580357859263419e-05, "loss": 0.064, "theoretical_loss": 3.3503384200138195, "tokens_seen": 2986999808 }, { "epoch": 0.81, "learning_rate": 9.576345984112974e-05, "loss": 0.064, "theoretical_loss": 3.3503272029442117, "tokens_seen": 2987130880 }, { "epoch": 0.81, "learning_rate": 9.57233410896253e-05, "loss": 0.067, "theoretical_loss": 3.350315986504593, "tokens_seen": 2987261952 }, { "epoch": 0.81, "learning_rate": 9.568322233812084e-05, "loss": 0.0628, "theoretical_loss": 3.3503047706948994, "tokens_seen": 2987393024 }, { "epoch": 0.81, "learning_rate": 9.56431035866164e-05, "loss": 0.0644, "theoretical_loss": 3.350293555515069, "tokens_seen": 2987524096 }, { "epoch": 0.81, "learning_rate": 9.560298483511192e-05, "loss": 0.0684, "theoretical_loss": 3.3502823409650384, "tokens_seen": 2987655168 }, { "epoch": 0.81, "learning_rate": 9.556286608360748e-05, "loss": 0.0618, "theoretical_loss": 3.3502711270447447, "tokens_seen": 2987786240 }, { "epoch": 0.81, "learning_rate": 9.552274733210303e-05, "loss": 0.0631, "theoretical_loss": 3.3502599137541247, "tokens_seen": 2987917312 }, { "epoch": 0.81, "learning_rate": 9.548262858059857e-05, "loss": 0.0622, "theoretical_loss": 3.3502487010931157, "tokens_seen": 2988048384 }, { "epoch": 0.81, "learning_rate": 9.544250982909413e-05, "loss": 0.0629, "theoretical_loss": 3.350237489061654, "tokens_seen": 2988179456 }, { "epoch": 0.81, "learning_rate": 9.540239107758966e-05, "loss": 0.0669, "theoretical_loss": 3.350226277659678, "tokens_seen": 2988310528 }, { "epoch": 0.81, "learning_rate": 9.536227232608521e-05, "loss": 0.0626, "theoretical_loss": 3.350215066887124, "tokens_seen": 2988441600 }, { "epoch": 0.81, "learning_rate": 9.532215357458077e-05, "loss": 0.0601, "theoretical_loss": 3.3502038567439287, "tokens_seen": 2988572672 }, { "epoch": 0.81, "learning_rate": 9.528203482307631e-05, "loss": 0.0659, "theoretical_loss": 3.3501926472300294, "tokens_seen": 2988703744 }, { "epoch": 0.81, "learning_rate": 9.524191607157186e-05, "loss": 0.0592, "theoretical_loss": 3.350181438345363, "tokens_seen": 2988834816 }, { "epoch": 0.81, "learning_rate": 9.520179732006739e-05, "loss": 0.0636, "theoretical_loss": 3.3501702300898675, "tokens_seen": 2988965888 }, { "epoch": 0.81, "learning_rate": 9.516167856856294e-05, "loss": 0.0652, "theoretical_loss": 3.3501590224634796, "tokens_seen": 2989096960 }, { "epoch": 0.81, "learning_rate": 9.51215598170585e-05, "loss": 0.0648, "theoretical_loss": 3.3501478154661353, "tokens_seen": 2989228032 }, { "epoch": 0.81, "learning_rate": 9.508144106555404e-05, "loss": 0.0618, "theoretical_loss": 3.350136609097773, "tokens_seen": 2989359104 }, { "epoch": 0.81, "learning_rate": 9.50413223140496e-05, "loss": 0.063, "theoretical_loss": 3.3501254033583296, "tokens_seen": 2989490176 }, { "epoch": 0.81, "learning_rate": 9.500120356254512e-05, "loss": 0.0641, "theoretical_loss": 3.350114198247742, "tokens_seen": 2989621248 }, { "epoch": 0.81, "learning_rate": 9.496108481104068e-05, "loss": 0.0637, "theoretical_loss": 3.350102993765947, "tokens_seen": 2989752320 }, { "epoch": 0.81, "learning_rate": 9.492096605953623e-05, "loss": 0.064, "theoretical_loss": 3.3500917899128826, "tokens_seen": 2989883392 }, { "epoch": 0.81, "learning_rate": 9.488084730803177e-05, "loss": 0.0619, "theoretical_loss": 3.3500805866884855, "tokens_seen": 2990014464 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.00019861401233356446, "objective/train/docs_used": 1087207, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.303059697151184, "objective/train/original_loss": 1.303059697151184, "objective/train/theoretical_loss": 3.3500693840926923, "objective/train/tokens_used": 1360670176, "objective/train/value_avg": -0.005474090576171875, "objective/train/value_loss": 0.00015291482850443572, "objective/train/value_max": -2.3543834686279297e-05, "objective/train/value_min": -0.23193359375, "objective/train/value_reward_corr": 0.6028137130523471, "objective/train/value_std": 0.0099945068359375, "objective/train/weight_avg": 1.0002663135528564, "objective/train/weighted_lm_loss": 1.3029582500457764, "objective/train/weights_max": 1.1091493368148804, "objective/train/weights_min": 0.36878642439842224, "theoretical_loss": 3.3500693840926923, "tokens_seen": 2990145536 }, { "epoch": 0.81, "learning_rate": 9.484072855652733e-05, "loss": 0.0647, "theoretical_loss": 3.3500693840926923, "tokens_seen": 2990145536 }, { "epoch": 0.81, "learning_rate": 9.480060980502286e-05, "loss": 0.0605, "theoretical_loss": 3.350058182125441, "tokens_seen": 2990276608 }, { "epoch": 0.81, "learning_rate": 9.476049105351841e-05, "loss": 0.0642, "theoretical_loss": 3.3500469807866686, "tokens_seen": 2990407680 }, { "epoch": 0.81, "learning_rate": 9.472037230201397e-05, "loss": 0.065, "theoretical_loss": 3.3500357800763125, "tokens_seen": 2990538752 }, { "epoch": 0.81, "learning_rate": 9.468025355050951e-05, "loss": 0.0606, "theoretical_loss": 3.3500245799943094, "tokens_seen": 2990669824 }, { "epoch": 0.81, "learning_rate": 9.464013479900506e-05, "loss": 0.066, "theoretical_loss": 3.3500133805405965, "tokens_seen": 2990800896 }, { "epoch": 0.81, "learning_rate": 9.46000160475006e-05, "loss": 0.0634, "theoretical_loss": 3.3500021817151113, "tokens_seen": 2990931968 }, { "epoch": 0.81, "learning_rate": 9.455989729599615e-05, "loss": 0.0638, "theoretical_loss": 3.3499909835177912, "tokens_seen": 2991063040 }, { "epoch": 0.81, "learning_rate": 9.45197785444917e-05, "loss": 0.0651, "theoretical_loss": 3.349979785948573, "tokens_seen": 2991194112 }, { "epoch": 0.81, "learning_rate": 9.447965979298724e-05, "loss": 0.0629, "theoretical_loss": 3.3499685890073945, "tokens_seen": 2991325184 }, { "epoch": 0.81, "learning_rate": 9.44395410414828e-05, "loss": 0.0657, "theoretical_loss": 3.3499573926941926, "tokens_seen": 2991456256 }, { "epoch": 0.81, "learning_rate": 9.439942228997834e-05, "loss": 0.0611, "theoretical_loss": 3.3499461970089044, "tokens_seen": 2991587328 }, { "epoch": 0.81, "learning_rate": 9.435930353847388e-05, "loss": 0.0648, "theoretical_loss": 3.3499350019514678, "tokens_seen": 2991718400 }, { "epoch": 0.81, "learning_rate": 9.431918478696943e-05, "loss": 0.0652, "theoretical_loss": 3.349923807521819, "tokens_seen": 2991849472 }, { "epoch": 0.81, "learning_rate": 9.427906603546498e-05, "loss": 0.0635, "theoretical_loss": 3.349912613719896, "tokens_seen": 2991980544 }, { "epoch": 0.81, "learning_rate": 9.423894728396053e-05, "loss": 0.0649, "theoretical_loss": 3.3499014205456366, "tokens_seen": 2992111616 }, { "epoch": 0.81, "learning_rate": 9.419882853245607e-05, "loss": 0.0631, "theoretical_loss": 3.3498902279989773, "tokens_seen": 2992242688 }, { "epoch": 0.81, "learning_rate": 9.415870978095161e-05, "loss": 0.0632, "theoretical_loss": 3.349879036079856, "tokens_seen": 2992373760 }, { "epoch": 0.81, "learning_rate": 9.411859102944717e-05, "loss": 0.0601, "theoretical_loss": 3.3498678447882093, "tokens_seen": 2992504832 }, { "epoch": 0.81, "learning_rate": 9.407847227794271e-05, "loss": 0.0624, "theoretical_loss": 3.349856654123975, "tokens_seen": 2992635904 }, { "epoch": 0.81, "learning_rate": 9.403835352643826e-05, "loss": 0.0633, "theoretical_loss": 3.3498454640870907, "tokens_seen": 2992766976 }, { "epoch": 0.81, "learning_rate": 9.39982347749338e-05, "loss": 0.0616, "theoretical_loss": 3.349834274677493, "tokens_seen": 2992898048 }, { "epoch": 0.81, "learning_rate": 9.395811602342935e-05, "loss": 0.062, "theoretical_loss": 3.34982308589512, "tokens_seen": 2993029120 }, { "epoch": 0.81, "learning_rate": 9.39179972719249e-05, "loss": 0.0635, "theoretical_loss": 3.349811897739909, "tokens_seen": 2993160192 }, { "epoch": 0.81, "learning_rate": 9.387787852042044e-05, "loss": 0.0627, "theoretical_loss": 3.349800710211797, "tokens_seen": 2993291264 }, { "epoch": 0.81, "objective/train/advantage_avg": 0.00034333387156948447, "objective/train/docs_used": 1088408, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.280105471611023, "objective/train/original_loss": 1.2801052331924438, "objective/train/theoretical_loss": 3.3497895233107218, "objective/train/tokens_used": 1363946976, "objective/train/value_avg": -0.00884246826171875, "objective/train/value_loss": 0.0002553405938670039, "objective/train/value_max": -2.777576446533203e-05, "objective/train/value_min": -0.705078125, "objective/train/value_reward_corr": 0.7720263023066909, "objective/train/value_std": 0.019134521484375, "objective/train/weight_avg": 1.0004589557647705, "objective/train/weighted_lm_loss": 1.280160665512085, "objective/train/weights_max": 1.3852648735046387, "objective/train/weights_min": 0.36808687448501587, "theoretical_loss": 3.3497895233107218, "tokens_seen": 2993422336 }, { "epoch": 0.81, "learning_rate": 9.3837759768916e-05, "loss": 0.0624, "theoretical_loss": 3.3497895233107218, "tokens_seen": 2993422336 }, { "epoch": 0.81, "learning_rate": 9.379764101741154e-05, "loss": 0.0609, "theoretical_loss": 3.3497783370366205, "tokens_seen": 2993553408 }, { "epoch": 0.81, "learning_rate": 9.375752226590708e-05, "loss": 0.0662, "theoretical_loss": 3.3497671513894307, "tokens_seen": 2993684480 }, { "epoch": 0.81, "learning_rate": 9.371740351440263e-05, "loss": 0.0615, "theoretical_loss": 3.3497559663690897, "tokens_seen": 2993815552 }, { "epoch": 0.81, "learning_rate": 9.367728476289818e-05, "loss": 0.0648, "theoretical_loss": 3.3497447819755344, "tokens_seen": 2993946624 }, { "epoch": 0.81, "learning_rate": 9.363716601139373e-05, "loss": 0.0641, "theoretical_loss": 3.3497335982087035, "tokens_seen": 2994077696 }, { "epoch": 0.81, "learning_rate": 9.359704725988927e-05, "loss": 0.0651, "theoretical_loss": 3.349722415068533, "tokens_seen": 2994208768 }, { "epoch": 0.81, "learning_rate": 9.355692850838483e-05, "loss": 0.0627, "theoretical_loss": 3.349711232554962, "tokens_seen": 2994339840 }, { "epoch": 0.81, "learning_rate": 9.351680975688037e-05, "loss": 0.0619, "theoretical_loss": 3.3497000506679266, "tokens_seen": 2994470912 }, { "epoch": 0.81, "learning_rate": 9.347669100537591e-05, "loss": 0.065, "theoretical_loss": 3.3496888694073648, "tokens_seen": 2994601984 }, { "epoch": 0.81, "learning_rate": 9.343657225387146e-05, "loss": 0.0658, "theoretical_loss": 3.349677688773214, "tokens_seen": 2994733056 }, { "epoch": 0.82, "learning_rate": 9.3396453502367e-05, "loss": 0.0654, "theoretical_loss": 3.3496665087654116, "tokens_seen": 2994864128 }, { "epoch": 0.82, "learning_rate": 9.335633475086256e-05, "loss": 0.0626, "theoretical_loss": 3.3496553293838955, "tokens_seen": 2994995200 }, { "epoch": 0.82, "learning_rate": 9.33162159993581e-05, "loss": 0.06, "theoretical_loss": 3.3496441506286025, "tokens_seen": 2995126272 }, { "epoch": 0.82, "learning_rate": 9.327609724785364e-05, "loss": 0.0627, "theoretical_loss": 3.349632972499471, "tokens_seen": 2995257344 }, { "epoch": 0.82, "learning_rate": 9.32359784963492e-05, "loss": 0.0642, "theoretical_loss": 3.349621794996438, "tokens_seen": 2995388416 }, { "epoch": 0.82, "learning_rate": 9.319585974484475e-05, "loss": 0.068, "theoretical_loss": 3.3496106181194407, "tokens_seen": 2995519488 }, { "epoch": 0.82, "learning_rate": 9.31557409933403e-05, "loss": 0.0635, "theoretical_loss": 3.3495994418684174, "tokens_seen": 2995650560 }, { "epoch": 0.82, "learning_rate": 9.311562224183583e-05, "loss": 0.066, "theoretical_loss": 3.3495882662433054, "tokens_seen": 2995781632 }, { "epoch": 0.82, "learning_rate": 9.307550349033138e-05, "loss": 0.066, "theoretical_loss": 3.349577091244042, "tokens_seen": 2995912704 }, { "epoch": 0.82, "learning_rate": 9.303538473882693e-05, "loss": 0.0633, "theoretical_loss": 3.349565916870565, "tokens_seen": 2996043776 }, { "epoch": 0.82, "learning_rate": 9.299526598732249e-05, "loss": 0.0603, "theoretical_loss": 3.3495547431228117, "tokens_seen": 2996174848 }, { "epoch": 0.82, "learning_rate": 9.295514723581803e-05, "loss": 0.0629, "theoretical_loss": 3.34954357000072, "tokens_seen": 2996305920 }, { "epoch": 0.82, "learning_rate": 9.291502848431357e-05, "loss": 0.0653, "theoretical_loss": 3.3495323975042277, "tokens_seen": 2996436992 }, { "epoch": 0.82, "learning_rate": 9.287490973280911e-05, "loss": 0.0637, "theoretical_loss": 3.3495212256332723, "tokens_seen": 2996568064 }, { "epoch": 0.82, "objective/train/advantage_avg": 2.8360344003885984e-05, "objective/train/docs_used": 1089453, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3133518695831299, "objective/train/original_loss": 1.3133519887924194, "objective/train/theoretical_loss": 3.3495100543877907, "objective/train/tokens_used": 1367223776, "objective/train/value_avg": -0.007595062255859375, "objective/train/value_loss": 0.00014581935829482973, "objective/train/value_max": -1.4603137969970703e-05, "objective/train/value_min": -0.2454833984375, "objective/train/value_reward_corr": 0.7229414520430395, "objective/train/value_std": 0.0127410888671875, "objective/train/weight_avg": 1.000096082687378, "objective/train/weighted_lm_loss": 1.3127262592315674, "objective/train/weights_max": 1.2078955173492432, "objective/train/weights_min": 0.3727388083934784, "theoretical_loss": 3.3495100543877907, "tokens_seen": 2996699136 }, { "epoch": 0.82, "learning_rate": 9.283479098130466e-05, "loss": 0.0661, "theoretical_loss": 3.3495100543877907, "tokens_seen": 2996699136 }, { "epoch": 0.82, "learning_rate": 9.279467222980022e-05, "loss": 0.0617, "theoretical_loss": 3.3494988837677213, "tokens_seen": 2996830208 }, { "epoch": 0.82, "learning_rate": 9.275455347829576e-05, "loss": 0.0645, "theoretical_loss": 3.3494877137730015, "tokens_seen": 2996961280 }, { "epoch": 0.82, "learning_rate": 9.27144347267913e-05, "loss": 0.066, "theoretical_loss": 3.349476544403569, "tokens_seen": 2997092352 }, { "epoch": 0.82, "learning_rate": 9.267431597528684e-05, "loss": 0.0627, "theoretical_loss": 3.3494653756593613, "tokens_seen": 2997223424 }, { "epoch": 0.82, "learning_rate": 9.26341972237824e-05, "loss": 0.0619, "theoretical_loss": 3.349454207540316, "tokens_seen": 2997354496 }, { "epoch": 0.82, "learning_rate": 9.259407847227795e-05, "loss": 0.0636, "theoretical_loss": 3.3494430400463715, "tokens_seen": 2997485568 }, { "epoch": 0.82, "learning_rate": 9.25539597207735e-05, "loss": 0.0657, "theoretical_loss": 3.3494318731774646, "tokens_seen": 2997616640 }, { "epoch": 0.82, "learning_rate": 9.251384096926904e-05, "loss": 0.063, "theoretical_loss": 3.3494207069335333, "tokens_seen": 2997747712 }, { "epoch": 0.82, "learning_rate": 9.247372221776458e-05, "loss": 0.0618, "theoretical_loss": 3.3494095413145155, "tokens_seen": 2997878784 }, { "epoch": 0.82, "learning_rate": 9.243360346626013e-05, "loss": 0.0643, "theoretical_loss": 3.3493983763203485, "tokens_seen": 2998009856 }, { "epoch": 0.82, "learning_rate": 9.239348471475569e-05, "loss": 0.0629, "theoretical_loss": 3.3493872119509702, "tokens_seen": 2998140928 }, { "epoch": 0.82, "learning_rate": 9.235336596325123e-05, "loss": 0.0644, "theoretical_loss": 3.3493760482063184, "tokens_seen": 2998272000 }, { "epoch": 0.82, "learning_rate": 9.231324721174678e-05, "loss": 0.0622, "theoretical_loss": 3.349364885086331, "tokens_seen": 2998403072 }, { "epoch": 0.82, "learning_rate": 9.227312846024231e-05, "loss": 0.0649, "theoretical_loss": 3.349353722590945, "tokens_seen": 2998534144 }, { "epoch": 0.82, "learning_rate": 9.223300970873787e-05, "loss": 0.0615, "theoretical_loss": 3.3493425607200993, "tokens_seen": 2998665216 }, { "epoch": 0.82, "learning_rate": 9.219289095723342e-05, "loss": 0.0638, "theoretical_loss": 3.349331399473731, "tokens_seen": 2998796288 }, { "epoch": 0.82, "learning_rate": 9.215277220572896e-05, "loss": 0.0656, "theoretical_loss": 3.349320238851777, "tokens_seen": 2998927360 }, { "epoch": 0.82, "learning_rate": 9.211265345422452e-05, "loss": 0.0645, "theoretical_loss": 3.3493090788541773, "tokens_seen": 2999058432 }, { "epoch": 0.82, "learning_rate": 9.207253470272004e-05, "loss": 0.0625, "theoretical_loss": 3.3492979194808674, "tokens_seen": 2999189504 }, { "epoch": 0.82, "learning_rate": 9.20324159512156e-05, "loss": 0.0642, "theoretical_loss": 3.3492867607317867, "tokens_seen": 2999320576 }, { "epoch": 0.82, "learning_rate": 9.199229719971115e-05, "loss": 0.063, "theoretical_loss": 3.349275602606872, "tokens_seen": 2999451648 }, { "epoch": 0.82, "learning_rate": 9.19521784482067e-05, "loss": 0.0637, "theoretical_loss": 3.3492644451060616, "tokens_seen": 2999582720 }, { "epoch": 0.82, "learning_rate": 9.191205969670225e-05, "loss": 0.0645, "theoretical_loss": 3.3492532882292934, "tokens_seen": 2999713792 }, { "epoch": 0.82, "learning_rate": 9.187194094519778e-05, "loss": 0.0585, "theoretical_loss": 3.3492421319765047, "tokens_seen": 2999844864 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.00021293347526807338, "objective/train/docs_used": 1090669, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1802539825439453, "objective/train/original_loss": 1.1802537441253662, "objective/train/theoretical_loss": 3.3492309763476342, "objective/train/tokens_used": 1370500576, "objective/train/value_avg": -0.00876617431640625, "objective/train/value_loss": 0.00019036293087992817, "objective/train/value_max": -3.170967102050781e-05, "objective/train/value_min": -0.73974609375, "objective/train/value_reward_corr": 0.7908432499669501, "objective/train/value_std": 0.017120361328125, "objective/train/weight_avg": 1.0003011226654053, "objective/train/weighted_lm_loss": 1.180512547492981, "objective/train/weights_max": 1.3708481788635254, "objective/train/weights_min": 0.40128397941589355, "theoretical_loss": 3.3492309763476342, "tokens_seen": 2999975936 }, { "epoch": 0.82, "learning_rate": 9.183182219369333e-05, "loss": 0.0593, "theoretical_loss": 3.3492309763476342, "tokens_seen": 2999975936 }, { "epoch": 0.82, "learning_rate": 9.179170344218889e-05, "loss": 0.0615, "theoretical_loss": 3.349219821342619, "tokens_seen": 3000107008 }, { "epoch": 0.82, "learning_rate": 9.175158469068443e-05, "loss": 0.0644, "theoretical_loss": 3.349208666961397, "tokens_seen": 3000238080 }, { "epoch": 0.82, "learning_rate": 9.171146593917998e-05, "loss": 0.0646, "theoretical_loss": 3.3491975132039062, "tokens_seen": 3000369152 }, { "epoch": 0.82, "learning_rate": 9.167134718767551e-05, "loss": 0.0652, "theoretical_loss": 3.349186360070085, "tokens_seen": 3000500224 }, { "epoch": 0.82, "learning_rate": 9.163122843617107e-05, "loss": 0.0624, "theoretical_loss": 3.3491752075598704, "tokens_seen": 3000631296 }, { "epoch": 0.82, "learning_rate": 9.159110968466662e-05, "loss": 0.0652, "theoretical_loss": 3.349164055673201, "tokens_seen": 3000762368 }, { "epoch": 0.82, "learning_rate": 9.155099093316216e-05, "loss": 0.0679, "theoretical_loss": 3.3491529044100146, "tokens_seen": 3000893440 }, { "epoch": 0.82, "learning_rate": 9.151087218165772e-05, "loss": 0.0642, "theoretical_loss": 3.3491417537702484, "tokens_seen": 3001024512 }, { "epoch": 0.82, "learning_rate": 9.147075343015324e-05, "loss": 0.0614, "theoretical_loss": 3.3491306037538413, "tokens_seen": 3001155584 }, { "epoch": 0.82, "learning_rate": 9.14306346786488e-05, "loss": 0.0636, "theoretical_loss": 3.3491194543607308, "tokens_seen": 3001286656 }, { "epoch": 0.82, "learning_rate": 9.139051592714435e-05, "loss": 0.0664, "theoretical_loss": 3.3491083055908546, "tokens_seen": 3001417728 }, { "epoch": 0.82, "learning_rate": 9.13503971756399e-05, "loss": 0.059, "theoretical_loss": 3.349097157444151, "tokens_seen": 3001548800 }, { "epoch": 0.82, "learning_rate": 9.131027842413545e-05, "loss": 0.0626, "theoretical_loss": 3.349086009920558, "tokens_seen": 3001679872 }, { "epoch": 0.82, "learning_rate": 9.127015967263098e-05, "loss": 0.0632, "theoretical_loss": 3.349074863020013, "tokens_seen": 3001810944 }, { "epoch": 0.82, "learning_rate": 9.123004092112653e-05, "loss": 0.0656, "theoretical_loss": 3.3490637167424544, "tokens_seen": 3001942016 }, { "epoch": 0.82, "learning_rate": 9.118992216962209e-05, "loss": 0.0653, "theoretical_loss": 3.3490525710878205, "tokens_seen": 3002073088 }, { "epoch": 0.82, "learning_rate": 9.114980341811763e-05, "loss": 0.0676, "theoretical_loss": 3.349041426056049, "tokens_seen": 3002204160 }, { "epoch": 0.82, "learning_rate": 9.110968466661318e-05, "loss": 0.0654, "theoretical_loss": 3.3490302816470776, "tokens_seen": 3002335232 }, { "epoch": 0.82, "learning_rate": 9.106956591510871e-05, "loss": 0.0625, "theoretical_loss": 3.3490191378608447, "tokens_seen": 3002466304 }, { "epoch": 0.82, "learning_rate": 9.102944716360427e-05, "loss": 0.0622, "theoretical_loss": 3.349007994697288, "tokens_seen": 3002597376 }, { "epoch": 0.82, "learning_rate": 9.098932841209982e-05, "loss": 0.0624, "theoretical_loss": 3.3489968521563456, "tokens_seen": 3002728448 }, { "epoch": 0.82, "learning_rate": 9.094920966059536e-05, "loss": 0.0595, "theoretical_loss": 3.348985710237956, "tokens_seen": 3002859520 }, { "epoch": 0.82, "learning_rate": 9.090909090909092e-05, "loss": 0.06, "theoretical_loss": 3.3489745689420567, "tokens_seen": 3002990592 }, { "epoch": 0.82, "learning_rate": 9.086897215758646e-05, "loss": 0.0643, "theoretical_loss": 3.348963428268586, "tokens_seen": 3003121664 }, { "epoch": 0.82, "objective/train/advantage_avg": 0.004574526567012072, "objective/train/docs_used": 1091879, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2278687953948975, "objective/train/original_loss": 1.2278687953948975, "objective/train/theoretical_loss": 3.348952288217482, "objective/train/tokens_used": 1373777376, "objective/train/value_avg": -0.01251983642578125, "objective/train/value_loss": 0.0013244827277958393, "objective/train/value_max": -1.806020736694336e-05, "objective/train/value_min": -0.97119140625, "objective/train/value_reward_corr": 0.42607667318176234, "objective/train/value_std": 0.0350341796875, "objective/train/weight_avg": 1.0052400827407837, "objective/train/weighted_lm_loss": 1.2330137491226196, "objective/train/weights_max": 2.240029811859131, "objective/train/weights_min": 0.3709034025669098, "theoretical_loss": 3.348952288217482, "tokens_seen": 3003252736 }, { "epoch": 0.82, "learning_rate": 9.0828853406082e-05, "loss": 0.0661, "theoretical_loss": 3.348952288217482, "tokens_seen": 3003252736 }, { "epoch": 0.82, "learning_rate": 9.078873465457755e-05, "loss": 0.0635, "theoretical_loss": 3.3489411487886827, "tokens_seen": 3003383808 }, { "epoch": 0.82, "learning_rate": 9.07486159030731e-05, "loss": 0.0619, "theoretical_loss": 3.3489300099821255, "tokens_seen": 3003514880 }, { "epoch": 0.82, "learning_rate": 9.070849715156865e-05, "loss": 0.0631, "theoretical_loss": 3.34891887179775, "tokens_seen": 3003645952 }, { "epoch": 0.82, "learning_rate": 9.066837840006419e-05, "loss": 0.0663, "theoretical_loss": 3.348907734235493, "tokens_seen": 3003777024 }, { "epoch": 0.82, "learning_rate": 9.062825964855973e-05, "loss": 0.0649, "theoretical_loss": 3.348896597295293, "tokens_seen": 3003908096 }, { "epoch": 0.82, "learning_rate": 9.058814089705529e-05, "loss": 0.0656, "theoretical_loss": 3.3488854609770886, "tokens_seen": 3004039168 }, { "epoch": 0.82, "learning_rate": 9.054802214555083e-05, "loss": 0.0617, "theoretical_loss": 3.3488743252808173, "tokens_seen": 3004170240 }, { "epoch": 0.82, "learning_rate": 9.050790339404638e-05, "loss": 0.0649, "theoretical_loss": 3.3488631902064174, "tokens_seen": 3004301312 }, { "epoch": 0.82, "learning_rate": 9.046778464254193e-05, "loss": 0.063, "theoretical_loss": 3.3488520557538273, "tokens_seen": 3004432384 }, { "epoch": 0.82, "learning_rate": 9.042766589103747e-05, "loss": 0.0614, "theoretical_loss": 3.3488409219229847, "tokens_seen": 3004563456 }, { "epoch": 0.82, "learning_rate": 9.038754713953302e-05, "loss": 0.0609, "theoretical_loss": 3.348829788713828, "tokens_seen": 3004694528 }, { "epoch": 0.82, "learning_rate": 9.034742838802856e-05, "loss": 0.0614, "theoretical_loss": 3.348818656126295, "tokens_seen": 3004825600 }, { "epoch": 0.82, "learning_rate": 9.030730963652412e-05, "loss": 0.0648, "theoretical_loss": 3.348807524160325, "tokens_seen": 3004956672 }, { "epoch": 0.82, "learning_rate": 9.026719088501966e-05, "loss": 0.0655, "theoretical_loss": 3.3487963928158546, "tokens_seen": 3005087744 }, { "epoch": 0.82, "learning_rate": 9.02270721335152e-05, "loss": 0.0683, "theoretical_loss": 3.3487852620928233, "tokens_seen": 3005218816 }, { "epoch": 0.82, "learning_rate": 9.018695338201076e-05, "loss": 0.0597, "theoretical_loss": 3.348774131991169, "tokens_seen": 3005349888 }, { "epoch": 0.82, "learning_rate": 9.01468346305063e-05, "loss": 0.0661, "theoretical_loss": 3.3487630025108293, "tokens_seen": 3005480960 }, { "epoch": 0.82, "learning_rate": 9.010671587900185e-05, "loss": 0.0662, "theoretical_loss": 3.348751873651743, "tokens_seen": 3005612032 }, { "epoch": 0.82, "learning_rate": 9.006659712749739e-05, "loss": 0.0637, "theoretical_loss": 3.3487407454138483, "tokens_seen": 3005743104 }, { "epoch": 0.82, "learning_rate": 9.002647837599293e-05, "loss": 0.0627, "theoretical_loss": 3.3487296177970833, "tokens_seen": 3005874176 }, { "epoch": 0.82, "learning_rate": 8.998635962448849e-05, "loss": 0.0617, "theoretical_loss": 3.348718490801386, "tokens_seen": 3006005248 }, { "epoch": 0.82, "learning_rate": 8.994624087298403e-05, "loss": 0.0667, "theoretical_loss": 3.348707364426695, "tokens_seen": 3006136320 }, { "epoch": 0.82, "learning_rate": 8.990612212147958e-05, "loss": 0.0638, "theoretical_loss": 3.348696238672949, "tokens_seen": 3006267392 }, { "epoch": 0.82, "learning_rate": 8.986600336997513e-05, "loss": 0.0649, "theoretical_loss": 3.3486851135400855, "tokens_seen": 3006398464 }, { "epoch": 0.82, "objective/train/advantage_avg": 1.8869370251195505e-05, "objective/train/docs_used": 1092739, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3733830451965332, "objective/train/original_loss": 1.3733830451965332, "objective/train/theoretical_loss": 3.3486739890280433, "objective/train/tokens_used": 1377054176, "objective/train/value_avg": -0.00843048095703125, "objective/train/value_loss": 0.0003625130921136588, "objective/train/value_max": -2.5451183319091797e-05, "objective/train/value_min": -0.90478515625, "objective/train/value_reward_corr": 0.8010614613195457, "objective/train/value_std": 0.0229339599609375, "objective/train/weight_avg": 1.0001810789108276, "objective/train/weighted_lm_loss": 1.3739793300628662, "objective/train/weights_max": 1.9974963665008545, "objective/train/weights_min": 0.3720710873603821, "theoretical_loss": 3.3486739890280433, "tokens_seen": 3006529536 }, { "epoch": 0.82, "learning_rate": 8.982588461847067e-05, "loss": 0.0648, "theoretical_loss": 3.3486739890280433, "tokens_seen": 3006529536 }, { "epoch": 0.82, "learning_rate": 8.978576586696622e-05, "loss": 0.0646, "theoretical_loss": 3.34866286513676, "tokens_seen": 3006660608 }, { "epoch": 0.82, "learning_rate": 8.974564711546176e-05, "loss": 0.0609, "theoretical_loss": 3.3486517418661745, "tokens_seen": 3006791680 }, { "epoch": 0.82, "learning_rate": 8.970552836395732e-05, "loss": 0.0657, "theoretical_loss": 3.348640619216225, "tokens_seen": 3006922752 }, { "epoch": 0.82, "learning_rate": 8.966540961245286e-05, "loss": 0.0663, "theoretical_loss": 3.3486294971868498, "tokens_seen": 3007053824 }, { "epoch": 0.82, "learning_rate": 8.962529086094841e-05, "loss": 0.0638, "theoretical_loss": 3.348618375777987, "tokens_seen": 3007184896 }, { "epoch": 0.82, "learning_rate": 8.958517210944396e-05, "loss": 0.0613, "theoretical_loss": 3.3486072549895756, "tokens_seen": 3007315968 }, { "epoch": 0.82, "learning_rate": 8.95450533579395e-05, "loss": 0.0635, "theoretical_loss": 3.348596134821553, "tokens_seen": 3007447040 }, { "epoch": 0.82, "learning_rate": 8.950493460643505e-05, "loss": 0.0612, "theoretical_loss": 3.3485850152738585, "tokens_seen": 3007578112 }, { "epoch": 0.82, "learning_rate": 8.946481585493059e-05, "loss": 0.0639, "theoretical_loss": 3.34857389634643, "tokens_seen": 3007709184 }, { "epoch": 0.82, "learning_rate": 8.942469710342615e-05, "loss": 0.0586, "theoretical_loss": 3.3485627780392058, "tokens_seen": 3007840256 }, { "epoch": 0.82, "learning_rate": 8.938457835192169e-05, "loss": 0.0624, "theoretical_loss": 3.348551660352124, "tokens_seen": 3007971328 }, { "epoch": 0.82, "learning_rate": 8.934445960041723e-05, "loss": 0.0638, "theoretical_loss": 3.348540543285124, "tokens_seen": 3008102400 }, { "epoch": 0.82, "learning_rate": 8.930434084891279e-05, "loss": 0.062, "theoretical_loss": 3.348529426838143, "tokens_seen": 3008233472 }, { "epoch": 0.82, "learning_rate": 8.926422209740834e-05, "loss": 0.0642, "theoretical_loss": 3.3485183110111203, "tokens_seen": 3008364544 }, { "epoch": 0.82, "learning_rate": 8.922410334590388e-05, "loss": 0.0611, "theoretical_loss": 3.348507195803994, "tokens_seen": 3008495616 }, { "epoch": 0.82, "learning_rate": 8.918398459439942e-05, "loss": 0.0625, "theoretical_loss": 3.348496081216702, "tokens_seen": 3008626688 }, { "epoch": 0.82, "learning_rate": 8.914386584289496e-05, "loss": 0.0642, "theoretical_loss": 3.348484967249184, "tokens_seen": 3008757760 }, { "epoch": 0.82, "learning_rate": 8.910374709139052e-05, "loss": 0.0627, "theoretical_loss": 3.3484738539013774, "tokens_seen": 3008888832 }, { "epoch": 0.82, "learning_rate": 8.906362833988607e-05, "loss": 0.0618, "theoretical_loss": 3.348462741173221, "tokens_seen": 3009019904 }, { "epoch": 0.82, "learning_rate": 8.902350958838161e-05, "loss": 0.064, "theoretical_loss": 3.3484516290646527, "tokens_seen": 3009150976 }, { "epoch": 0.82, "learning_rate": 8.898339083687716e-05, "loss": 0.0632, "theoretical_loss": 3.348440517575612, "tokens_seen": 3009282048 }, { "epoch": 0.82, "learning_rate": 8.89432720853727e-05, "loss": 0.0623, "theoretical_loss": 3.3484294067060367, "tokens_seen": 3009413120 }, { "epoch": 0.82, "learning_rate": 8.890315333386825e-05, "loss": 0.0626, "theoretical_loss": 3.3484182964558653, "tokens_seen": 3009544192 }, { "epoch": 0.82, "learning_rate": 8.886303458236381e-05, "loss": 0.0647, "theoretical_loss": 3.3484071868250367, "tokens_seen": 3009675264 }, { "epoch": 0.82, "objective/train/advantage_avg": 5.4227395594352856e-05, "objective/train/docs_used": 1093906, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.144332766532898, "objective/train/original_loss": 1.1443328857421875, "objective/train/theoretical_loss": 3.348396077813489, "objective/train/tokens_used": 1380330976, "objective/train/value_avg": -0.0081024169921875, "objective/train/value_loss": 0.0002597414131741971, "objective/train/value_max": -3.510713577270508e-05, "objective/train/value_min": -0.272216796875, "objective/train/value_reward_corr": 0.6814219308214406, "objective/train/value_std": 0.015106201171875, "objective/train/weight_avg": 1.0001699924468994, "objective/train/weighted_lm_loss": 1.1438390016555786, "objective/train/weights_max": 1.2142077684402466, "objective/train/weights_min": 0.3709401786327362, "theoretical_loss": 3.348396077813489, "tokens_seen": 3009806336 }, { "epoch": 0.82, "learning_rate": 8.882291583085935e-05, "loss": 0.0607, "theoretical_loss": 3.348396077813489, "tokens_seen": 3009806336 }, { "epoch": 0.82, "learning_rate": 8.878279707935489e-05, "loss": 0.0607, "theoretical_loss": 3.348384969421161, "tokens_seen": 3009937408 }, { "epoch": 0.82, "learning_rate": 8.874267832785043e-05, "loss": 0.0629, "theoretical_loss": 3.348373861647991, "tokens_seen": 3010068480 }, { "epoch": 0.82, "learning_rate": 8.870255957634599e-05, "loss": 0.0623, "theoretical_loss": 3.3483627544939174, "tokens_seen": 3010199552 }, { "epoch": 0.82, "learning_rate": 8.866244082484154e-05, "loss": 0.0638, "theoretical_loss": 3.348351647958879, "tokens_seen": 3010330624 }, { "epoch": 0.82, "learning_rate": 8.862232207333708e-05, "loss": 0.0659, "theoretical_loss": 3.3483405420428145, "tokens_seen": 3010461696 }, { "epoch": 0.82, "learning_rate": 8.858220332183264e-05, "loss": 0.066, "theoretical_loss": 3.348329436745662, "tokens_seen": 3010592768 }, { "epoch": 0.82, "learning_rate": 8.854208457032816e-05, "loss": 0.0642, "theoretical_loss": 3.34831833206736, "tokens_seen": 3010723840 }, { "epoch": 0.82, "learning_rate": 8.850196581882372e-05, "loss": 0.0599, "theoretical_loss": 3.3483072280078483, "tokens_seen": 3010854912 }, { "epoch": 0.82, "learning_rate": 8.846184706731927e-05, "loss": 0.063, "theoretical_loss": 3.3482961245670637, "tokens_seen": 3010985984 }, { "epoch": 0.82, "learning_rate": 8.842172831581482e-05, "loss": 0.0647, "theoretical_loss": 3.348285021744946, "tokens_seen": 3011117056 }, { "epoch": 0.83, "learning_rate": 8.838160956431037e-05, "loss": 0.0661, "theoretical_loss": 3.348273919541434, "tokens_seen": 3011248128 }, { "epoch": 0.83, "learning_rate": 8.83414908128059e-05, "loss": 0.063, "theoretical_loss": 3.348262817956465, "tokens_seen": 3011379200 }, { "epoch": 0.83, "learning_rate": 8.830137206130145e-05, "loss": 0.0635, "theoretical_loss": 3.348251716989979, "tokens_seen": 3011510272 }, { "epoch": 0.83, "learning_rate": 8.826125330979701e-05, "loss": 0.0641, "theoretical_loss": 3.348240616641913, "tokens_seen": 3011641344 }, { "epoch": 0.83, "learning_rate": 8.822113455829255e-05, "loss": 0.0619, "theoretical_loss": 3.3482295169122076, "tokens_seen": 3011772416 }, { "epoch": 0.83, "learning_rate": 8.81810158067881e-05, "loss": 0.0632, "theoretical_loss": 3.3482184178008003, "tokens_seen": 3011903488 }, { "epoch": 0.83, "learning_rate": 8.814089705528363e-05, "loss": 0.0635, "theoretical_loss": 3.3482073193076296, "tokens_seen": 3012034560 }, { "epoch": 0.83, "learning_rate": 8.810077830377919e-05, "loss": 0.0613, "theoretical_loss": 3.348196221432635, "tokens_seen": 3012165632 }, { "epoch": 0.83, "learning_rate": 8.806065955227474e-05, "loss": 0.067, "theoretical_loss": 3.3481851241757545, "tokens_seen": 3012296704 }, { "epoch": 0.83, "learning_rate": 8.802054080077028e-05, "loss": 0.0607, "theoretical_loss": 3.348174027536927, "tokens_seen": 3012427776 }, { "epoch": 0.83, "learning_rate": 8.798042204926584e-05, "loss": 0.064, "theoretical_loss": 3.3481629315160912, "tokens_seen": 3012558848 }, { "epoch": 0.83, "learning_rate": 8.794030329776137e-05, "loss": 0.0634, "theoretical_loss": 3.3481518361131855, "tokens_seen": 3012689920 }, { "epoch": 0.83, "learning_rate": 8.790018454625692e-05, "loss": 0.0632, "theoretical_loss": 3.348140741328149, "tokens_seen": 3012820992 }, { "epoch": 0.83, "learning_rate": 8.786006579475247e-05, "loss": 0.0627, "theoretical_loss": 3.3481296471609205, "tokens_seen": 3012952064 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.0003257080970797688, "objective/train/docs_used": 1095077, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.32185959815979, "objective/train/original_loss": 1.32185959815979, "objective/train/theoretical_loss": 3.348118553611438, "objective/train/tokens_used": 1383607776, "objective/train/value_avg": -0.00914764404296875, "objective/train/value_loss": 0.00036844416172243655, "objective/train/value_max": -4.506111145019531e-05, "objective/train/value_min": -0.841796875, "objective/train/value_reward_corr": 0.7026793147801762, "objective/train/value_std": 0.0207672119140625, "objective/train/weight_avg": 1.0004905462265015, "objective/train/weighted_lm_loss": 1.3217318058013916, "objective/train/weights_max": 1.5975273847579956, "objective/train/weights_min": 0.39044809341430664, "theoretical_loss": 3.348118553611438, "tokens_seen": 3013083136 }, { "epoch": 0.83, "learning_rate": 8.781994704324802e-05, "loss": 0.0621, "theoretical_loss": 3.348118553611438, "tokens_seen": 3013083136 }, { "epoch": 0.83, "learning_rate": 8.777982829174357e-05, "loss": 0.0645, "theoretical_loss": 3.348107460679641, "tokens_seen": 3013214208 }, { "epoch": 0.83, "learning_rate": 8.77397095402391e-05, "loss": 0.0659, "theoretical_loss": 3.348096368365468, "tokens_seen": 3013345280 }, { "epoch": 0.83, "learning_rate": 8.769959078873465e-05, "loss": 0.0654, "theoretical_loss": 3.348085276668858, "tokens_seen": 3013476352 }, { "epoch": 0.83, "learning_rate": 8.765947203723021e-05, "loss": 0.0648, "theoretical_loss": 3.348074185589749, "tokens_seen": 3013607424 }, { "epoch": 0.83, "learning_rate": 8.761935328572575e-05, "loss": 0.0655, "theoretical_loss": 3.3480630951280803, "tokens_seen": 3013738496 }, { "epoch": 0.83, "learning_rate": 8.75792345342213e-05, "loss": 0.0619, "theoretical_loss": 3.3480520052837908, "tokens_seen": 3013869568 }, { "epoch": 0.83, "learning_rate": 8.753911578271683e-05, "loss": 0.063, "theoretical_loss": 3.348040916056819, "tokens_seen": 3014000640 }, { "epoch": 0.83, "learning_rate": 8.749899703121239e-05, "loss": 0.0602, "theoretical_loss": 3.348029827447104, "tokens_seen": 3014131712 }, { "epoch": 0.83, "learning_rate": 8.745887827970794e-05, "loss": 0.0653, "theoretical_loss": 3.348018739454584, "tokens_seen": 3014262784 }, { "epoch": 0.83, "learning_rate": 8.741875952820348e-05, "loss": 0.0637, "theoretical_loss": 3.3480076520791986, "tokens_seen": 3014393856 }, { "epoch": 0.83, "learning_rate": 8.737864077669904e-05, "loss": 0.063, "theoretical_loss": 3.347996565320886, "tokens_seen": 3014524928 }, { "epoch": 0.83, "learning_rate": 8.733852202519457e-05, "loss": 0.0653, "theoretical_loss": 3.3479854791795853, "tokens_seen": 3014656000 }, { "epoch": 0.83, "learning_rate": 8.729840327369012e-05, "loss": 0.0637, "theoretical_loss": 3.3479743936552353, "tokens_seen": 3014787072 }, { "epoch": 0.83, "learning_rate": 8.725828452218568e-05, "loss": 0.0622, "theoretical_loss": 3.347963308747775, "tokens_seen": 3014918144 }, { "epoch": 0.83, "learning_rate": 8.721816577068122e-05, "loss": 0.064, "theoretical_loss": 3.347952224457143, "tokens_seen": 3015049216 }, { "epoch": 0.83, "learning_rate": 8.717804701917677e-05, "loss": 0.0678, "theoretical_loss": 3.347941140783278, "tokens_seen": 3015180288 }, { "epoch": 0.83, "learning_rate": 8.713792826767231e-05, "loss": 0.0667, "theoretical_loss": 3.3479300577261197, "tokens_seen": 3015311360 }, { "epoch": 0.83, "learning_rate": 8.709780951616785e-05, "loss": 0.0612, "theoretical_loss": 3.347918975285606, "tokens_seen": 3015442432 }, { "epoch": 0.83, "learning_rate": 8.705769076466341e-05, "loss": 0.0626, "theoretical_loss": 3.3479078934616764, "tokens_seen": 3015573504 }, { "epoch": 0.83, "learning_rate": 8.701757201315895e-05, "loss": 0.0618, "theoretical_loss": 3.347896812254269, "tokens_seen": 3015704576 }, { "epoch": 0.83, "learning_rate": 8.69774532616545e-05, "loss": 0.0633, "theoretical_loss": 3.347885731663324, "tokens_seen": 3015835648 }, { "epoch": 0.83, "learning_rate": 8.693733451015005e-05, "loss": 0.0625, "theoretical_loss": 3.3478746516887794, "tokens_seen": 3015966720 }, { "epoch": 0.83, "learning_rate": 8.689721575864559e-05, "loss": 0.0605, "theoretical_loss": 3.3478635723305743, "tokens_seen": 3016097792 }, { "epoch": 0.83, "learning_rate": 8.685709700714114e-05, "loss": 0.0643, "theoretical_loss": 3.347852493588648, "tokens_seen": 3016228864 }, { "epoch": 0.83, "objective/train/advantage_avg": -0.00042014909558929503, "objective/train/docs_used": 1096638, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.21356201171875, "objective/train/original_loss": 1.213562250137329, "objective/train/theoretical_loss": 3.3478414154629386, "objective/train/tokens_used": 1386884576, "objective/train/value_avg": -0.006404876708984375, "objective/train/value_loss": 0.0001621203264221549, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.296142578125, "objective/train/value_reward_corr": 0.7023178164584303, "objective/train/value_std": 0.01214599609375, "objective/train/weight_avg": 0.9996523261070251, "objective/train/weighted_lm_loss": 1.2129218578338623, "objective/train/weights_max": 1.1564855575561523, "objective/train/weights_min": 0.3702460527420044, "theoretical_loss": 3.3478414154629386, "tokens_seen": 3016359936 }, { "epoch": 0.83, "learning_rate": 8.681697825563668e-05, "loss": 0.0619, "theoretical_loss": 3.3478414154629386, "tokens_seen": 3016359936 }, { "epoch": 0.83, "learning_rate": 8.677685950413224e-05, "loss": 0.0662, "theoretical_loss": 3.347830337953386, "tokens_seen": 3016491008 }, { "epoch": 0.83, "learning_rate": 8.673674075262778e-05, "loss": 0.0645, "theoretical_loss": 3.3478192610599287, "tokens_seen": 3016622080 }, { "epoch": 0.83, "learning_rate": 8.669662200112332e-05, "loss": 0.0633, "theoretical_loss": 3.3478081847825054, "tokens_seen": 3016753152 }, { "epoch": 0.83, "learning_rate": 8.665650324961888e-05, "loss": 0.0645, "theoretical_loss": 3.3477971091210557, "tokens_seen": 3016884224 }, { "epoch": 0.83, "learning_rate": 8.661638449811442e-05, "loss": 0.0652, "theoretical_loss": 3.3477860340755177, "tokens_seen": 3017015296 }, { "epoch": 0.83, "learning_rate": 8.657626574660997e-05, "loss": 0.0659, "theoretical_loss": 3.3477749596458315, "tokens_seen": 3017146368 }, { "epoch": 0.83, "learning_rate": 8.653614699510551e-05, "loss": 0.06, "theoretical_loss": 3.3477638858319354, "tokens_seen": 3017277440 }, { "epoch": 0.83, "learning_rate": 8.649602824360105e-05, "loss": 0.0661, "theoretical_loss": 3.3477528126337686, "tokens_seen": 3017408512 }, { "epoch": 0.83, "learning_rate": 8.645590949209661e-05, "loss": 0.0626, "theoretical_loss": 3.34774174005127, "tokens_seen": 3017539584 }, { "epoch": 0.83, "learning_rate": 8.641579074059215e-05, "loss": 0.0666, "theoretical_loss": 3.347730668084379, "tokens_seen": 3017670656 }, { "epoch": 0.83, "learning_rate": 8.63756719890877e-05, "loss": 0.0661, "theoretical_loss": 3.347719596733034, "tokens_seen": 3017801728 }, { "epoch": 0.83, "learning_rate": 8.633555323758325e-05, "loss": 0.0665, "theoretical_loss": 3.3477085259971746, "tokens_seen": 3017932800 }, { "epoch": 0.83, "learning_rate": 8.629543448607879e-05, "loss": 0.0604, "theoretical_loss": 3.3476974558767396, "tokens_seen": 3018063872 }, { "epoch": 0.83, "learning_rate": 8.625531573457434e-05, "loss": 0.0596, "theoretical_loss": 3.347686386371668, "tokens_seen": 3018194944 }, { "epoch": 0.83, "learning_rate": 8.621519698306988e-05, "loss": 0.0626, "theoretical_loss": 3.347675317481899, "tokens_seen": 3018326016 }, { "epoch": 0.83, "learning_rate": 8.617507823156544e-05, "loss": 0.0606, "theoretical_loss": 3.3476642492073716, "tokens_seen": 3018457088 }, { "epoch": 0.83, "learning_rate": 8.613495948006098e-05, "loss": 0.0635, "theoretical_loss": 3.3476531815480253, "tokens_seen": 3018588160 }, { "epoch": 0.83, "learning_rate": 8.609484072855652e-05, "loss": 0.0614, "theoretical_loss": 3.347642114503799, "tokens_seen": 3018719232 }, { "epoch": 0.83, "learning_rate": 8.605472197705208e-05, "loss": 0.0619, "theoretical_loss": 3.347631048074631, "tokens_seen": 3018850304 }, { "epoch": 0.83, "learning_rate": 8.601460322554762e-05, "loss": 0.0619, "theoretical_loss": 3.3476199822604613, "tokens_seen": 3018981376 }, { "epoch": 0.83, "learning_rate": 8.597448447404317e-05, "loss": 0.0658, "theoretical_loss": 3.347608917061229, "tokens_seen": 3019112448 }, { "epoch": 0.83, "learning_rate": 8.593436572253871e-05, "loss": 0.0679, "theoretical_loss": 3.347597852476873, "tokens_seen": 3019243520 }, { "epoch": 0.83, "learning_rate": 8.589424697103427e-05, "loss": 0.066, "theoretical_loss": 3.347586788507332, "tokens_seen": 3019374592 }, { "epoch": 0.83, "learning_rate": 8.585412821952981e-05, "loss": 0.0633, "theoretical_loss": 3.3475757251525464, "tokens_seen": 3019505664 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.0010515168542042375, "objective/train/docs_used": 1097308, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2052204608917236, "objective/train/original_loss": 1.2052204608917236, "objective/train/theoretical_loss": 3.347564662412454, "objective/train/tokens_used": 1390161376, "objective/train/value_avg": -0.006465911865234375, "objective/train/value_loss": 9.162596688838676e-05, "objective/train/value_max": -2.9087066650390625e-05, "objective/train/value_min": -0.3173828125, "objective/train/value_reward_corr": 0.7833055856891826, "objective/train/value_std": 0.012054443359375, "objective/train/weight_avg": 1.001096248626709, "objective/train/weighted_lm_loss": 1.206897258758545, "objective/train/weights_max": 1.0932601690292358, "objective/train/weights_min": 0.724988579750061, "theoretical_loss": 3.347564662412454, "tokens_seen": 3019636736 }, { "epoch": 0.83, "learning_rate": 8.581400946802535e-05, "loss": 0.0638, "theoretical_loss": 3.347564662412454, "tokens_seen": 3019636736 }, { "epoch": 0.83, "learning_rate": 8.57738907165209e-05, "loss": 0.0625, "theoretical_loss": 3.347553600286995, "tokens_seen": 3019767808 }, { "epoch": 0.83, "learning_rate": 8.573377196501645e-05, "loss": 0.0647, "theoretical_loss": 3.347542538776108, "tokens_seen": 3019898880 }, { "epoch": 0.83, "learning_rate": 8.5693653213512e-05, "loss": 0.0628, "theoretical_loss": 3.3475314778797323, "tokens_seen": 3020029952 }, { "epoch": 0.83, "learning_rate": 8.565353446200754e-05, "loss": 0.0642, "theoretical_loss": 3.347520417597807, "tokens_seen": 3020161024 }, { "epoch": 0.83, "learning_rate": 8.561341571050308e-05, "loss": 0.0634, "theoretical_loss": 3.3475093579302713, "tokens_seen": 3020292096 }, { "epoch": 0.83, "learning_rate": 8.557329695899864e-05, "loss": 0.0606, "theoretical_loss": 3.3474982988770647, "tokens_seen": 3020423168 }, { "epoch": 0.83, "learning_rate": 8.553317820749418e-05, "loss": 0.0641, "theoretical_loss": 3.347487240438126, "tokens_seen": 3020554240 }, { "epoch": 0.83, "learning_rate": 8.549305945598974e-05, "loss": 0.0621, "theoretical_loss": 3.347476182613395, "tokens_seen": 3020685312 }, { "epoch": 0.83, "learning_rate": 8.545294070448528e-05, "loss": 0.0662, "theoretical_loss": 3.3474651254028105, "tokens_seen": 3020816384 }, { "epoch": 0.83, "learning_rate": 8.541282195298082e-05, "loss": 0.0637, "theoretical_loss": 3.3474540688063117, "tokens_seen": 3020947456 }, { "epoch": 0.83, "learning_rate": 8.537270320147637e-05, "loss": 0.0666, "theoretical_loss": 3.347443012823838, "tokens_seen": 3021078528 }, { "epoch": 0.83, "learning_rate": 8.533258444997191e-05, "loss": 0.0615, "theoretical_loss": 3.347431957455329, "tokens_seen": 3021209600 }, { "epoch": 0.83, "learning_rate": 8.529246569846747e-05, "loss": 0.0622, "theoretical_loss": 3.3474209027007236, "tokens_seen": 3021340672 }, { "epoch": 0.83, "learning_rate": 8.525234694696301e-05, "loss": 0.0624, "theoretical_loss": 3.347409848559961, "tokens_seen": 3021471744 }, { "epoch": 0.83, "learning_rate": 8.521222819545855e-05, "loss": 0.0646, "theoretical_loss": 3.34739879503298, "tokens_seen": 3021602816 }, { "epoch": 0.83, "learning_rate": 8.51721094439541e-05, "loss": 0.0617, "theoretical_loss": 3.347387742119721, "tokens_seen": 3021733888 }, { "epoch": 0.83, "learning_rate": 8.513199069244966e-05, "loss": 0.0651, "theoretical_loss": 3.3473766898201234, "tokens_seen": 3021864960 }, { "epoch": 0.83, "learning_rate": 8.50918719409452e-05, "loss": 0.062, "theoretical_loss": 3.347365638134125, "tokens_seen": 3021996032 }, { "epoch": 0.83, "learning_rate": 8.505175318944074e-05, "loss": 0.0627, "theoretical_loss": 3.3473545870616666, "tokens_seen": 3022127104 }, { "epoch": 0.83, "learning_rate": 8.501163443793629e-05, "loss": 0.0617, "theoretical_loss": 3.3473435366026867, "tokens_seen": 3022258176 }, { "epoch": 0.83, "learning_rate": 8.497151568643184e-05, "loss": 0.0614, "theoretical_loss": 3.347332486757125, "tokens_seen": 3022389248 }, { "epoch": 0.83, "learning_rate": 8.49313969349274e-05, "loss": 0.0585, "theoretical_loss": 3.3473214375249207, "tokens_seen": 3022520320 }, { "epoch": 0.83, "learning_rate": 8.489127818342294e-05, "loss": 0.0612, "theoretical_loss": 3.3473103889060134, "tokens_seen": 3022651392 }, { "epoch": 0.83, "learning_rate": 8.485115943191848e-05, "loss": 0.0656, "theoretical_loss": 3.347299340900342, "tokens_seen": 3022782464 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.00028997950721532106, "objective/train/docs_used": 1098469, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1652213335037231, "objective/train/original_loss": 1.1652213335037231, "objective/train/theoretical_loss": 3.3472882935078463, "objective/train/tokens_used": 1393438176, "objective/train/value_avg": -0.00545501708984375, "objective/train/value_loss": 0.00018886252655647695, "objective/train/value_max": -3.349781036376953e-05, "objective/train/value_min": -0.26904296875, "objective/train/value_reward_corr": 0.6435026063821618, "objective/train/value_std": 0.01129913330078125, "objective/train/weight_avg": 1.0003737211227417, "objective/train/weighted_lm_loss": 1.166006326675415, "objective/train/weights_max": 1.1714891195297241, "objective/train/weights_min": 0.3682350218296051, "theoretical_loss": 3.3472882935078463, "tokens_seen": 3022913536 }, { "epoch": 0.83, "learning_rate": 8.481104068041402e-05, "loss": 0.061, "theoretical_loss": 3.3472882935078463, "tokens_seen": 3022913536 }, { "epoch": 0.83, "learning_rate": 8.477092192890957e-05, "loss": 0.0605, "theoretical_loss": 3.3472772467284657, "tokens_seen": 3023044608 }, { "epoch": 0.83, "learning_rate": 8.473080317740513e-05, "loss": 0.0638, "theoretical_loss": 3.347266200562139, "tokens_seen": 3023175680 }, { "epoch": 0.83, "learning_rate": 8.469068442590067e-05, "loss": 0.0619, "theoretical_loss": 3.3472551550088063, "tokens_seen": 3023306752 }, { "epoch": 0.83, "learning_rate": 8.465056567439622e-05, "loss": 0.0647, "theoretical_loss": 3.347244110068407, "tokens_seen": 3023437824 }, { "epoch": 0.83, "learning_rate": 8.461044692289175e-05, "loss": 0.067, "theoretical_loss": 3.34723306574088, "tokens_seen": 3023568896 }, { "epoch": 0.83, "learning_rate": 8.457032817138731e-05, "loss": 0.0646, "theoretical_loss": 3.3472220220261653, "tokens_seen": 3023699968 }, { "epoch": 0.83, "learning_rate": 8.453020941988286e-05, "loss": 0.0596, "theoretical_loss": 3.3472109789242017, "tokens_seen": 3023831040 }, { "epoch": 0.83, "learning_rate": 8.44900906683784e-05, "loss": 0.0616, "theoretical_loss": 3.347199936434929, "tokens_seen": 3023962112 }, { "epoch": 0.83, "learning_rate": 8.444997191687396e-05, "loss": 0.0636, "theoretical_loss": 3.347188894558287, "tokens_seen": 3024093184 }, { "epoch": 0.83, "learning_rate": 8.440985316536949e-05, "loss": 0.0654, "theoretical_loss": 3.3471778532942142, "tokens_seen": 3024224256 }, { "epoch": 0.83, "learning_rate": 8.436973441386504e-05, "loss": 0.0647, "theoretical_loss": 3.347166812642651, "tokens_seen": 3024355328 }, { "epoch": 0.83, "learning_rate": 8.43296156623606e-05, "loss": 0.0644, "theoretical_loss": 3.3471557726035366, "tokens_seen": 3024486400 }, { "epoch": 0.83, "learning_rate": 8.428949691085614e-05, "loss": 0.0657, "theoretical_loss": 3.34714473317681, "tokens_seen": 3024617472 }, { "epoch": 0.83, "learning_rate": 8.424937815935169e-05, "loss": 0.0642, "theoretical_loss": 3.3471336943624115, "tokens_seen": 3024748544 }, { "epoch": 0.83, "learning_rate": 8.420925940784722e-05, "loss": 0.0645, "theoretical_loss": 3.34712265616028, "tokens_seen": 3024879616 }, { "epoch": 0.83, "learning_rate": 8.416914065634277e-05, "loss": 0.0649, "theoretical_loss": 3.3471116185703553, "tokens_seen": 3025010688 }, { "epoch": 0.83, "learning_rate": 8.412902190483833e-05, "loss": 0.0648, "theoretical_loss": 3.347100581592577, "tokens_seen": 3025141760 }, { "epoch": 0.83, "learning_rate": 8.408890315333387e-05, "loss": 0.0625, "theoretical_loss": 3.3470895452268845, "tokens_seen": 3025272832 }, { "epoch": 0.83, "learning_rate": 8.404878440182943e-05, "loss": 0.062, "theoretical_loss": 3.347078509473217, "tokens_seen": 3025403904 }, { "epoch": 0.83, "learning_rate": 8.400866565032495e-05, "loss": 0.061, "theoretical_loss": 3.3470674743315145, "tokens_seen": 3025534976 }, { "epoch": 0.83, "learning_rate": 8.396854689882051e-05, "loss": 0.0671, "theoretical_loss": 3.3470564398017166, "tokens_seen": 3025666048 }, { "epoch": 0.83, "learning_rate": 8.392842814731606e-05, "loss": 0.0642, "theoretical_loss": 3.3470454058837626, "tokens_seen": 3025797120 }, { "epoch": 0.83, "learning_rate": 8.38883093958116e-05, "loss": 0.0621, "theoretical_loss": 3.3470343725775917, "tokens_seen": 3025928192 }, { "epoch": 0.83, "learning_rate": 8.384819064430716e-05, "loss": 0.0611, "theoretical_loss": 3.347023339883145, "tokens_seen": 3026059264 }, { "epoch": 0.83, "objective/train/advantage_avg": 0.0002769629063550383, "objective/train/docs_used": 1099580, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3835093975067139, "objective/train/original_loss": 1.3835093975067139, "objective/train/theoretical_loss": 3.34701230780036, "objective/train/tokens_used": 1396714976, "objective/train/value_avg": -0.006591796875, "objective/train/value_loss": 0.0001190670664072968, "objective/train/value_max": -3.045797348022461e-05, "objective/train/value_min": -0.2486572265625, "objective/train/value_reward_corr": 0.7777328119850808, "objective/train/value_std": 0.0135040283203125, "objective/train/weight_avg": 1.0003325939178467, "objective/train/weighted_lm_loss": 1.3834853172302246, "objective/train/weights_max": 1.1095370054244995, "objective/train/weights_min": 0.3936425745487213, "theoretical_loss": 3.34701230780036, "tokens_seen": 3026190336 }, { "epoch": 0.83, "learning_rate": 8.380807189280269e-05, "loss": 0.0665, "theoretical_loss": 3.34701230780036, "tokens_seen": 3026190336 }, { "epoch": 0.83, "learning_rate": 8.376795314129824e-05, "loss": 0.065, "theoretical_loss": 3.347001276329178, "tokens_seen": 3026321408 }, { "epoch": 0.83, "learning_rate": 8.37278343897938e-05, "loss": 0.0644, "theoretical_loss": 3.3469902454695375, "tokens_seen": 3026452480 }, { "epoch": 0.83, "learning_rate": 8.368771563828934e-05, "loss": 0.063, "theoretical_loss": 3.3469792152213786, "tokens_seen": 3026583552 }, { "epoch": 0.83, "learning_rate": 8.364759688678489e-05, "loss": 0.0651, "theoretical_loss": 3.346968185584641, "tokens_seen": 3026714624 }, { "epoch": 0.83, "learning_rate": 8.360747813528042e-05, "loss": 0.0648, "theoretical_loss": 3.346957156559264, "tokens_seen": 3026845696 }, { "epoch": 0.83, "learning_rate": 8.356735938377597e-05, "loss": 0.0601, "theoretical_loss": 3.3469461281451878, "tokens_seen": 3026976768 }, { "epoch": 0.83, "learning_rate": 8.352724063227153e-05, "loss": 0.0655, "theoretical_loss": 3.3469351003423515, "tokens_seen": 3027107840 }, { "epoch": 0.83, "learning_rate": 8.348712188076707e-05, "loss": 0.0603, "theoretical_loss": 3.346924073150695, "tokens_seen": 3027238912 }, { "epoch": 0.83, "learning_rate": 8.344700312926263e-05, "loss": 0.0659, "theoretical_loss": 3.3469130465701578, "tokens_seen": 3027369984 }, { "epoch": 0.83, "learning_rate": 8.340688437775817e-05, "loss": 0.0633, "theoretical_loss": 3.34690202060068, "tokens_seen": 3027501056 }, { "epoch": 0.83, "learning_rate": 8.336676562625371e-05, "loss": 0.0661, "theoretical_loss": 3.346890995242201, "tokens_seen": 3027632128 }, { "epoch": 0.84, "learning_rate": 8.332664687474926e-05, "loss": 0.0689, "theoretical_loss": 3.34687997049466, "tokens_seen": 3027763200 }, { "epoch": 0.84, "learning_rate": 8.32865281232448e-05, "loss": 0.0676, "theoretical_loss": 3.346868946357997, "tokens_seen": 3027894272 }, { "epoch": 0.84, "learning_rate": 8.324640937174036e-05, "loss": 0.0643, "theoretical_loss": 3.346857922832153, "tokens_seen": 3028025344 }, { "epoch": 0.84, "learning_rate": 8.32062906202359e-05, "loss": 0.0642, "theoretical_loss": 3.3468468999170655, "tokens_seen": 3028156416 }, { "epoch": 0.84, "learning_rate": 8.316617186873144e-05, "loss": 0.0649, "theoretical_loss": 3.346835877612676, "tokens_seen": 3028287488 }, { "epoch": 0.84, "learning_rate": 8.3126053117227e-05, "loss": 0.0631, "theoretical_loss": 3.346824855918923, "tokens_seen": 3028418560 }, { "epoch": 0.84, "learning_rate": 8.308593436572254e-05, "loss": 0.0657, "theoretical_loss": 3.3468138348357472, "tokens_seen": 3028549632 }, { "epoch": 0.84, "learning_rate": 8.304581561421809e-05, "loss": 0.0674, "theoretical_loss": 3.346802814363088, "tokens_seen": 3028680704 }, { "epoch": 0.84, "learning_rate": 8.300569686271363e-05, "loss": 0.0616, "theoretical_loss": 3.346791794500885, "tokens_seen": 3028811776 }, { "epoch": 0.84, "learning_rate": 8.296557811120918e-05, "loss": 0.0663, "theoretical_loss": 3.346780775249078, "tokens_seen": 3028942848 }, { "epoch": 0.84, "learning_rate": 8.292545935970473e-05, "loss": 0.0641, "theoretical_loss": 3.3467697566076073, "tokens_seen": 3029073920 }, { "epoch": 0.84, "learning_rate": 8.288534060820027e-05, "loss": 0.0648, "theoretical_loss": 3.346758738576412, "tokens_seen": 3029204992 }, { "epoch": 0.84, "learning_rate": 8.284522185669583e-05, "loss": 0.0642, "theoretical_loss": 3.346747721155432, "tokens_seen": 3029336064 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.000528469099663198, "objective/train/docs_used": 1100821, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2996779680252075, "objective/train/original_loss": 1.299678087234497, "objective/train/theoretical_loss": 3.3467367043446075, "objective/train/tokens_used": 1399991776, "objective/train/value_avg": -0.00901031494140625, "objective/train/value_loss": 0.00017098078387789428, "objective/train/value_max": -2.956390380859375e-05, "objective/train/value_min": -0.418701171875, "objective/train/value_reward_corr": 0.7142675704452364, "objective/train/value_std": 0.0145721435546875, "objective/train/weight_avg": 1.0006085634231567, "objective/train/weighted_lm_loss": 1.300686240196228, "objective/train/weights_max": 1.4400057792663574, "objective/train/weights_min": 0.3683288097381592, "theoretical_loss": 3.3467367043446075, "tokens_seen": 3029467136 }, { "epoch": 0.84, "learning_rate": 8.280510310519137e-05, "loss": 0.0653, "theoretical_loss": 3.3467367043446075, "tokens_seen": 3029467136 }, { "epoch": 0.84, "learning_rate": 8.276498435368691e-05, "loss": 0.0642, "theoretical_loss": 3.346725688143878, "tokens_seen": 3029598208 }, { "epoch": 0.84, "learning_rate": 8.272486560218246e-05, "loss": 0.0653, "theoretical_loss": 3.3467146725531833, "tokens_seen": 3029729280 }, { "epoch": 0.84, "learning_rate": 8.2684746850678e-05, "loss": 0.0637, "theoretical_loss": 3.3467036575724634, "tokens_seen": 3029860352 }, { "epoch": 0.84, "learning_rate": 8.264462809917356e-05, "loss": 0.0658, "theoretical_loss": 3.346692643201658, "tokens_seen": 3029991424 }, { "epoch": 0.84, "learning_rate": 8.26045093476691e-05, "loss": 0.0663, "theoretical_loss": 3.346681629440707, "tokens_seen": 3030122496 }, { "epoch": 0.84, "learning_rate": 8.256439059616464e-05, "loss": 0.066, "theoretical_loss": 3.34667061628955, "tokens_seen": 3030253568 }, { "epoch": 0.84, "learning_rate": 8.25242718446602e-05, "loss": 0.066, "theoretical_loss": 3.3466596037481278, "tokens_seen": 3030384640 }, { "epoch": 0.84, "learning_rate": 8.248415309315574e-05, "loss": 0.0666, "theoretical_loss": 3.346648591816379, "tokens_seen": 3030515712 }, { "epoch": 0.84, "learning_rate": 8.24440343416513e-05, "loss": 0.064, "theoretical_loss": 3.346637580494244, "tokens_seen": 3030646784 }, { "epoch": 0.84, "learning_rate": 8.240391559014683e-05, "loss": 0.0612, "theoretical_loss": 3.3466265697816633, "tokens_seen": 3030777856 }, { "epoch": 0.84, "learning_rate": 8.236379683864238e-05, "loss": 0.0638, "theoretical_loss": 3.3466155596785763, "tokens_seen": 3030908928 }, { "epoch": 0.84, "learning_rate": 8.232367808713793e-05, "loss": 0.0657, "theoretical_loss": 3.3466045501849226, "tokens_seen": 3031040000 }, { "epoch": 0.84, "learning_rate": 8.228355933563347e-05, "loss": 0.0604, "theoretical_loss": 3.3465935413006425, "tokens_seen": 3031171072 }, { "epoch": 0.84, "learning_rate": 8.224344058412903e-05, "loss": 0.063, "theoretical_loss": 3.3465825330256753, "tokens_seen": 3031302144 }, { "epoch": 0.84, "learning_rate": 8.220332183262457e-05, "loss": 0.0631, "theoretical_loss": 3.3465715253599617, "tokens_seen": 3031433216 }, { "epoch": 0.84, "learning_rate": 8.216320308112012e-05, "loss": 0.0616, "theoretical_loss": 3.346560518303442, "tokens_seen": 3031564288 }, { "epoch": 0.84, "learning_rate": 8.212308432961566e-05, "loss": 0.0656, "theoretical_loss": 3.346549511856055, "tokens_seen": 3031695360 }, { "epoch": 0.84, "learning_rate": 8.20829655781112e-05, "loss": 0.0632, "theoretical_loss": 3.3465385060177413, "tokens_seen": 3031826432 }, { "epoch": 0.84, "learning_rate": 8.204284682660676e-05, "loss": 0.0652, "theoretical_loss": 3.3465275007884405, "tokens_seen": 3031957504 }, { "epoch": 0.84, "learning_rate": 8.20027280751023e-05, "loss": 0.0654, "theoretical_loss": 3.346516496168093, "tokens_seen": 3032088576 }, { "epoch": 0.84, "learning_rate": 8.196260932359786e-05, "loss": 0.063, "theoretical_loss": 3.3465054921566386, "tokens_seen": 3032219648 }, { "epoch": 0.84, "learning_rate": 8.19224905720934e-05, "loss": 0.0652, "theoretical_loss": 3.3464944887540176, "tokens_seen": 3032350720 }, { "epoch": 0.84, "learning_rate": 8.188237182058894e-05, "loss": 0.0663, "theoretical_loss": 3.346483485960169, "tokens_seen": 3032481792 }, { "epoch": 0.84, "learning_rate": 8.18422530690845e-05, "loss": 0.0613, "theoretical_loss": 3.346472483775034, "tokens_seen": 3032612864 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.0005657462170347571, "objective/train/docs_used": 1102053, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3609541654586792, "objective/train/original_loss": 1.3609542846679688, "objective/train/theoretical_loss": 3.3464614821985523, "objective/train/tokens_used": 1403268576, "objective/train/value_avg": -0.00983428955078125, "objective/train/value_loss": 0.000690916960593313, "objective/train/value_max": -2.586841583251953e-05, "objective/train/value_min": -0.7744140625, "objective/train/value_reward_corr": 0.6535876378805732, "objective/train/value_std": 0.0241241455078125, "objective/train/weight_avg": 1.0008677244186401, "objective/train/weighted_lm_loss": 1.360958456993103, "objective/train/weights_max": 1.798527479171753, "objective/train/weights_min": 0.3779332637786865, "theoretical_loss": 3.3464614821985523, "tokens_seen": 3032743936 }, { "epoch": 0.84, "learning_rate": 8.180213431758004e-05, "loss": 0.0675, "theoretical_loss": 3.3464614821985523, "tokens_seen": 3032743936 }, { "epoch": 0.84, "learning_rate": 8.176201556607559e-05, "loss": 0.0632, "theoretical_loss": 3.3464504812306637, "tokens_seen": 3032875008 }, { "epoch": 0.84, "learning_rate": 8.172189681457113e-05, "loss": 0.0635, "theoretical_loss": 3.3464394808713083, "tokens_seen": 3033006080 }, { "epoch": 0.84, "learning_rate": 8.168177806306667e-05, "loss": 0.0677, "theoretical_loss": 3.3464284811204257, "tokens_seen": 3033137152 }, { "epoch": 0.84, "learning_rate": 8.164165931156223e-05, "loss": 0.0651, "theoretical_loss": 3.3464174819779564, "tokens_seen": 3033268224 }, { "epoch": 0.84, "learning_rate": 8.160154056005777e-05, "loss": 0.0626, "theoretical_loss": 3.346406483443841, "tokens_seen": 3033399296 }, { "epoch": 0.84, "learning_rate": 8.156142180855332e-05, "loss": 0.0611, "theoretical_loss": 3.3463954855180185, "tokens_seen": 3033530368 }, { "epoch": 0.84, "learning_rate": 8.152130305704886e-05, "loss": 0.066, "theoretical_loss": 3.34638448820043, "tokens_seen": 3033661440 }, { "epoch": 0.84, "learning_rate": 8.14811843055444e-05, "loss": 0.0616, "theoretical_loss": 3.346373491491015, "tokens_seen": 3033792512 }, { "epoch": 0.84, "learning_rate": 8.144106555403996e-05, "loss": 0.0638, "theoretical_loss": 3.346362495389713, "tokens_seen": 3033923584 }, { "epoch": 0.84, "learning_rate": 8.14009468025355e-05, "loss": 0.0596, "theoretical_loss": 3.346351499896465, "tokens_seen": 3034054656 }, { "epoch": 0.84, "learning_rate": 8.136082805103106e-05, "loss": 0.0652, "theoretical_loss": 3.346340505011211, "tokens_seen": 3034185728 }, { "epoch": 0.84, "learning_rate": 8.13207092995266e-05, "loss": 0.0626, "theoretical_loss": 3.3463295107338915, "tokens_seen": 3034316800 }, { "epoch": 0.84, "learning_rate": 8.128059054802214e-05, "loss": 0.0614, "theoretical_loss": 3.3463185170644456, "tokens_seen": 3034447872 }, { "epoch": 0.84, "learning_rate": 8.12404717965177e-05, "loss": 0.0633, "theoretical_loss": 3.346307524002814, "tokens_seen": 3034578944 }, { "epoch": 0.84, "learning_rate": 8.120035304501325e-05, "loss": 0.0627, "theoretical_loss": 3.3462965315489366, "tokens_seen": 3034710016 }, { "epoch": 0.84, "learning_rate": 8.116023429350879e-05, "loss": 0.0656, "theoretical_loss": 3.346285539702754, "tokens_seen": 3034841088 }, { "epoch": 0.84, "learning_rate": 8.112011554200433e-05, "loss": 0.066, "theoretical_loss": 3.3462745484642062, "tokens_seen": 3034972160 }, { "epoch": 0.84, "learning_rate": 8.107999679049987e-05, "loss": 0.0669, "theoretical_loss": 3.3462635578332334, "tokens_seen": 3035103232 }, { "epoch": 0.84, "learning_rate": 8.103987803899543e-05, "loss": 0.067, "theoretical_loss": 3.346252567809775, "tokens_seen": 3035234304 }, { "epoch": 0.84, "learning_rate": 8.099975928749098e-05, "loss": 0.0639, "theoretical_loss": 3.3462415783937725, "tokens_seen": 3035365376 }, { "epoch": 0.84, "learning_rate": 8.095964053598652e-05, "loss": 0.0657, "theoretical_loss": 3.346230589585165, "tokens_seen": 3035496448 }, { "epoch": 0.84, "learning_rate": 8.091952178448208e-05, "loss": 0.064, "theoretical_loss": 3.3462196013838934, "tokens_seen": 3035627520 }, { "epoch": 0.84, "learning_rate": 8.08794030329776e-05, "loss": 0.0646, "theoretical_loss": 3.346208613789898, "tokens_seen": 3035758592 }, { "epoch": 0.84, "learning_rate": 8.083928428147316e-05, "loss": 0.0646, "theoretical_loss": 3.3461976268031184, "tokens_seen": 3035889664 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.00017903382831718773, "objective/train/docs_used": 1103322, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2529382705688477, "objective/train/original_loss": 1.2529382705688477, "objective/train/theoretical_loss": 3.346186640423495, "objective/train/tokens_used": 1406545376, "objective/train/value_avg": -0.0085601806640625, "objective/train/value_loss": 0.00018852585344575346, "objective/train/value_max": -2.4497509002685547e-05, "objective/train/value_min": -0.271484375, "objective/train/value_reward_corr": 0.7619738966458764, "objective/train/value_std": 0.0167388916015625, "objective/train/weight_avg": 1.0002644062042236, "objective/train/weighted_lm_loss": 1.2527917623519897, "objective/train/weights_max": 1.1628448963165283, "objective/train/weights_min": 0.36819693446159363, "theoretical_loss": 3.346186640423495, "tokens_seen": 3036020736 }, { "epoch": 0.84, "learning_rate": 8.079916552996872e-05, "loss": 0.0642, "theoretical_loss": 3.346186640423495, "tokens_seen": 3036020736 }, { "epoch": 0.84, "learning_rate": 8.075904677846426e-05, "loss": 0.0631, "theoretical_loss": 3.3461756546509682, "tokens_seen": 3036151808 }, { "epoch": 0.84, "learning_rate": 8.071892802695981e-05, "loss": 0.0655, "theoretical_loss": 3.346164669485478, "tokens_seen": 3036282880 }, { "epoch": 0.84, "learning_rate": 8.067880927545534e-05, "loss": 0.0669, "theoretical_loss": 3.346153684926965, "tokens_seen": 3036413952 }, { "epoch": 0.84, "learning_rate": 8.06386905239509e-05, "loss": 0.0627, "theoretical_loss": 3.3461427009753697, "tokens_seen": 3036545024 }, { "epoch": 0.84, "learning_rate": 8.059857177244645e-05, "loss": 0.0679, "theoretical_loss": 3.3461317176306316, "tokens_seen": 3036676096 }, { "epoch": 0.84, "learning_rate": 8.055845302094199e-05, "loss": 0.0646, "theoretical_loss": 3.346120734892691, "tokens_seen": 3036807168 }, { "epoch": 0.84, "learning_rate": 8.051833426943755e-05, "loss": 0.0643, "theoretical_loss": 3.3461097527614894, "tokens_seen": 3036938240 }, { "epoch": 0.84, "learning_rate": 8.047821551793307e-05, "loss": 0.0633, "theoretical_loss": 3.3460987712369654, "tokens_seen": 3037069312 }, { "epoch": 0.84, "learning_rate": 8.043809676642863e-05, "loss": 0.0653, "theoretical_loss": 3.3460877903190607, "tokens_seen": 3037200384 }, { "epoch": 0.84, "learning_rate": 8.039797801492418e-05, "loss": 0.0621, "theoretical_loss": 3.346076810007715, "tokens_seen": 3037331456 }, { "epoch": 0.84, "learning_rate": 8.035785926341972e-05, "loss": 0.0657, "theoretical_loss": 3.3460658303028685, "tokens_seen": 3037462528 }, { "epoch": 0.84, "learning_rate": 8.031774051191528e-05, "loss": 0.0614, "theoretical_loss": 3.346054851204462, "tokens_seen": 3037593600 }, { "epoch": 0.84, "learning_rate": 8.027762176041081e-05, "loss": 0.0675, "theoretical_loss": 3.3460438727124355, "tokens_seen": 3037724672 }, { "epoch": 0.84, "learning_rate": 8.023750300890636e-05, "loss": 0.0649, "theoretical_loss": 3.3460328948267293, "tokens_seen": 3037855744 }, { "epoch": 0.84, "learning_rate": 8.019738425740192e-05, "loss": 0.0661, "theoretical_loss": 3.346021917547284, "tokens_seen": 3037986816 }, { "epoch": 0.84, "learning_rate": 8.015726550589746e-05, "loss": 0.0636, "theoretical_loss": 3.3460109408740397, "tokens_seen": 3038117888 }, { "epoch": 0.84, "learning_rate": 8.011714675439301e-05, "loss": 0.0623, "theoretical_loss": 3.345999964806937, "tokens_seen": 3038248960 }, { "epoch": 0.84, "learning_rate": 8.007702800288854e-05, "loss": 0.0633, "theoretical_loss": 3.345988989345916, "tokens_seen": 3038380032 }, { "epoch": 0.84, "learning_rate": 8.00369092513841e-05, "loss": 0.067, "theoretical_loss": 3.3459780144909175, "tokens_seen": 3038511104 }, { "epoch": 0.84, "learning_rate": 7.999679049987965e-05, "loss": 0.0658, "theoretical_loss": 3.3459670402418813, "tokens_seen": 3038642176 }, { "epoch": 0.84, "learning_rate": 7.995667174837519e-05, "loss": 0.0595, "theoretical_loss": 3.3459560665987484, "tokens_seen": 3038773248 }, { "epoch": 0.84, "learning_rate": 7.991655299687075e-05, "loss": 0.0639, "theoretical_loss": 3.345945093561459, "tokens_seen": 3038904320 }, { "epoch": 0.84, "learning_rate": 7.987643424536627e-05, "loss": 0.061, "theoretical_loss": 3.3459341211299534, "tokens_seen": 3039035392 }, { "epoch": 0.84, "learning_rate": 7.983631549386183e-05, "loss": 0.0643, "theoretical_loss": 3.3459231493041726, "tokens_seen": 3039166464 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.00028767719049938023, "objective/train/docs_used": 1104383, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3804874420166016, "objective/train/original_loss": 1.3804872035980225, "objective/train/theoretical_loss": 3.345912178084056, "objective/train/tokens_used": 1409822176, "objective/train/value_avg": -0.006359100341796875, "objective/train/value_loss": 0.0003065995406359434, "objective/train/value_max": -2.1755695343017578e-05, "objective/train/value_min": -0.63525390625, "objective/train/value_reward_corr": 0.5759249192264536, "objective/train/value_std": 0.01290130615234375, "objective/train/weight_avg": 1.0004074573516846, "objective/train/weighted_lm_loss": 1.3810067176818848, "objective/train/weights_max": 1.3047291040420532, "objective/train/weights_min": 0.23720934987068176, "theoretical_loss": 3.345912178084056, "tokens_seen": 3039297536 }, { "epoch": 0.84, "learning_rate": 7.979619674235738e-05, "loss": 0.067, "theoretical_loss": 3.345912178084056, "tokens_seen": 3039297536 }, { "epoch": 0.84, "learning_rate": 7.975607799085293e-05, "loss": 0.0657, "theoretical_loss": 3.345901207469545, "tokens_seen": 3039428608 }, { "epoch": 0.84, "learning_rate": 7.971595923934848e-05, "loss": 0.0617, "theoretical_loss": 3.3458902374605795, "tokens_seen": 3039559680 }, { "epoch": 0.84, "learning_rate": 7.967584048784402e-05, "loss": 0.0654, "theoretical_loss": 3.3458792680571, "tokens_seen": 3039690752 }, { "epoch": 0.84, "learning_rate": 7.963572173633956e-05, "loss": 0.0624, "theoretical_loss": 3.3458682992590476, "tokens_seen": 3039821824 }, { "epoch": 0.84, "learning_rate": 7.959560298483512e-05, "loss": 0.0642, "theoretical_loss": 3.345857331066362, "tokens_seen": 3039952896 }, { "epoch": 0.84, "learning_rate": 7.955548423333066e-05, "loss": 0.0645, "theoretical_loss": 3.345846363478984, "tokens_seen": 3040083968 }, { "epoch": 0.84, "learning_rate": 7.951536548182621e-05, "loss": 0.0626, "theoretical_loss": 3.345835396496854, "tokens_seen": 3040215040 }, { "epoch": 0.84, "learning_rate": 7.947524673032175e-05, "loss": 0.065, "theoretical_loss": 3.345824430119913, "tokens_seen": 3040346112 }, { "epoch": 0.84, "learning_rate": 7.94351279788173e-05, "loss": 0.0636, "theoretical_loss": 3.345813464348101, "tokens_seen": 3040477184 }, { "epoch": 0.84, "learning_rate": 7.939500922731285e-05, "loss": 0.0627, "theoretical_loss": 3.3458024991813584, "tokens_seen": 3040608256 }, { "epoch": 0.84, "learning_rate": 7.935489047580839e-05, "loss": 0.0652, "theoretical_loss": 3.345791534619626, "tokens_seen": 3040739328 }, { "epoch": 0.84, "learning_rate": 7.931477172430395e-05, "loss": 0.0599, "theoretical_loss": 3.3457805706628445, "tokens_seen": 3040870400 }, { "epoch": 0.84, "learning_rate": 7.927465297279949e-05, "loss": 0.063, "theoretical_loss": 3.345769607310954, "tokens_seen": 3041001472 }, { "epoch": 0.84, "learning_rate": 7.923453422129503e-05, "loss": 0.0607, "theoretical_loss": 3.345758644563896, "tokens_seen": 3041132544 }, { "epoch": 0.84, "learning_rate": 7.919441546979058e-05, "loss": 0.0653, "theoretical_loss": 3.34574768242161, "tokens_seen": 3041263616 }, { "epoch": 0.84, "learning_rate": 7.915429671828613e-05, "loss": 0.0661, "theoretical_loss": 3.345736720884037, "tokens_seen": 3041394688 }, { "epoch": 0.84, "learning_rate": 7.911417796678168e-05, "loss": 0.0644, "theoretical_loss": 3.3457257599511174, "tokens_seen": 3041525760 }, { "epoch": 0.84, "learning_rate": 7.907405921527722e-05, "loss": 0.0615, "theoretical_loss": 3.345714799622792, "tokens_seen": 3041656832 }, { "epoch": 0.84, "learning_rate": 7.903394046377276e-05, "loss": 0.0647, "theoretical_loss": 3.3457038398990013, "tokens_seen": 3041787904 }, { "epoch": 0.84, "learning_rate": 7.899382171226832e-05, "loss": 0.0604, "theoretical_loss": 3.345692880779686, "tokens_seen": 3041918976 }, { "epoch": 0.84, "learning_rate": 7.895370296076386e-05, "loss": 0.0615, "theoretical_loss": 3.345681922264787, "tokens_seen": 3042050048 }, { "epoch": 0.84, "learning_rate": 7.891358420925941e-05, "loss": 0.0674, "theoretical_loss": 3.345670964354244, "tokens_seen": 3042181120 }, { "epoch": 0.84, "learning_rate": 7.887346545775496e-05, "loss": 0.0667, "theoretical_loss": 3.3456600070479987, "tokens_seen": 3042312192 }, { "epoch": 0.84, "learning_rate": 7.88333467062505e-05, "loss": 0.0654, "theoretical_loss": 3.3456490503459912, "tokens_seen": 3042443264 }, { "epoch": 0.84, "objective/train/advantage_avg": 0.00018792021728586406, "objective/train/docs_used": 1105520, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2352629899978638, "objective/train/original_loss": 1.2352629899978638, "objective/train/theoretical_loss": 3.3456380942481623, "objective/train/tokens_used": 1413098976, "objective/train/value_avg": -0.00894927978515625, "objective/train/value_loss": 0.0002201708557549864, "objective/train/value_max": -2.5451183319091797e-05, "objective/train/value_min": -0.955078125, "objective/train/value_reward_corr": 0.7776802329081822, "objective/train/value_std": 0.01837158203125, "objective/train/weight_avg": 1.000286340713501, "objective/train/weighted_lm_loss": 1.235485315322876, "objective/train/weights_max": 1.19859778881073, "objective/train/weights_min": 0.37098827958106995, "theoretical_loss": 3.3456380942481623, "tokens_seen": 3042574336 }, { "epoch": 0.84, "learning_rate": 7.879322795474605e-05, "loss": 0.0608, "theoretical_loss": 3.3456380942481623, "tokens_seen": 3042574336 }, { "epoch": 0.84, "learning_rate": 7.875310920324159e-05, "loss": 0.0632, "theoretical_loss": 3.3456271387544523, "tokens_seen": 3042705408 }, { "epoch": 0.84, "learning_rate": 7.871299045173715e-05, "loss": 0.0644, "theoretical_loss": 3.345616183864802, "tokens_seen": 3042836480 }, { "epoch": 0.84, "learning_rate": 7.867287170023269e-05, "loss": 0.0608, "theoretical_loss": 3.345605229579153, "tokens_seen": 3042967552 }, { "epoch": 0.84, "learning_rate": 7.863275294872823e-05, "loss": 0.0666, "theoretical_loss": 3.3455942758974446, "tokens_seen": 3043098624 }, { "epoch": 0.84, "learning_rate": 7.859263419722379e-05, "loss": 0.062, "theoretical_loss": 3.345583322819618, "tokens_seen": 3043229696 }, { "epoch": 0.84, "learning_rate": 7.855251544571933e-05, "loss": 0.0646, "theoretical_loss": 3.3455723703456144, "tokens_seen": 3043360768 }, { "epoch": 0.84, "learning_rate": 7.851239669421488e-05, "loss": 0.0662, "theoretical_loss": 3.345561418475374, "tokens_seen": 3043491840 }, { "epoch": 0.84, "learning_rate": 7.847227794271042e-05, "loss": 0.067, "theoretical_loss": 3.345550467208837, "tokens_seen": 3043622912 }, { "epoch": 0.84, "learning_rate": 7.843215919120598e-05, "loss": 0.0646, "theoretical_loss": 3.3455395165459456, "tokens_seen": 3043753984 }, { "epoch": 0.84, "learning_rate": 7.839204043970152e-05, "loss": 0.066, "theoretical_loss": 3.3455285664866397, "tokens_seen": 3043885056 }, { "epoch": 0.84, "learning_rate": 7.835192168819706e-05, "loss": 0.0653, "theoretical_loss": 3.3455176170308594, "tokens_seen": 3044016128 }, { "epoch": 0.84, "learning_rate": 7.831180293669261e-05, "loss": 0.0654, "theoretical_loss": 3.3455066681785466, "tokens_seen": 3044147200 }, { "epoch": 0.85, "learning_rate": 7.827168418518816e-05, "loss": 0.064, "theoretical_loss": 3.3454957199296413, "tokens_seen": 3044278272 }, { "epoch": 0.85, "learning_rate": 7.823156543368371e-05, "loss": 0.0646, "theoretical_loss": 3.3454847722840846, "tokens_seen": 3044409344 }, { "epoch": 0.85, "learning_rate": 7.819144668217925e-05, "loss": 0.0653, "theoretical_loss": 3.3454738252418172, "tokens_seen": 3044540416 }, { "epoch": 0.85, "learning_rate": 7.81513279306748e-05, "loss": 0.0608, "theoretical_loss": 3.34546287880278, "tokens_seen": 3044671488 }, { "epoch": 0.85, "learning_rate": 7.811120917917035e-05, "loss": 0.0673, "theoretical_loss": 3.3454519329669132, "tokens_seen": 3044802560 }, { "epoch": 0.85, "learning_rate": 7.807109042766589e-05, "loss": 0.0643, "theoretical_loss": 3.3454409877341584, "tokens_seen": 3044933632 }, { "epoch": 0.85, "learning_rate": 7.803097167616144e-05, "loss": 0.0663, "theoretical_loss": 3.345430043104456, "tokens_seen": 3045064704 }, { "epoch": 0.85, "learning_rate": 7.799085292465699e-05, "loss": 0.0649, "theoretical_loss": 3.3454190990777466, "tokens_seen": 3045195776 }, { "epoch": 0.85, "learning_rate": 7.795073417315253e-05, "loss": 0.0622, "theoretical_loss": 3.345408155653972, "tokens_seen": 3045326848 }, { "epoch": 0.85, "learning_rate": 7.791061542164808e-05, "loss": 0.0654, "theoretical_loss": 3.345397212833072, "tokens_seen": 3045457920 }, { "epoch": 0.85, "learning_rate": 7.787049667014362e-05, "loss": 0.062, "theoretical_loss": 3.3453862706149877, "tokens_seen": 3045588992 }, { "epoch": 0.85, "learning_rate": 7.783037791863918e-05, "loss": 0.0616, "theoretical_loss": 3.34537532899966, "tokens_seen": 3045720064 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.00031785431201569736, "objective/train/docs_used": 1106703, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2452338933944702, "objective/train/original_loss": 1.2452340126037598, "objective/train/theoretical_loss": 3.34536438798703, "objective/train/tokens_used": 1416375776, "objective/train/value_avg": -0.00634002685546875, "objective/train/value_loss": 0.0002800138317979872, "objective/train/value_max": -2.2292137145996094e-05, "objective/train/value_min": -0.369384765625, "objective/train/value_reward_corr": 0.621117614381432, "objective/train/value_std": 0.0133056640625, "objective/train/weight_avg": 1.0004363059997559, "objective/train/weighted_lm_loss": 1.245703935623169, "objective/train/weights_max": 1.210507869720459, "objective/train/weights_min": 0.3684363067150116, "theoretical_loss": 3.34536438798703, "tokens_seen": 3045851136 }, { "epoch": 0.85, "learning_rate": 7.779025916713472e-05, "loss": 0.0619, "theoretical_loss": 3.34536438798703, "tokens_seen": 3045851136 }, { "epoch": 0.85, "learning_rate": 7.775014041563026e-05, "loss": 0.0628, "theoretical_loss": 3.345353447577038, "tokens_seen": 3045982208 }, { "epoch": 0.85, "learning_rate": 7.771002166412582e-05, "loss": 0.0637, "theoretical_loss": 3.345342507769626, "tokens_seen": 3046113280 }, { "epoch": 0.85, "learning_rate": 7.766990291262136e-05, "loss": 0.0669, "theoretical_loss": 3.3453315685647333, "tokens_seen": 3046244352 }, { "epoch": 0.85, "learning_rate": 7.762978416111691e-05, "loss": 0.065, "theoretical_loss": 3.345320629962302, "tokens_seen": 3046375424 }, { "epoch": 0.85, "learning_rate": 7.758966540961245e-05, "loss": 0.0596, "theoretical_loss": 3.3453096919622727, "tokens_seen": 3046506496 }, { "epoch": 0.85, "learning_rate": 7.7549546658108e-05, "loss": 0.0668, "theoretical_loss": 3.345298754564586, "tokens_seen": 3046637568 }, { "epoch": 0.85, "learning_rate": 7.750942790660355e-05, "loss": 0.0622, "theoretical_loss": 3.3452878177691834, "tokens_seen": 3046768640 }, { "epoch": 0.85, "learning_rate": 7.746930915509909e-05, "loss": 0.063, "theoretical_loss": 3.3452768815760052, "tokens_seen": 3046899712 }, { "epoch": 0.85, "learning_rate": 7.742919040359464e-05, "loss": 0.0633, "theoretical_loss": 3.345265945984993, "tokens_seen": 3047030784 }, { "epoch": 0.85, "learning_rate": 7.738907165209019e-05, "loss": 0.0642, "theoretical_loss": 3.3452550109960866, "tokens_seen": 3047161856 }, { "epoch": 0.85, "learning_rate": 7.734895290058573e-05, "loss": 0.063, "theoretical_loss": 3.3452440766092284, "tokens_seen": 3047292928 }, { "epoch": 0.85, "learning_rate": 7.730883414908128e-05, "loss": 0.0612, "theoretical_loss": 3.3452331428243585, "tokens_seen": 3047424000 }, { "epoch": 0.85, "learning_rate": 7.726871539757684e-05, "loss": 0.0677, "theoretical_loss": 3.3452222096414177, "tokens_seen": 3047555072 }, { "epoch": 0.85, "learning_rate": 7.722859664607238e-05, "loss": 0.065, "theoretical_loss": 3.345211277060348, "tokens_seen": 3047686144 }, { "epoch": 0.85, "learning_rate": 7.718847789456793e-05, "loss": 0.061, "theoretical_loss": 3.345200345081089, "tokens_seen": 3047817216 }, { "epoch": 0.85, "learning_rate": 7.714835914306346e-05, "loss": 0.0611, "theoretical_loss": 3.345189413703583, "tokens_seen": 3047948288 }, { "epoch": 0.85, "learning_rate": 7.710824039155902e-05, "loss": 0.0634, "theoretical_loss": 3.3451784829277695, "tokens_seen": 3048079360 }, { "epoch": 0.85, "learning_rate": 7.706812164005457e-05, "loss": 0.0653, "theoretical_loss": 3.3451675527535913, "tokens_seen": 3048210432 }, { "epoch": 0.85, "learning_rate": 7.702800288855011e-05, "loss": 0.0654, "theoretical_loss": 3.345156623180988, "tokens_seen": 3048341504 }, { "epoch": 0.85, "learning_rate": 7.698788413704567e-05, "loss": 0.066, "theoretical_loss": 3.3451456942099016, "tokens_seen": 3048472576 }, { "epoch": 0.85, "learning_rate": 7.69477653855412e-05, "loss": 0.0633, "theoretical_loss": 3.3451347658402724, "tokens_seen": 3048603648 }, { "epoch": 0.85, "learning_rate": 7.690764663403675e-05, "loss": 0.0639, "theoretical_loss": 3.3451238380720416, "tokens_seen": 3048734720 }, { "epoch": 0.85, "learning_rate": 7.68675278825323e-05, "loss": 0.0654, "theoretical_loss": 3.3451129109051507, "tokens_seen": 3048865792 }, { "epoch": 0.85, "learning_rate": 7.682740913102785e-05, "loss": 0.0634, "theoretical_loss": 3.34510198433954, "tokens_seen": 3048996864 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.0001547599385958165, "objective/train/docs_used": 1107994, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2190630435943604, "objective/train/original_loss": 1.2190629243850708, "objective/train/theoretical_loss": 3.345091058375151, "objective/train/tokens_used": 1419652576, "objective/train/value_avg": -0.00797271728515625, "objective/train/value_loss": 0.00037065884680487216, "objective/train/value_max": -3.24249267578125e-05, "objective/train/value_min": -0.467041015625, "objective/train/value_reward_corr": 0.577065896399404, "objective/train/value_std": 0.01387786865234375, "objective/train/weight_avg": 1.0003005266189575, "objective/train/weighted_lm_loss": 1.2191667556762695, "objective/train/weights_max": 1.3775582313537598, "objective/train/weights_min": 0.2237391620874405, "theoretical_loss": 3.345091058375151, "tokens_seen": 3049127936 }, { "epoch": 0.85, "learning_rate": 7.67872903795234e-05, "loss": 0.0623, "theoretical_loss": 3.345091058375151, "tokens_seen": 3049127936 }, { "epoch": 0.85, "learning_rate": 7.674717162801893e-05, "loss": 0.0655, "theoretical_loss": 3.3450801330119253, "tokens_seen": 3049259008 }, { "epoch": 0.85, "learning_rate": 7.670705287651448e-05, "loss": 0.0661, "theoretical_loss": 3.3450692082498032, "tokens_seen": 3049390080 }, { "epoch": 0.85, "learning_rate": 7.666693412501004e-05, "loss": 0.0621, "theoretical_loss": 3.3450582840887257, "tokens_seen": 3049521152 }, { "epoch": 0.85, "learning_rate": 7.662681537350558e-05, "loss": 0.0639, "theoretical_loss": 3.3450473605286346, "tokens_seen": 3049652224 }, { "epoch": 0.85, "learning_rate": 7.658669662200113e-05, "loss": 0.0644, "theoretical_loss": 3.3450364375694708, "tokens_seen": 3049783296 }, { "epoch": 0.85, "learning_rate": 7.654657787049666e-05, "loss": 0.0689, "theoretical_loss": 3.345025515211175, "tokens_seen": 3049914368 }, { "epoch": 0.85, "learning_rate": 7.650645911899222e-05, "loss": 0.0617, "theoretical_loss": 3.345014593453689, "tokens_seen": 3050045440 }, { "epoch": 0.85, "learning_rate": 7.646634036748777e-05, "loss": 0.0658, "theoretical_loss": 3.3450036722969534, "tokens_seen": 3050176512 }, { "epoch": 0.85, "learning_rate": 7.642622161598331e-05, "loss": 0.062, "theoretical_loss": 3.344992751740909, "tokens_seen": 3050307584 }, { "epoch": 0.85, "learning_rate": 7.638610286447887e-05, "loss": 0.0621, "theoretical_loss": 3.344981831785498, "tokens_seen": 3050438656 }, { "epoch": 0.85, "learning_rate": 7.63459841129744e-05, "loss": 0.0657, "theoretical_loss": 3.344970912430661, "tokens_seen": 3050569728 }, { "epoch": 0.85, "learning_rate": 7.630586536146995e-05, "loss": 0.0618, "theoretical_loss": 3.344959993676339, "tokens_seen": 3050700800 }, { "epoch": 0.85, "learning_rate": 7.62657466099655e-05, "loss": 0.0615, "theoretical_loss": 3.344949075522473, "tokens_seen": 3050831872 }, { "epoch": 0.85, "learning_rate": 7.622562785846105e-05, "loss": 0.0624, "theoretical_loss": 3.344938157969005, "tokens_seen": 3050962944 }, { "epoch": 0.85, "learning_rate": 7.61855091069566e-05, "loss": 0.0639, "theoretical_loss": 3.3449272410158755, "tokens_seen": 3051094016 }, { "epoch": 0.85, "learning_rate": 7.614539035545213e-05, "loss": 0.0612, "theoretical_loss": 3.344916324663026, "tokens_seen": 3051225088 }, { "epoch": 0.85, "learning_rate": 7.610527160394768e-05, "loss": 0.0627, "theoretical_loss": 3.3449054089103973, "tokens_seen": 3051356160 }, { "epoch": 0.85, "learning_rate": 7.606515285244324e-05, "loss": 0.0642, "theoretical_loss": 3.3448944937579315, "tokens_seen": 3051487232 }, { "epoch": 0.85, "learning_rate": 7.602503410093878e-05, "loss": 0.0618, "theoretical_loss": 3.3448835792055687, "tokens_seen": 3051618304 }, { "epoch": 0.85, "learning_rate": 7.598491534943433e-05, "loss": 0.0635, "theoretical_loss": 3.344872665253251, "tokens_seen": 3051749376 }, { "epoch": 0.85, "learning_rate": 7.594479659792988e-05, "loss": 0.0666, "theoretical_loss": 3.344861751900919, "tokens_seen": 3051880448 }, { "epoch": 0.85, "learning_rate": 7.590467784642542e-05, "loss": 0.0631, "theoretical_loss": 3.3448508391485148, "tokens_seen": 3052011520 }, { "epoch": 0.85, "learning_rate": 7.586455909492097e-05, "loss": 0.0649, "theoretical_loss": 3.344839926995979, "tokens_seen": 3052142592 }, { "epoch": 0.85, "learning_rate": 7.582444034341651e-05, "loss": 0.0656, "theoretical_loss": 3.344829015443253, "tokens_seen": 3052273664 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.0007163534173741937, "objective/train/docs_used": 1109048, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3082166910171509, "objective/train/original_loss": 1.3082165718078613, "objective/train/theoretical_loss": 3.3448181044902783, "objective/train/tokens_used": 1422929376, "objective/train/value_avg": -0.00933074951171875, "objective/train/value_loss": 0.00027137910365127027, "objective/train/value_max": -2.568960189819336e-05, "objective/train/value_min": -0.64111328125, "objective/train/value_reward_corr": 0.7455151081078795, "objective/train/value_std": 0.0189361572265625, "objective/train/weight_avg": 1.0008348226547241, "objective/train/weighted_lm_loss": 1.3085218667984009, "objective/train/weights_max": 1.394136667251587, "objective/train/weights_min": 0.37944716215133667, "theoretical_loss": 3.3448181044902783, "tokens_seen": 3052404736 }, { "epoch": 0.85, "learning_rate": 7.578432159191207e-05, "loss": 0.0656, "theoretical_loss": 3.3448181044902783, "tokens_seen": 3052404736 }, { "epoch": 0.85, "learning_rate": 7.574420284040761e-05, "loss": 0.0619, "theoretical_loss": 3.3448071941369957, "tokens_seen": 3052535808 }, { "epoch": 0.85, "learning_rate": 7.570408408890315e-05, "loss": 0.0622, "theoretical_loss": 3.3447962843833468, "tokens_seen": 3052666880 }, { "epoch": 0.85, "learning_rate": 7.56639653373987e-05, "loss": 0.0655, "theoretical_loss": 3.3447853752292724, "tokens_seen": 3052797952 }, { "epoch": 0.85, "learning_rate": 7.562384658589425e-05, "loss": 0.0642, "theoretical_loss": 3.344774466674715, "tokens_seen": 3052929024 }, { "epoch": 0.85, "learning_rate": 7.55837278343898e-05, "loss": 0.066, "theoretical_loss": 3.344763558719615, "tokens_seen": 3053060096 }, { "epoch": 0.85, "learning_rate": 7.554360908288534e-05, "loss": 0.0616, "theoretical_loss": 3.3447526513639136, "tokens_seen": 3053191168 }, { "epoch": 0.85, "learning_rate": 7.550349033138088e-05, "loss": 0.0626, "theoretical_loss": 3.344741744607553, "tokens_seen": 3053322240 }, { "epoch": 0.85, "learning_rate": 7.546337157987644e-05, "loss": 0.0609, "theoretical_loss": 3.3447308384504737, "tokens_seen": 3053453312 }, { "epoch": 0.85, "learning_rate": 7.542325282837198e-05, "loss": 0.0627, "theoretical_loss": 3.344719932892617, "tokens_seen": 3053584384 }, { "epoch": 0.85, "learning_rate": 7.538313407686753e-05, "loss": 0.0607, "theoretical_loss": 3.3447090279339253, "tokens_seen": 3053715456 }, { "epoch": 0.85, "learning_rate": 7.534301532536308e-05, "loss": 0.0632, "theoretical_loss": 3.3446981235743385, "tokens_seen": 3053846528 }, { "epoch": 0.85, "learning_rate": 7.530289657385862e-05, "loss": 0.0602, "theoretical_loss": 3.344687219813799, "tokens_seen": 3053977600 }, { "epoch": 0.85, "learning_rate": 7.526277782235417e-05, "loss": 0.0651, "theoretical_loss": 3.344676316652248, "tokens_seen": 3054108672 }, { "epoch": 0.85, "learning_rate": 7.522265907084971e-05, "loss": 0.0622, "theoretical_loss": 3.344665414089627, "tokens_seen": 3054239744 }, { "epoch": 0.85, "learning_rate": 7.518254031934527e-05, "loss": 0.0662, "theoretical_loss": 3.344654512125877, "tokens_seen": 3054370816 }, { "epoch": 0.85, "learning_rate": 7.514242156784081e-05, "loss": 0.068, "theoretical_loss": 3.3446436107609396, "tokens_seen": 3054501888 }, { "epoch": 0.85, "learning_rate": 7.510230281633635e-05, "loss": 0.0615, "theoretical_loss": 3.344632709994756, "tokens_seen": 3054632960 }, { "epoch": 0.85, "learning_rate": 7.50621840648319e-05, "loss": 0.0621, "theoretical_loss": 3.3446218098272684, "tokens_seen": 3054764032 }, { "epoch": 0.85, "learning_rate": 7.502206531332745e-05, "loss": 0.0631, "theoretical_loss": 3.3446109102584174, "tokens_seen": 3054895104 }, { "epoch": 0.85, "learning_rate": 7.4981946561823e-05, "loss": 0.062, "theoretical_loss": 3.344600011288145, "tokens_seen": 3055026176 }, { "epoch": 0.85, "learning_rate": 7.494182781031854e-05, "loss": 0.0593, "theoretical_loss": 3.344589112916392, "tokens_seen": 3055157248 }, { "epoch": 0.85, "learning_rate": 7.490170905881408e-05, "loss": 0.0649, "theoretical_loss": 3.3445782151431005, "tokens_seen": 3055288320 }, { "epoch": 0.85, "learning_rate": 7.486159030730964e-05, "loss": 0.0664, "theoretical_loss": 3.3445673179682114, "tokens_seen": 3055419392 }, { "epoch": 0.85, "learning_rate": 7.482147155580518e-05, "loss": 0.0659, "theoretical_loss": 3.3445564213916668, "tokens_seen": 3055550464 }, { "epoch": 0.85, "objective/train/advantage_avg": 6.269667096603371e-07, "objective/train/docs_used": 1110166, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2464030981063843, "objective/train/original_loss": 1.2464027404785156, "objective/train/theoretical_loss": 3.3445455254134075, "objective/train/tokens_used": 1426206176, "objective/train/value_avg": -0.00566864013671875, "objective/train/value_loss": 9.210449206875637e-05, "objective/train/value_max": -2.372264862060547e-05, "objective/train/value_min": -0.33447265625, "objective/train/value_reward_corr": 0.6960168379590043, "objective/train/value_std": 0.0095367431640625, "objective/train/weight_avg": 1.00004243850708, "objective/train/weighted_lm_loss": 1.245870590209961, "objective/train/weights_max": 1.1860806941986084, "objective/train/weights_min": 0.372718870639801, "theoretical_loss": 3.3445455254134075, "tokens_seen": 3055681536 }, { "epoch": 0.85, "learning_rate": 7.478135280430074e-05, "loss": 0.0654, "theoretical_loss": 3.3445455254134075, "tokens_seen": 3055681536 }, { "epoch": 0.85, "learning_rate": 7.474123405279628e-05, "loss": 0.0619, "theoretical_loss": 3.3445346300333756, "tokens_seen": 3055812608 }, { "epoch": 0.85, "learning_rate": 7.470111530129183e-05, "loss": 0.0637, "theoretical_loss": 3.3445237352515123, "tokens_seen": 3055943680 }, { "epoch": 0.85, "learning_rate": 7.466099654978737e-05, "loss": 0.062, "theoretical_loss": 3.344512841067759, "tokens_seen": 3056074752 }, { "epoch": 0.85, "learning_rate": 7.462087779828291e-05, "loss": 0.0655, "theoretical_loss": 3.3445019474820574, "tokens_seen": 3056205824 }, { "epoch": 0.85, "learning_rate": 7.458075904677847e-05, "loss": 0.0665, "theoretical_loss": 3.344491054494349, "tokens_seen": 3056336896 }, { "epoch": 0.85, "learning_rate": 7.454064029527401e-05, "loss": 0.0629, "theoretical_loss": 3.3444801621045754, "tokens_seen": 3056467968 }, { "epoch": 0.85, "learning_rate": 7.450052154376957e-05, "loss": 0.0634, "theoretical_loss": 3.3444692703126777, "tokens_seen": 3056599040 }, { "epoch": 0.85, "learning_rate": 7.44604027922651e-05, "loss": 0.0641, "theoretical_loss": 3.3444583791185982, "tokens_seen": 3056730112 }, { "epoch": 0.85, "learning_rate": 7.442028404076065e-05, "loss": 0.0641, "theoretical_loss": 3.3444474885222784, "tokens_seen": 3056861184 }, { "epoch": 0.85, "learning_rate": 7.43801652892562e-05, "loss": 0.0608, "theoretical_loss": 3.3444365985236586, "tokens_seen": 3056992256 }, { "epoch": 0.85, "learning_rate": 7.434004653775174e-05, "loss": 0.0648, "theoretical_loss": 3.3444257091226817, "tokens_seen": 3057123328 }, { "epoch": 0.85, "learning_rate": 7.42999277862473e-05, "loss": 0.0621, "theoretical_loss": 3.344414820319289, "tokens_seen": 3057254400 }, { "epoch": 0.85, "learning_rate": 7.425980903474284e-05, "loss": 0.0648, "theoretical_loss": 3.344403932113422, "tokens_seen": 3057385472 }, { "epoch": 0.85, "learning_rate": 7.421969028323838e-05, "loss": 0.0646, "theoretical_loss": 3.344393044505022, "tokens_seen": 3057516544 }, { "epoch": 0.85, "learning_rate": 7.417957153173394e-05, "loss": 0.0616, "theoretical_loss": 3.344382157494031, "tokens_seen": 3057647616 }, { "epoch": 0.85, "learning_rate": 7.413945278022948e-05, "loss": 0.0627, "theoretical_loss": 3.3443712710803903, "tokens_seen": 3057778688 }, { "epoch": 0.85, "learning_rate": 7.409933402872503e-05, "loss": 0.0658, "theoretical_loss": 3.344360385264042, "tokens_seen": 3057909760 }, { "epoch": 0.85, "learning_rate": 7.405921527722057e-05, "loss": 0.0645, "theoretical_loss": 3.3443495000449275, "tokens_seen": 3058040832 }, { "epoch": 0.85, "learning_rate": 7.401909652571611e-05, "loss": 0.065, "theoretical_loss": 3.3443386154229877, "tokens_seen": 3058171904 }, { "epoch": 0.85, "learning_rate": 7.397897777421167e-05, "loss": 0.0628, "theoretical_loss": 3.3443277313981654, "tokens_seen": 3058302976 }, { "epoch": 0.85, "learning_rate": 7.393885902270721e-05, "loss": 0.0645, "theoretical_loss": 3.344316847970402, "tokens_seen": 3058434048 }, { "epoch": 0.85, "learning_rate": 7.389874027120277e-05, "loss": 0.0617, "theoretical_loss": 3.3443059651396383, "tokens_seen": 3058565120 }, { "epoch": 0.85, "learning_rate": 7.385862151969831e-05, "loss": 0.0638, "theoretical_loss": 3.344295082905817, "tokens_seen": 3058696192 }, { "epoch": 0.85, "learning_rate": 7.381850276819385e-05, "loss": 0.0624, "theoretical_loss": 3.344284201268879, "tokens_seen": 3058827264 }, { "epoch": 0.85, "objective/train/advantage_avg": 0.00041742753819562495, "objective/train/docs_used": 1111330, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3257805109024048, "objective/train/original_loss": 1.3257803916931152, "objective/train/theoretical_loss": 3.344273320228767, "objective/train/tokens_used": 1429482976, "objective/train/value_avg": -0.006229400634765625, "objective/train/value_loss": 0.00016397896979469806, "objective/train/value_max": -4.7206878662109375e-05, "objective/train/value_min": -0.634765625, "objective/train/value_reward_corr": 0.6773975726873489, "objective/train/value_std": 0.01287841796875, "objective/train/weight_avg": 1.0004942417144775, "objective/train/weighted_lm_loss": 1.3259880542755127, "objective/train/weights_max": 1.4826022386550903, "objective/train/weights_min": 0.3705597221851349, "theoretical_loss": 3.344273320228767, "tokens_seen": 3058958336 }, { "epoch": 0.85, "learning_rate": 7.37783840166894e-05, "loss": 0.0617, "theoretical_loss": 3.344273320228767, "tokens_seen": 3058958336 }, { "epoch": 0.85, "learning_rate": 7.373826526518494e-05, "loss": 0.0638, "theoretical_loss": 3.3442624397854215, "tokens_seen": 3059089408 }, { "epoch": 0.85, "learning_rate": 7.36981465136805e-05, "loss": 0.0633, "theoretical_loss": 3.344251559938785, "tokens_seen": 3059220480 }, { "epoch": 0.85, "learning_rate": 7.365802776217604e-05, "loss": 0.0636, "theoretical_loss": 3.3442406806887988, "tokens_seen": 3059351552 }, { "epoch": 0.85, "learning_rate": 7.361790901067158e-05, "loss": 0.0632, "theoretical_loss": 3.3442298020354047, "tokens_seen": 3059482624 }, { "epoch": 0.85, "learning_rate": 7.357779025916714e-05, "loss": 0.064, "theoretical_loss": 3.344218923978545, "tokens_seen": 3059613696 }, { "epoch": 0.85, "learning_rate": 7.353767150766268e-05, "loss": 0.0643, "theoretical_loss": 3.3442080465181605, "tokens_seen": 3059744768 }, { "epoch": 0.85, "learning_rate": 7.349755275615823e-05, "loss": 0.0612, "theoretical_loss": 3.3441971696541932, "tokens_seen": 3059875840 }, { "epoch": 0.85, "learning_rate": 7.345743400465379e-05, "loss": 0.0626, "theoretical_loss": 3.3441862933865854, "tokens_seen": 3060006912 }, { "epoch": 0.85, "learning_rate": 7.341731525314932e-05, "loss": 0.0638, "theoretical_loss": 3.3441754177152787, "tokens_seen": 3060137984 }, { "epoch": 0.85, "learning_rate": 7.337719650164487e-05, "loss": 0.0661, "theoretical_loss": 3.3441645426402142, "tokens_seen": 3060269056 }, { "epoch": 0.85, "learning_rate": 7.333707775014042e-05, "loss": 0.0632, "theoretical_loss": 3.3441536681613346, "tokens_seen": 3060400128 }, { "epoch": 0.85, "learning_rate": 7.329695899863597e-05, "loss": 0.0639, "theoretical_loss": 3.344142794278581, "tokens_seen": 3060531200 }, { "epoch": 0.85, "learning_rate": 7.325684024713152e-05, "loss": 0.0627, "theoretical_loss": 3.344131920991895, "tokens_seen": 3060662272 }, { "epoch": 0.86, "learning_rate": 7.321672149562705e-05, "loss": 0.0632, "theoretical_loss": 3.34412104830122, "tokens_seen": 3060793344 }, { "epoch": 0.86, "learning_rate": 7.31766027441226e-05, "loss": 0.0616, "theoretical_loss": 3.3441101762064958, "tokens_seen": 3060924416 }, { "epoch": 0.86, "learning_rate": 7.313648399261816e-05, "loss": 0.0664, "theoretical_loss": 3.344099304707665, "tokens_seen": 3061055488 }, { "epoch": 0.86, "learning_rate": 7.30963652411137e-05, "loss": 0.0628, "theoretical_loss": 3.3440884338046697, "tokens_seen": 3061186560 }, { "epoch": 0.86, "learning_rate": 7.305624648960925e-05, "loss": 0.0641, "theoretical_loss": 3.3440775634974513, "tokens_seen": 3061317632 }, { "epoch": 0.86, "learning_rate": 7.301612773810478e-05, "loss": 0.063, "theoretical_loss": 3.344066693785952, "tokens_seen": 3061448704 }, { "epoch": 0.86, "learning_rate": 7.297600898660034e-05, "loss": 0.0598, "theoretical_loss": 3.3440558246701135, "tokens_seen": 3061579776 }, { "epoch": 0.86, "learning_rate": 7.293589023509589e-05, "loss": 0.0627, "theoretical_loss": 3.3440449561498777, "tokens_seen": 3061710848 }, { "epoch": 0.86, "learning_rate": 7.289577148359143e-05, "loss": 0.0685, "theoretical_loss": 3.3440340882251864, "tokens_seen": 3061841920 }, { "epoch": 0.86, "learning_rate": 7.285565273208699e-05, "loss": 0.0628, "theoretical_loss": 3.3440232208959815, "tokens_seen": 3061972992 }, { "epoch": 0.86, "learning_rate": 7.281553398058252e-05, "loss": 0.0659, "theoretical_loss": 3.344012354162205, "tokens_seen": 3062104064 }, { "epoch": 0.86, "objective/train/advantage_avg": -9.976100409403443e-05, "objective/train/docs_used": 1112415, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4123433828353882, "objective/train/original_loss": 1.4123433828353882, "objective/train/theoretical_loss": 3.3440014880237987, "objective/train/tokens_used": 1432759776, "objective/train/value_avg": -0.0097198486328125, "objective/train/value_loss": 0.00022726522001903504, "objective/train/value_max": -1.4483928680419922e-05, "objective/train/value_min": -0.28759765625, "objective/train/value_reward_corr": 0.7169332884706514, "objective/train/value_std": 0.016937255859375, "objective/train/weight_avg": 1.0000026226043701, "objective/train/weighted_lm_loss": 1.4114145040512085, "objective/train/weights_max": 1.1310542821884155, "objective/train/weights_min": 0.3713989555835724, "theoretical_loss": 3.3440014880237987, "tokens_seen": 3062235136 }, { "epoch": 0.86, "learning_rate": 7.277541522907807e-05, "loss": 0.0655, "theoretical_loss": 3.3440014880237987, "tokens_seen": 3062235136 }, { "epoch": 0.86, "learning_rate": 7.273529647757363e-05, "loss": 0.0643, "theoretical_loss": 3.3439906224807046, "tokens_seen": 3062366208 }, { "epoch": 0.86, "learning_rate": 7.269517772606917e-05, "loss": 0.0624, "theoretical_loss": 3.343979757532864, "tokens_seen": 3062497280 }, { "epoch": 0.86, "learning_rate": 7.265505897456472e-05, "loss": 0.0665, "theoretical_loss": 3.34396889318022, "tokens_seen": 3062628352 }, { "epoch": 0.86, "learning_rate": 7.261494022306025e-05, "loss": 0.0632, "theoretical_loss": 3.343958029422713, "tokens_seen": 3062759424 }, { "epoch": 0.86, "learning_rate": 7.25748214715558e-05, "loss": 0.0632, "theoretical_loss": 3.3439471662602864, "tokens_seen": 3062890496 }, { "epoch": 0.86, "learning_rate": 7.253470272005136e-05, "loss": 0.0627, "theoretical_loss": 3.3439363036928813, "tokens_seen": 3063021568 }, { "epoch": 0.86, "learning_rate": 7.24945839685469e-05, "loss": 0.0661, "theoretical_loss": 3.34392544172044, "tokens_seen": 3063152640 }, { "epoch": 0.86, "learning_rate": 7.245446521704246e-05, "loss": 0.0656, "theoretical_loss": 3.343914580342904, "tokens_seen": 3063283712 }, { "epoch": 0.86, "learning_rate": 7.241434646553798e-05, "loss": 0.067, "theoretical_loss": 3.3439037195602155, "tokens_seen": 3063414784 }, { "epoch": 0.86, "learning_rate": 7.237422771403354e-05, "loss": 0.0616, "theoretical_loss": 3.3438928593723167, "tokens_seen": 3063545856 }, { "epoch": 0.86, "learning_rate": 7.233410896252909e-05, "loss": 0.0618, "theoretical_loss": 3.3438819997791493, "tokens_seen": 3063676928 }, { "epoch": 0.86, "learning_rate": 7.229399021102463e-05, "loss": 0.0639, "theoretical_loss": 3.3438711407806556, "tokens_seen": 3063808000 }, { "epoch": 0.86, "learning_rate": 7.225387145952019e-05, "loss": 0.0662, "theoretical_loss": 3.343860282376778, "tokens_seen": 3063939072 }, { "epoch": 0.86, "learning_rate": 7.221375270801572e-05, "loss": 0.0641, "theoretical_loss": 3.343849424567457, "tokens_seen": 3064070144 }, { "epoch": 0.86, "learning_rate": 7.217363395651127e-05, "loss": 0.0615, "theoretical_loss": 3.343838567352636, "tokens_seen": 3064201216 }, { "epoch": 0.86, "learning_rate": 7.213351520500683e-05, "loss": 0.0596, "theoretical_loss": 3.3438277107322563, "tokens_seen": 3064332288 }, { "epoch": 0.86, "learning_rate": 7.209339645350237e-05, "loss": 0.0627, "theoretical_loss": 3.34381685470626, "tokens_seen": 3064463360 }, { "epoch": 0.86, "learning_rate": 7.205327770199792e-05, "loss": 0.0611, "theoretical_loss": 3.34380599927459, "tokens_seen": 3064594432 }, { "epoch": 0.86, "learning_rate": 7.201315895049346e-05, "loss": 0.0596, "theoretical_loss": 3.3437951444371867, "tokens_seen": 3064725504 }, { "epoch": 0.86, "learning_rate": 7.1973040198989e-05, "loss": 0.0635, "theoretical_loss": 3.3437842901939936, "tokens_seen": 3064856576 }, { "epoch": 0.86, "learning_rate": 7.193292144748456e-05, "loss": 0.0654, "theoretical_loss": 3.3437734365449527, "tokens_seen": 3064987648 }, { "epoch": 0.86, "learning_rate": 7.18928026959801e-05, "loss": 0.0618, "theoretical_loss": 3.343762583490005, "tokens_seen": 3065118720 }, { "epoch": 0.86, "learning_rate": 7.185268394447566e-05, "loss": 0.063, "theoretical_loss": 3.3437517310290934, "tokens_seen": 3065249792 }, { "epoch": 0.86, "learning_rate": 7.18125651929712e-05, "loss": 0.0642, "theoretical_loss": 3.3437408791621603, "tokens_seen": 3065380864 }, { "epoch": 0.86, "objective/train/advantage_avg": -0.00032096687937155366, "objective/train/docs_used": 1113675, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3556197881698608, "objective/train/original_loss": 1.3556196689605713, "objective/train/theoretical_loss": 3.343730027889147, "objective/train/tokens_used": 1436036576, "objective/train/value_avg": -0.01302337646484375, "objective/train/value_loss": 0.0006610297132283449, "objective/train/value_max": -2.9325485229492188e-05, "objective/train/value_min": -0.65966796875, "objective/train/value_reward_corr": 0.744386082006465, "objective/train/value_std": 0.0274658203125, "objective/train/weight_avg": 0.9999649524688721, "objective/train/weighted_lm_loss": 1.3545544147491455, "objective/train/weights_max": 1.485138177871704, "objective/train/weights_min": 0.3804965913295746, "theoretical_loss": 3.343730027889147, "tokens_seen": 3065511936 }, { "epoch": 0.86, "learning_rate": 7.177244644146674e-05, "loss": 0.0689, "theoretical_loss": 3.343730027889147, "tokens_seen": 3065511936 }, { "epoch": 0.86, "learning_rate": 7.173232768996229e-05, "loss": 0.0647, "theoretical_loss": 3.3437191772099957, "tokens_seen": 3065643008 }, { "epoch": 0.86, "learning_rate": 7.169220893845783e-05, "loss": 0.0626, "theoretical_loss": 3.3437083271246486, "tokens_seen": 3065774080 }, { "epoch": 0.86, "learning_rate": 7.165209018695339e-05, "loss": 0.0603, "theoretical_loss": 3.3436974776330484, "tokens_seen": 3065905152 }, { "epoch": 0.86, "learning_rate": 7.161197143544893e-05, "loss": 0.066, "theoretical_loss": 3.343686628735137, "tokens_seen": 3066036224 }, { "epoch": 0.86, "learning_rate": 7.157185268394447e-05, "loss": 0.0651, "theoretical_loss": 3.3436757804308552, "tokens_seen": 3066167296 }, { "epoch": 0.86, "learning_rate": 7.153173393244003e-05, "loss": 0.0633, "theoretical_loss": 3.3436649327201473, "tokens_seen": 3066298368 }, { "epoch": 0.86, "learning_rate": 7.149161518093557e-05, "loss": 0.0662, "theoretical_loss": 3.343654085602954, "tokens_seen": 3066429440 }, { "epoch": 0.86, "learning_rate": 7.145149642943112e-05, "loss": 0.0628, "theoretical_loss": 3.343643239079218, "tokens_seen": 3066560512 }, { "epoch": 0.86, "learning_rate": 7.141137767792666e-05, "loss": 0.0633, "theoretical_loss": 3.343632393148881, "tokens_seen": 3066691584 }, { "epoch": 0.86, "learning_rate": 7.13712589264222e-05, "loss": 0.062, "theoretical_loss": 3.343621547811886, "tokens_seen": 3066822656 }, { "epoch": 0.86, "learning_rate": 7.133114017491776e-05, "loss": 0.063, "theoretical_loss": 3.3436107030681743, "tokens_seen": 3066953728 }, { "epoch": 0.86, "learning_rate": 7.12910214234133e-05, "loss": 0.0629, "theoretical_loss": 3.3435998589176883, "tokens_seen": 3067084800 }, { "epoch": 0.86, "learning_rate": 7.125090267190886e-05, "loss": 0.0642, "theoretical_loss": 3.3435890153603705, "tokens_seen": 3067215872 }, { "epoch": 0.86, "learning_rate": 7.12107839204044e-05, "loss": 0.0627, "theoretical_loss": 3.343578172396163, "tokens_seen": 3067346944 }, { "epoch": 0.86, "learning_rate": 7.117066516889994e-05, "loss": 0.064, "theoretical_loss": 3.3435673300250084, "tokens_seen": 3067478016 }, { "epoch": 0.86, "learning_rate": 7.11305464173955e-05, "loss": 0.0657, "theoretical_loss": 3.3435564882468483, "tokens_seen": 3067609088 }, { "epoch": 0.86, "learning_rate": 7.109042766589103e-05, "loss": 0.0606, "theoretical_loss": 3.343545647061625, "tokens_seen": 3067740160 }, { "epoch": 0.86, "learning_rate": 7.105030891438659e-05, "loss": 0.0597, "theoretical_loss": 3.343534806469281, "tokens_seen": 3067871232 }, { "epoch": 0.86, "learning_rate": 7.101019016288213e-05, "loss": 0.0627, "theoretical_loss": 3.3435239664697587, "tokens_seen": 3068002304 }, { "epoch": 0.86, "learning_rate": 7.097007141137769e-05, "loss": 0.0647, "theoretical_loss": 3.343513127063, "tokens_seen": 3068133376 }, { "epoch": 0.86, "learning_rate": 7.092995265987323e-05, "loss": 0.065, "theoretical_loss": 3.343502288248947, "tokens_seen": 3068264448 }, { "epoch": 0.86, "learning_rate": 7.088983390836877e-05, "loss": 0.0642, "theoretical_loss": 3.343491450027542, "tokens_seen": 3068395520 }, { "epoch": 0.86, "learning_rate": 7.084971515686432e-05, "loss": 0.063, "theoretical_loss": 3.3434806123987277, "tokens_seen": 3068526592 }, { "epoch": 0.86, "learning_rate": 7.080959640535986e-05, "loss": 0.0628, "theoretical_loss": 3.343469775362447, "tokens_seen": 3068657664 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.00046714095515199006, "objective/train/docs_used": 1114913, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1925150156021118, "objective/train/original_loss": 1.1925151348114014, "objective/train/theoretical_loss": 3.3434589389186407, "objective/train/tokens_used": 1439313376, "objective/train/value_avg": -0.0074615478515625, "objective/train/value_loss": 0.00013005717482883483, "objective/train/value_max": -2.0623207092285156e-05, "objective/train/value_min": -0.3369140625, "objective/train/value_reward_corr": 0.7460020932373714, "objective/train/value_std": 0.01322174072265625, "objective/train/weight_avg": 1.0005296468734741, "objective/train/weighted_lm_loss": 1.192487120628357, "objective/train/weights_max": 1.1714041233062744, "objective/train/weights_min": 0.6158847212791443, "theoretical_loss": 3.3434589389186407, "tokens_seen": 3068788736 }, { "epoch": 0.86, "learning_rate": 7.076947765385542e-05, "loss": 0.0632, "theoretical_loss": 3.3434589389186407, "tokens_seen": 3068788736 }, { "epoch": 0.86, "learning_rate": 7.072935890235096e-05, "loss": 0.0604, "theoretical_loss": 3.3434481030672516, "tokens_seen": 3068919808 }, { "epoch": 0.86, "learning_rate": 7.06892401508465e-05, "loss": 0.0679, "theoretical_loss": 3.343437267808223, "tokens_seen": 3069050880 }, { "epoch": 0.86, "learning_rate": 7.064912139934206e-05, "loss": 0.0633, "theoretical_loss": 3.343426433141496, "tokens_seen": 3069181952 }, { "epoch": 0.86, "learning_rate": 7.06090026478376e-05, "loss": 0.0617, "theoretical_loss": 3.343415599067013, "tokens_seen": 3069313024 }, { "epoch": 0.86, "learning_rate": 7.056888389633315e-05, "loss": 0.0635, "theoretical_loss": 3.343404765584717, "tokens_seen": 3069444096 }, { "epoch": 0.86, "learning_rate": 7.05287651448287e-05, "loss": 0.0631, "theoretical_loss": 3.3433939326945503, "tokens_seen": 3069575168 }, { "epoch": 0.86, "learning_rate": 7.048864639332424e-05, "loss": 0.0679, "theoretical_loss": 3.343383100396455, "tokens_seen": 3069706240 }, { "epoch": 0.86, "learning_rate": 7.044852764181979e-05, "loss": 0.0659, "theoretical_loss": 3.3433722686903735, "tokens_seen": 3069837312 }, { "epoch": 0.86, "learning_rate": 7.040840889031533e-05, "loss": 0.0669, "theoretical_loss": 3.343361437576248, "tokens_seen": 3069968384 }, { "epoch": 0.86, "learning_rate": 7.036829013881089e-05, "loss": 0.0642, "theoretical_loss": 3.343350607054021, "tokens_seen": 3070099456 }, { "epoch": 0.86, "learning_rate": 7.032817138730643e-05, "loss": 0.0625, "theoretical_loss": 3.343339777123635, "tokens_seen": 3070230528 }, { "epoch": 0.86, "learning_rate": 7.028805263580197e-05, "loss": 0.0625, "theoretical_loss": 3.3433289477850323, "tokens_seen": 3070361600 }, { "epoch": 0.86, "learning_rate": 7.024793388429752e-05, "loss": 0.0636, "theoretical_loss": 3.343318119038155, "tokens_seen": 3070492672 }, { "epoch": 0.86, "learning_rate": 7.020781513279307e-05, "loss": 0.066, "theoretical_loss": 3.343307290882946, "tokens_seen": 3070623744 }, { "epoch": 0.86, "learning_rate": 7.016769638128862e-05, "loss": 0.0641, "theoretical_loss": 3.343296463319348, "tokens_seen": 3070754816 }, { "epoch": 0.86, "learning_rate": 7.012757762978416e-05, "loss": 0.0622, "theoretical_loss": 3.3432856363473022, "tokens_seen": 3070885888 }, { "epoch": 0.86, "learning_rate": 7.00874588782797e-05, "loss": 0.0665, "theoretical_loss": 3.3432748099667524, "tokens_seen": 3071016960 }, { "epoch": 0.86, "learning_rate": 7.004734012677526e-05, "loss": 0.062, "theoretical_loss": 3.34326398417764, "tokens_seen": 3071148032 }, { "epoch": 0.86, "learning_rate": 7.00072213752708e-05, "loss": 0.0637, "theoretical_loss": 3.343253158979908, "tokens_seen": 3071279104 }, { "epoch": 0.86, "learning_rate": 6.996710262376635e-05, "loss": 0.0615, "theoretical_loss": 3.343242334373499, "tokens_seen": 3071410176 }, { "epoch": 0.86, "learning_rate": 6.99269838722619e-05, "loss": 0.0626, "theoretical_loss": 3.343231510358355, "tokens_seen": 3071541248 }, { "epoch": 0.86, "learning_rate": 6.988686512075744e-05, "loss": 0.0642, "theoretical_loss": 3.3432206869344188, "tokens_seen": 3071672320 }, { "epoch": 0.86, "learning_rate": 6.984674636925299e-05, "loss": 0.0633, "theoretical_loss": 3.343209864101633, "tokens_seen": 3071803392 }, { "epoch": 0.86, "learning_rate": 6.980662761774853e-05, "loss": 0.0639, "theoretical_loss": 3.3431990418599393, "tokens_seen": 3071934464 }, { "epoch": 0.86, "objective/train/advantage_avg": 0.00021118615404702723, "objective/train/docs_used": 1116127, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.406591534614563, "objective/train/original_loss": 1.406591534614563, "objective/train/theoretical_loss": 3.343188220209281, "objective/train/tokens_used": 1442590176, "objective/train/value_avg": -0.0076446533203125, "objective/train/value_loss": 0.00036279429332353175, "objective/train/value_max": -2.6464462280273438e-05, "objective/train/value_min": -0.77197265625, "objective/train/value_reward_corr": 0.6470761847832202, "objective/train/value_std": 0.0159454345703125, "objective/train/weight_avg": 1.0003688335418701, "objective/train/weighted_lm_loss": 1.4065793752670288, "objective/train/weights_max": 1.4855008125305176, "objective/train/weights_min": 0.3739636242389679, "theoretical_loss": 3.343188220209281, "tokens_seen": 3072065536 }, { "epoch": 0.86, "learning_rate": 6.976650886624409e-05, "loss": 0.0629, "theoretical_loss": 3.343188220209281, "tokens_seen": 3072065536 }, { "epoch": 0.86, "learning_rate": 6.972639011473964e-05, "loss": 0.0668, "theoretical_loss": 3.3431773991496003, "tokens_seen": 3072196608 }, { "epoch": 0.86, "learning_rate": 6.968627136323517e-05, "loss": 0.0646, "theoretical_loss": 3.34316657868084, "tokens_seen": 3072327680 }, { "epoch": 0.86, "learning_rate": 6.964615261173072e-05, "loss": 0.0604, "theoretical_loss": 3.3431557588029426, "tokens_seen": 3072458752 }, { "epoch": 0.86, "learning_rate": 6.960603386022627e-05, "loss": 0.0604, "theoretical_loss": 3.34314493951585, "tokens_seen": 3072589824 }, { "epoch": 0.86, "learning_rate": 6.956591510872182e-05, "loss": 0.0642, "theoretical_loss": 3.3431341208195056, "tokens_seen": 3072720896 }, { "epoch": 0.86, "learning_rate": 6.952579635721738e-05, "loss": 0.0677, "theoretical_loss": 3.343123302713851, "tokens_seen": 3072851968 }, { "epoch": 0.86, "learning_rate": 6.94856776057129e-05, "loss": 0.0632, "theoretical_loss": 3.3431124851988296, "tokens_seen": 3072983040 }, { "epoch": 0.86, "learning_rate": 6.944555885420846e-05, "loss": 0.0601, "theoretical_loss": 3.3431016682743833, "tokens_seen": 3073114112 }, { "epoch": 0.86, "learning_rate": 6.940544010270401e-05, "loss": 0.0661, "theoretical_loss": 3.3430908519404556, "tokens_seen": 3073245184 }, { "epoch": 0.86, "learning_rate": 6.936532135119955e-05, "loss": 0.067, "theoretical_loss": 3.343080036196988, "tokens_seen": 3073376256 }, { "epoch": 0.86, "learning_rate": 6.932520259969511e-05, "loss": 0.0642, "theoretical_loss": 3.343069221043924, "tokens_seen": 3073507328 }, { "epoch": 0.86, "learning_rate": 6.928508384819064e-05, "loss": 0.0631, "theoretical_loss": 3.3430584064812052, "tokens_seen": 3073638400 }, { "epoch": 0.86, "learning_rate": 6.924496509668619e-05, "loss": 0.0621, "theoretical_loss": 3.3430475925087753, "tokens_seen": 3073769472 }, { "epoch": 0.86, "learning_rate": 6.920484634518175e-05, "loss": 0.067, "theoretical_loss": 3.3430367791265763, "tokens_seen": 3073900544 }, { "epoch": 0.86, "learning_rate": 6.916472759367729e-05, "loss": 0.0637, "theoretical_loss": 3.343025966334551, "tokens_seen": 3074031616 }, { "epoch": 0.86, "learning_rate": 6.912460884217284e-05, "loss": 0.0626, "theoretical_loss": 3.3430151541326416, "tokens_seen": 3074162688 }, { "epoch": 0.86, "learning_rate": 6.908449009066837e-05, "loss": 0.0623, "theoretical_loss": 3.3430043425207914, "tokens_seen": 3074293760 }, { "epoch": 0.86, "learning_rate": 6.904437133916392e-05, "loss": 0.0643, "theoretical_loss": 3.3429935314989425, "tokens_seen": 3074424832 }, { "epoch": 0.86, "learning_rate": 6.900425258765948e-05, "loss": 0.0641, "theoretical_loss": 3.3429827210670378, "tokens_seen": 3074555904 }, { "epoch": 0.86, "learning_rate": 6.896413383615502e-05, "loss": 0.0629, "theoretical_loss": 3.34297191122502, "tokens_seen": 3074686976 }, { "epoch": 0.86, "learning_rate": 6.892401508465058e-05, "loss": 0.0664, "theoretical_loss": 3.3429611019728314, "tokens_seen": 3074818048 }, { "epoch": 0.86, "learning_rate": 6.88838963331461e-05, "loss": 0.0658, "theoretical_loss": 3.342950293310415, "tokens_seen": 3074949120 }, { "epoch": 0.86, "learning_rate": 6.884377758164166e-05, "loss": 0.0645, "theoretical_loss": 3.342939485237714, "tokens_seen": 3075080192 }, { "epoch": 0.86, "learning_rate": 6.880365883013721e-05, "loss": 0.0658, "theoretical_loss": 3.3429286777546703, "tokens_seen": 3075211264 }, { "epoch": 0.86, "objective/train/advantage_avg": -0.0007716118125244975, "objective/train/docs_used": 1117177, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3223882913589478, "objective/train/original_loss": 1.3223882913589478, "objective/train/theoretical_loss": 3.3429178708612266, "objective/train/tokens_used": 1445866976, "objective/train/value_avg": -0.0078887939453125, "objective/train/value_loss": 0.0003730752505362034, "objective/train/value_max": -1.9252300262451172e-05, "objective/train/value_min": -0.66943359375, "objective/train/value_reward_corr": 0.5928912488045733, "objective/train/value_std": 0.015960693359375, "objective/train/weight_avg": 0.9993921518325806, "objective/train/weighted_lm_loss": 1.3209376335144043, "objective/train/weights_max": 1.6252752542495728, "objective/train/weights_min": 0.3710109293460846, "theoretical_loss": 3.3429178708612266, "tokens_seen": 3075342336 }, { "epoch": 0.86, "learning_rate": 6.876354007863275e-05, "loss": 0.0642, "theoretical_loss": 3.3429178708612266, "tokens_seen": 3075342336 }, { "epoch": 0.86, "learning_rate": 6.872342132712831e-05, "loss": 0.0633, "theoretical_loss": 3.342907064557326, "tokens_seen": 3075473408 }, { "epoch": 0.86, "learning_rate": 6.868330257562384e-05, "loss": 0.062, "theoretical_loss": 3.342896258842911, "tokens_seen": 3075604480 }, { "epoch": 0.86, "learning_rate": 6.864318382411939e-05, "loss": 0.0632, "theoretical_loss": 3.3428854537179244, "tokens_seen": 3075735552 }, { "epoch": 0.86, "learning_rate": 6.860306507261495e-05, "loss": 0.0627, "theoretical_loss": 3.3428746491823094, "tokens_seen": 3075866624 }, { "epoch": 0.86, "learning_rate": 6.856294632111049e-05, "loss": 0.0638, "theoretical_loss": 3.3428638452360078, "tokens_seen": 3075997696 }, { "epoch": 0.86, "learning_rate": 6.852282756960604e-05, "loss": 0.0641, "theoretical_loss": 3.342853041878963, "tokens_seen": 3076128768 }, { "epoch": 0.86, "learning_rate": 6.848270881810157e-05, "loss": 0.0665, "theoretical_loss": 3.342842239111117, "tokens_seen": 3076259840 }, { "epoch": 0.86, "learning_rate": 6.844259006659713e-05, "loss": 0.0642, "theoretical_loss": 3.3428314369324137, "tokens_seen": 3076390912 }, { "epoch": 0.86, "learning_rate": 6.840247131509268e-05, "loss": 0.0622, "theoretical_loss": 3.342820635342795, "tokens_seen": 3076521984 }, { "epoch": 0.86, "learning_rate": 6.836235256358822e-05, "loss": 0.0642, "theoretical_loss": 3.342809834342204, "tokens_seen": 3076653056 }, { "epoch": 0.86, "learning_rate": 6.832223381208378e-05, "loss": 0.0611, "theoretical_loss": 3.342799033930584, "tokens_seen": 3076784128 }, { "epoch": 0.86, "learning_rate": 6.828211506057932e-05, "loss": 0.0612, "theoretical_loss": 3.3427882341078767, "tokens_seen": 3076915200 }, { "epoch": 0.86, "learning_rate": 6.824199630907486e-05, "loss": 0.0649, "theoretical_loss": 3.3427774348740256, "tokens_seen": 3077046272 }, { "epoch": 0.86, "learning_rate": 6.820187755757041e-05, "loss": 0.067, "theoretical_loss": 3.3427666362289736, "tokens_seen": 3077177344 }, { "epoch": 0.87, "learning_rate": 6.816175880606596e-05, "loss": 0.0645, "theoretical_loss": 3.342755838172663, "tokens_seen": 3077308416 }, { "epoch": 0.87, "learning_rate": 6.812164005456151e-05, "loss": 0.062, "theoretical_loss": 3.3427450407050374, "tokens_seen": 3077439488 }, { "epoch": 0.87, "learning_rate": 6.808152130305705e-05, "loss": 0.0622, "theoretical_loss": 3.3427342438260386, "tokens_seen": 3077570560 }, { "epoch": 0.87, "learning_rate": 6.804140255155259e-05, "loss": 0.0634, "theoretical_loss": 3.3427234475356102, "tokens_seen": 3077701632 }, { "epoch": 0.87, "learning_rate": 6.800128380004815e-05, "loss": 0.0639, "theoretical_loss": 3.342712651833695, "tokens_seen": 3077832704 }, { "epoch": 0.87, "learning_rate": 6.796116504854369e-05, "loss": 0.0622, "theoretical_loss": 3.3427018567202356, "tokens_seen": 3077963776 }, { "epoch": 0.87, "learning_rate": 6.792104629703924e-05, "loss": 0.0613, "theoretical_loss": 3.342691062195175, "tokens_seen": 3078094848 }, { "epoch": 0.87, "learning_rate": 6.788092754553478e-05, "loss": 0.0628, "theoretical_loss": 3.342680268258456, "tokens_seen": 3078225920 }, { "epoch": 0.87, "learning_rate": 6.784080879403033e-05, "loss": 0.0636, "theoretical_loss": 3.3426694749100214, "tokens_seen": 3078356992 }, { "epoch": 0.87, "learning_rate": 6.780069004252588e-05, "loss": 0.0623, "theoretical_loss": 3.3426586821498145, "tokens_seen": 3078488064 }, { "epoch": 0.87, "objective/train/advantage_avg": -0.0002898703678511083, "objective/train/docs_used": 1118389, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.21840500831604, "objective/train/original_loss": 1.218404769897461, "objective/train/theoretical_loss": 3.342647889977778, "objective/train/tokens_used": 1449143776, "objective/train/value_avg": -0.00856781005859375, "objective/train/value_loss": 0.00019520283967722207, "objective/train/value_max": -4.166364669799805e-05, "objective/train/value_min": -0.306396484375, "objective/train/value_reward_corr": 0.755447284300915, "objective/train/value_std": 0.016510009765625, "objective/train/weight_avg": 0.9998024106025696, "objective/train/weighted_lm_loss": 1.2169106006622314, "objective/train/weights_max": 1.1539889574050903, "objective/train/weights_min": 0.37205690145492554, "theoretical_loss": 3.342647889977778, "tokens_seen": 3078619136 }, { "epoch": 0.87, "learning_rate": 6.776057129102142e-05, "loss": 0.0632, "theoretical_loss": 3.342647889977778, "tokens_seen": 3078619136 }, { "epoch": 0.87, "learning_rate": 6.772045253951698e-05, "loss": 0.06, "theoretical_loss": 3.3426370983938547, "tokens_seen": 3078750208 }, { "epoch": 0.87, "learning_rate": 6.768033378801252e-05, "loss": 0.0646, "theoretical_loss": 3.342626307397987, "tokens_seen": 3078881280 }, { "epoch": 0.87, "learning_rate": 6.764021503650806e-05, "loss": 0.0672, "theoretical_loss": 3.342615516990119, "tokens_seen": 3079012352 }, { "epoch": 0.87, "learning_rate": 6.760009628500361e-05, "loss": 0.064, "theoretical_loss": 3.342604727170193, "tokens_seen": 3079143424 }, { "epoch": 0.87, "learning_rate": 6.755997753349916e-05, "loss": 0.0636, "theoretical_loss": 3.342593937938152, "tokens_seen": 3079274496 }, { "epoch": 0.87, "learning_rate": 6.751985878199471e-05, "loss": 0.064, "theoretical_loss": 3.3425831492939384, "tokens_seen": 3079405568 }, { "epoch": 0.87, "learning_rate": 6.747974003049025e-05, "loss": 0.0643, "theoretical_loss": 3.342572361237496, "tokens_seen": 3079536640 }, { "epoch": 0.87, "learning_rate": 6.743962127898579e-05, "loss": 0.0616, "theoretical_loss": 3.3425615737687675, "tokens_seen": 3079667712 }, { "epoch": 0.87, "learning_rate": 6.739950252748135e-05, "loss": 0.0634, "theoretical_loss": 3.3425507868876956, "tokens_seen": 3079798784 }, { "epoch": 0.87, "learning_rate": 6.735938377597689e-05, "loss": 0.0631, "theoretical_loss": 3.342540000594224, "tokens_seen": 3079929856 }, { "epoch": 0.87, "learning_rate": 6.731926502447244e-05, "loss": 0.0641, "theoretical_loss": 3.342529214888294, "tokens_seen": 3080060928 }, { "epoch": 0.87, "learning_rate": 6.727914627296799e-05, "loss": 0.0633, "theoretical_loss": 3.342518429769851, "tokens_seen": 3080192000 }, { "epoch": 0.87, "learning_rate": 6.723902752146353e-05, "loss": 0.0678, "theoretical_loss": 3.342507645238836, "tokens_seen": 3080323072 }, { "epoch": 0.87, "learning_rate": 6.719890876995908e-05, "loss": 0.0631, "theoretical_loss": 3.342496861295193, "tokens_seen": 3080454144 }, { "epoch": 0.87, "learning_rate": 6.715879001845462e-05, "loss": 0.0619, "theoretical_loss": 3.3424860779388643, "tokens_seen": 3080585216 }, { "epoch": 0.87, "learning_rate": 6.711867126695018e-05, "loss": 0.0629, "theoretical_loss": 3.342475295169794, "tokens_seen": 3080716288 }, { "epoch": 0.87, "learning_rate": 6.707855251544572e-05, "loss": 0.0617, "theoretical_loss": 3.3424645129879247, "tokens_seen": 3080847360 }, { "epoch": 0.87, "learning_rate": 6.703843376394127e-05, "loss": 0.0654, "theoretical_loss": 3.342453731393199, "tokens_seen": 3080978432 }, { "epoch": 0.87, "learning_rate": 6.699831501243681e-05, "loss": 0.0655, "theoretical_loss": 3.3424429503855597, "tokens_seen": 3081109504 }, { "epoch": 0.87, "learning_rate": 6.695819626093236e-05, "loss": 0.0609, "theoretical_loss": 3.342432169964951, "tokens_seen": 3081240576 }, { "epoch": 0.87, "learning_rate": 6.691807750942791e-05, "loss": 0.06, "theoretical_loss": 3.3424213901313156, "tokens_seen": 3081371648 }, { "epoch": 0.87, "learning_rate": 6.687795875792345e-05, "loss": 0.0633, "theoretical_loss": 3.3424106108845955, "tokens_seen": 3081502720 }, { "epoch": 0.87, "learning_rate": 6.683784000641901e-05, "loss": 0.0605, "theoretical_loss": 3.3423998322247352, "tokens_seen": 3081633792 }, { "epoch": 0.87, "learning_rate": 6.679772125491455e-05, "loss": 0.0643, "theoretical_loss": 3.342389054151677, "tokens_seen": 3081764864 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.0007749533979222178, "objective/train/docs_used": 1119488, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.36904776096344, "objective/train/original_loss": 1.3690478801727295, "objective/train/theoretical_loss": 3.3423782766653645, "objective/train/tokens_used": 1452420576, "objective/train/value_avg": -0.006237030029296875, "objective/train/value_loss": 0.00033683108631521463, "objective/train/value_max": -4.470348358154297e-05, "objective/train/value_min": -0.373046875, "objective/train/value_reward_corr": 0.5574164138384483, "objective/train/value_std": 0.01213836669921875, "objective/train/weight_avg": 1.0009167194366455, "objective/train/weighted_lm_loss": 1.369233250617981, "objective/train/weights_max": 1.3834433555603027, "objective/train/weights_min": 0.3686373233795166, "theoretical_loss": 3.3423782766653645, "tokens_seen": 3081895936 }, { "epoch": 0.87, "learning_rate": 6.675760250341009e-05, "loss": 0.064, "theoretical_loss": 3.3423782766653645, "tokens_seen": 3081895936 }, { "epoch": 0.87, "learning_rate": 6.671748375190564e-05, "loss": 0.0644, "theoretical_loss": 3.34236749976574, "tokens_seen": 3082027008 }, { "epoch": 0.87, "learning_rate": 6.667736500040119e-05, "loss": 0.0634, "theoretical_loss": 3.3423567234527476, "tokens_seen": 3082158080 }, { "epoch": 0.87, "learning_rate": 6.663724624889674e-05, "loss": 0.0621, "theoretical_loss": 3.34234594772633, "tokens_seen": 3082289152 }, { "epoch": 0.87, "learning_rate": 6.659712749739228e-05, "loss": 0.0663, "theoretical_loss": 3.34233517258643, "tokens_seen": 3082420224 }, { "epoch": 0.87, "learning_rate": 6.655700874588782e-05, "loss": 0.0627, "theoretical_loss": 3.342324398032991, "tokens_seen": 3082551296 }, { "epoch": 0.87, "learning_rate": 6.651688999438338e-05, "loss": 0.0645, "theoretical_loss": 3.342313624065956, "tokens_seen": 3082682368 }, { "epoch": 0.87, "learning_rate": 6.647677124287892e-05, "loss": 0.0615, "theoretical_loss": 3.342302850685269, "tokens_seen": 3082813440 }, { "epoch": 0.87, "learning_rate": 6.643665249137447e-05, "loss": 0.0643, "theoretical_loss": 3.342292077890872, "tokens_seen": 3082944512 }, { "epoch": 0.87, "learning_rate": 6.639653373987002e-05, "loss": 0.0625, "theoretical_loss": 3.3422813056827088, "tokens_seen": 3083075584 }, { "epoch": 0.87, "learning_rate": 6.635641498836556e-05, "loss": 0.0635, "theoretical_loss": 3.3422705340607224, "tokens_seen": 3083206656 }, { "epoch": 0.87, "learning_rate": 6.631629623686111e-05, "loss": 0.0634, "theoretical_loss": 3.342259763024856, "tokens_seen": 3083337728 }, { "epoch": 0.87, "learning_rate": 6.627617748535665e-05, "loss": 0.0604, "theoretical_loss": 3.3422489925750525, "tokens_seen": 3083468800 }, { "epoch": 0.87, "learning_rate": 6.623605873385221e-05, "loss": 0.0617, "theoretical_loss": 3.3422382227112557, "tokens_seen": 3083599872 }, { "epoch": 0.87, "learning_rate": 6.619593998234775e-05, "loss": 0.0633, "theoretical_loss": 3.3422274534334084, "tokens_seen": 3083730944 }, { "epoch": 0.87, "learning_rate": 6.615582123084329e-05, "loss": 0.0609, "theoretical_loss": 3.3422166847414543, "tokens_seen": 3083862016 }, { "epoch": 0.87, "learning_rate": 6.611570247933885e-05, "loss": 0.0645, "theoretical_loss": 3.3422059166353355, "tokens_seen": 3083993088 }, { "epoch": 0.87, "learning_rate": 6.607558372783439e-05, "loss": 0.064, "theoretical_loss": 3.3421951491149966, "tokens_seen": 3084124160 }, { "epoch": 0.87, "learning_rate": 6.603546497632994e-05, "loss": 0.0627, "theoretical_loss": 3.3421843821803803, "tokens_seen": 3084255232 }, { "epoch": 0.87, "learning_rate": 6.59953462248255e-05, "loss": 0.0631, "theoretical_loss": 3.3421736158314292, "tokens_seen": 3084386304 }, { "epoch": 0.87, "learning_rate": 6.595522747332102e-05, "loss": 0.0624, "theoretical_loss": 3.342162850068088, "tokens_seen": 3084517376 }, { "epoch": 0.87, "learning_rate": 6.591510872181658e-05, "loss": 0.0665, "theoretical_loss": 3.3421520848902984, "tokens_seen": 3084648448 }, { "epoch": 0.87, "learning_rate": 6.587498997031212e-05, "loss": 0.0635, "theoretical_loss": 3.342141320298005, "tokens_seen": 3084779520 }, { "epoch": 0.87, "learning_rate": 6.583487121880767e-05, "loss": 0.0611, "theoretical_loss": 3.3421305562911496, "tokens_seen": 3084910592 }, { "epoch": 0.87, "learning_rate": 6.579475246730323e-05, "loss": 0.0606, "theoretical_loss": 3.342119792869677, "tokens_seen": 3085041664 }, { "epoch": 0.87, "objective/train/advantage_avg": -0.00022682550479657948, "objective/train/docs_used": 1120785, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.301485300064087, "objective/train/original_loss": 1.301485300064087, "objective/train/theoretical_loss": 3.342109030033529, "objective/train/tokens_used": 1455697376, "objective/train/value_avg": -0.008758544921875, "objective/train/value_loss": 0.0002458332746755332, "objective/train/value_max": -2.3365020751953125e-05, "objective/train/value_min": -0.91943359375, "objective/train/value_reward_corr": 0.7405189487778814, "objective/train/value_std": 0.0176239013671875, "objective/train/weight_avg": 0.9998883008956909, "objective/train/weighted_lm_loss": 1.3008143901824951, "objective/train/weights_max": 1.592208743095398, "objective/train/weights_min": 0.38919898867607117, "theoretical_loss": 3.342109030033529, "tokens_seen": 3085172736 }, { "epoch": 0.87, "learning_rate": 6.575463371579876e-05, "loss": 0.0657, "theoretical_loss": 3.342109030033529, "tokens_seen": 3085172736 }, { "epoch": 0.87, "learning_rate": 6.571451496429431e-05, "loss": 0.0633, "theoretical_loss": 3.3420982677826507, "tokens_seen": 3085303808 }, { "epoch": 0.87, "learning_rate": 6.567439621278985e-05, "loss": 0.0645, "theoretical_loss": 3.342087506116984, "tokens_seen": 3085434880 }, { "epoch": 0.87, "learning_rate": 6.563427746128541e-05, "loss": 0.0617, "theoretical_loss": 3.342076745036473, "tokens_seen": 3085565952 }, { "epoch": 0.87, "learning_rate": 6.559415870978096e-05, "loss": 0.063, "theoretical_loss": 3.3420659845410605, "tokens_seen": 3085697024 }, { "epoch": 0.87, "learning_rate": 6.555403995827649e-05, "loss": 0.06, "theoretical_loss": 3.34205522463069, "tokens_seen": 3085828096 }, { "epoch": 0.87, "learning_rate": 6.551392120677205e-05, "loss": 0.0641, "theoretical_loss": 3.342044465305305, "tokens_seen": 3085959168 }, { "epoch": 0.87, "learning_rate": 6.54738024552676e-05, "loss": 0.0643, "theoretical_loss": 3.3420337065648487, "tokens_seen": 3086090240 }, { "epoch": 0.87, "learning_rate": 6.543368370376314e-05, "loss": 0.0635, "theoretical_loss": 3.3420229484092645, "tokens_seen": 3086221312 }, { "epoch": 0.87, "learning_rate": 6.53935649522587e-05, "loss": 0.0639, "theoretical_loss": 3.3420121908384957, "tokens_seen": 3086352384 }, { "epoch": 0.87, "learning_rate": 6.535344620075422e-05, "loss": 0.062, "theoretical_loss": 3.3420014338524857, "tokens_seen": 3086483456 }, { "epoch": 0.87, "learning_rate": 6.531332744924978e-05, "loss": 0.066, "theoretical_loss": 3.341990677451178, "tokens_seen": 3086614528 }, { "epoch": 0.87, "learning_rate": 6.527320869774533e-05, "loss": 0.0633, "theoretical_loss": 3.3419799216345156, "tokens_seen": 3086745600 }, { "epoch": 0.87, "learning_rate": 6.523308994624088e-05, "loss": 0.0602, "theoretical_loss": 3.3419691664024427, "tokens_seen": 3086876672 }, { "epoch": 0.87, "learning_rate": 6.519297119473643e-05, "loss": 0.0623, "theoretical_loss": 3.341958411754902, "tokens_seen": 3087007744 }, { "epoch": 0.87, "learning_rate": 6.515285244323196e-05, "loss": 0.066, "theoretical_loss": 3.341947657691837, "tokens_seen": 3087138816 }, { "epoch": 0.87, "learning_rate": 6.511273369172751e-05, "loss": 0.062, "theoretical_loss": 3.3419369042131915, "tokens_seen": 3087269888 }, { "epoch": 0.87, "learning_rate": 6.507261494022307e-05, "loss": 0.0621, "theoretical_loss": 3.3419261513189085, "tokens_seen": 3087400960 }, { "epoch": 0.87, "learning_rate": 6.503249618871861e-05, "loss": 0.062, "theoretical_loss": 3.341915399008932, "tokens_seen": 3087532032 }, { "epoch": 0.87, "learning_rate": 6.499237743721416e-05, "loss": 0.0634, "theoretical_loss": 3.3419046472832044, "tokens_seen": 3087663104 }, { "epoch": 0.87, "learning_rate": 6.495225868570969e-05, "loss": 0.062, "theoretical_loss": 3.3418938961416704, "tokens_seen": 3087794176 }, { "epoch": 0.87, "learning_rate": 6.491213993420525e-05, "loss": 0.061, "theoretical_loss": 3.341883145584273, "tokens_seen": 3087925248 }, { "epoch": 0.87, "learning_rate": 6.48720211827008e-05, "loss": 0.0631, "theoretical_loss": 3.341872395610955, "tokens_seen": 3088056320 }, { "epoch": 0.87, "learning_rate": 6.483190243119634e-05, "loss": 0.0641, "theoretical_loss": 3.3418616462216604, "tokens_seen": 3088187392 }, { "epoch": 0.87, "learning_rate": 6.47917836796919e-05, "loss": 0.0628, "theoretical_loss": 3.3418508974163332, "tokens_seen": 3088318464 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.0005529692280106246, "objective/train/docs_used": 1122042, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2044538259506226, "objective/train/original_loss": 1.204453945159912, "objective/train/theoretical_loss": 3.3418401491949163, "objective/train/tokens_used": 1458974176, "objective/train/value_avg": -0.007289886474609375, "objective/train/value_loss": 0.00016172639152500778, "objective/train/value_max": -2.777576446533203e-05, "objective/train/value_min": -0.2427978515625, "objective/train/value_reward_corr": 0.6667806988660727, "objective/train/value_std": 0.0120697021484375, "objective/train/weight_avg": 1.000625491142273, "objective/train/weighted_lm_loss": 1.2047663927078247, "objective/train/weights_max": 1.1311445236206055, "objective/train/weights_min": 0.3681500554084778, "theoretical_loss": 3.3418401491949163, "tokens_seen": 3088449536 }, { "epoch": 0.87, "learning_rate": 6.475166492818742e-05, "loss": 0.0633, "theoretical_loss": 3.3418401491949163, "tokens_seen": 3088449536 }, { "epoch": 0.87, "learning_rate": 6.471154617668298e-05, "loss": 0.0647, "theoretical_loss": 3.3418294015573533, "tokens_seen": 3088580608 }, { "epoch": 0.87, "learning_rate": 6.467142742517853e-05, "loss": 0.0638, "theoretical_loss": 3.3418186545035873, "tokens_seen": 3088711680 }, { "epoch": 0.87, "learning_rate": 6.463130867367408e-05, "loss": 0.064, "theoretical_loss": 3.3418079080335628, "tokens_seen": 3088842752 }, { "epoch": 0.87, "learning_rate": 6.459118992216963e-05, "loss": 0.0657, "theoretical_loss": 3.3417971621472224, "tokens_seen": 3088973824 }, { "epoch": 0.87, "learning_rate": 6.455107117066517e-05, "loss": 0.0618, "theoretical_loss": 3.34178641684451, "tokens_seen": 3089104896 }, { "epoch": 0.87, "learning_rate": 6.451095241916071e-05, "loss": 0.0606, "theoretical_loss": 3.3417756721253693, "tokens_seen": 3089235968 }, { "epoch": 0.87, "learning_rate": 6.447083366765627e-05, "loss": 0.0655, "theoretical_loss": 3.3417649279897437, "tokens_seen": 3089367040 }, { "epoch": 0.87, "learning_rate": 6.443071491615181e-05, "loss": 0.0625, "theoretical_loss": 3.3417541844375767, "tokens_seen": 3089498112 }, { "epoch": 0.87, "learning_rate": 6.439059616464736e-05, "loss": 0.0632, "theoretical_loss": 3.341743441468812, "tokens_seen": 3089629184 }, { "epoch": 0.87, "learning_rate": 6.43504774131429e-05, "loss": 0.0652, "theoretical_loss": 3.341732699083393, "tokens_seen": 3089760256 }, { "epoch": 0.87, "learning_rate": 6.431035866163845e-05, "loss": 0.0681, "theoretical_loss": 3.3417219572812633, "tokens_seen": 3089891328 }, { "epoch": 0.87, "learning_rate": 6.4270239910134e-05, "loss": 0.0578, "theoretical_loss": 3.3417112160623663, "tokens_seen": 3090022400 }, { "epoch": 0.87, "learning_rate": 6.423012115862954e-05, "loss": 0.0622, "theoretical_loss": 3.3417004754266464, "tokens_seen": 3090153472 }, { "epoch": 0.87, "learning_rate": 6.41900024071251e-05, "loss": 0.0618, "theoretical_loss": 3.3416897353740462, "tokens_seen": 3090284544 }, { "epoch": 0.87, "learning_rate": 6.414988365562064e-05, "loss": 0.0632, "theoretical_loss": 3.34167899590451, "tokens_seen": 3090415616 }, { "epoch": 0.87, "learning_rate": 6.410976490411618e-05, "loss": 0.0615, "theoretical_loss": 3.3416682570179814, "tokens_seen": 3090546688 }, { "epoch": 0.87, "learning_rate": 6.406964615261174e-05, "loss": 0.0636, "theoretical_loss": 3.3416575187144035, "tokens_seen": 3090677760 }, { "epoch": 0.87, "learning_rate": 6.402952740110728e-05, "loss": 0.066, "theoretical_loss": 3.3416467809937203, "tokens_seen": 3090808832 }, { "epoch": 0.87, "learning_rate": 6.398940864960283e-05, "loss": 0.0605, "theoretical_loss": 3.341636043855875, "tokens_seen": 3090939904 }, { "epoch": 0.87, "learning_rate": 6.394928989809837e-05, "loss": 0.0651, "theoretical_loss": 3.341625307300812, "tokens_seen": 3091070976 }, { "epoch": 0.87, "learning_rate": 6.390917114659391e-05, "loss": 0.0637, "theoretical_loss": 3.3416145713284746, "tokens_seen": 3091202048 }, { "epoch": 0.87, "learning_rate": 6.386905239508947e-05, "loss": 0.0646, "theoretical_loss": 3.3416038359388063, "tokens_seen": 3091333120 }, { "epoch": 0.87, "learning_rate": 6.382893364358501e-05, "loss": 0.0643, "theoretical_loss": 3.341593101131751, "tokens_seen": 3091464192 }, { "epoch": 0.87, "learning_rate": 6.378881489208056e-05, "loss": 0.0637, "theoretical_loss": 3.3415823669072524, "tokens_seen": 3091595264 }, { "epoch": 0.87, "objective/train/advantage_avg": 0.001152012962847948, "objective/train/docs_used": 1123259, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3666328191757202, "objective/train/original_loss": 1.3666326999664307, "objective/train/theoretical_loss": 3.341571633265254, "objective/train/tokens_used": 1462250976, "objective/train/value_avg": -0.00862884521484375, "objective/train/value_loss": 0.00043364803423173726, "objective/train/value_max": -2.2470951080322266e-05, "objective/train/value_min": -0.86083984375, "objective/train/value_reward_corr": 0.6748142989066686, "objective/train/value_std": 0.0214691162109375, "objective/train/weight_avg": 1.0013582706451416, "objective/train/weighted_lm_loss": 1.3689039945602417, "objective/train/weights_max": 2.134645938873291, "objective/train/weights_min": 0.3843713402748108, "theoretical_loss": 3.341571633265254, "tokens_seen": 3091726336 }, { "epoch": 0.87, "learning_rate": 6.37486961405761e-05, "loss": 0.0649, "theoretical_loss": 3.341571633265254, "tokens_seen": 3091726336 }, { "epoch": 0.87, "learning_rate": 6.370857738907165e-05, "loss": 0.0622, "theoretical_loss": 3.3415609002056996, "tokens_seen": 3091857408 }, { "epoch": 0.87, "learning_rate": 6.36684586375672e-05, "loss": 0.0613, "theoretical_loss": 3.341550167728533, "tokens_seen": 3091988480 }, { "epoch": 0.87, "learning_rate": 6.362833988606274e-05, "loss": 0.0647, "theoretical_loss": 3.3415394358336976, "tokens_seen": 3092119552 }, { "epoch": 0.87, "learning_rate": 6.35882211345583e-05, "loss": 0.0642, "theoretical_loss": 3.3415287045211377, "tokens_seen": 3092250624 }, { "epoch": 0.87, "learning_rate": 6.354810238305384e-05, "loss": 0.0642, "theoretical_loss": 3.3415179737907965, "tokens_seen": 3092381696 }, { "epoch": 0.87, "learning_rate": 6.350798363154938e-05, "loss": 0.0662, "theoretical_loss": 3.3415072436426176, "tokens_seen": 3092512768 }, { "epoch": 0.87, "learning_rate": 6.346786488004494e-05, "loss": 0.0643, "theoretical_loss": 3.3414965140765456, "tokens_seen": 3092643840 }, { "epoch": 0.87, "learning_rate": 6.342774612854048e-05, "loss": 0.063, "theoretical_loss": 3.341485785092523, "tokens_seen": 3092774912 }, { "epoch": 0.87, "learning_rate": 6.338762737703603e-05, "loss": 0.07, "theoretical_loss": 3.341475056690495, "tokens_seen": 3092905984 }, { "epoch": 0.87, "learning_rate": 6.334750862553157e-05, "loss": 0.0615, "theoretical_loss": 3.341464328870404, "tokens_seen": 3093037056 }, { "epoch": 0.87, "learning_rate": 6.330738987402713e-05, "loss": 0.0646, "theoretical_loss": 3.3414536016321947, "tokens_seen": 3093168128 }, { "epoch": 0.87, "learning_rate": 6.326727112252267e-05, "loss": 0.0615, "theoretical_loss": 3.3414428749758107, "tokens_seen": 3093299200 }, { "epoch": 0.87, "learning_rate": 6.322715237101821e-05, "loss": 0.0626, "theoretical_loss": 3.3414321489011956, "tokens_seen": 3093430272 }, { "epoch": 0.87, "learning_rate": 6.318703361951377e-05, "loss": 0.0644, "theoretical_loss": 3.341421423408293, "tokens_seen": 3093561344 }, { "epoch": 0.87, "learning_rate": 6.31469148680093e-05, "loss": 0.0662, "theoretical_loss": 3.3414106984970475, "tokens_seen": 3093692416 }, { "epoch": 0.88, "learning_rate": 6.310679611650486e-05, "loss": 0.064, "theoretical_loss": 3.341399974167402, "tokens_seen": 3093823488 }, { "epoch": 0.88, "learning_rate": 6.30666773650004e-05, "loss": 0.0604, "theoretical_loss": 3.341389250419301, "tokens_seen": 3093954560 }, { "epoch": 0.88, "learning_rate": 6.302655861349594e-05, "loss": 0.0686, "theoretical_loss": 3.3413785272526875, "tokens_seen": 3094085632 }, { "epoch": 0.88, "learning_rate": 6.29864398619915e-05, "loss": 0.0625, "theoretical_loss": 3.3413678046675064, "tokens_seen": 3094216704 }, { "epoch": 0.88, "learning_rate": 6.294632111048704e-05, "loss": 0.0654, "theoretical_loss": 3.341357082663701, "tokens_seen": 3094347776 }, { "epoch": 0.88, "learning_rate": 6.29062023589826e-05, "loss": 0.064, "theoretical_loss": 3.341346361241215, "tokens_seen": 3094478848 }, { "epoch": 0.88, "learning_rate": 6.286608360747814e-05, "loss": 0.0629, "theoretical_loss": 3.3413356403999925, "tokens_seen": 3094609920 }, { "epoch": 0.88, "learning_rate": 6.282596485597368e-05, "loss": 0.0606, "theoretical_loss": 3.3413249201399773, "tokens_seen": 3094740992 }, { "epoch": 0.88, "learning_rate": 6.278584610446923e-05, "loss": 0.0586, "theoretical_loss": 3.341314200461113, "tokens_seen": 3094872064 }, { "epoch": 0.88, "objective/train/advantage_avg": -0.0004765399789903313, "objective/train/docs_used": 1124452, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1821767091751099, "objective/train/original_loss": 1.1821765899658203, "objective/train/theoretical_loss": 3.3413034813633447, "objective/train/tokens_used": 1465527776, "objective/train/value_avg": -0.01067352294921875, "objective/train/value_loss": 0.0003532466071192175, "objective/train/value_max": -5.1021575927734375e-05, "objective/train/value_min": -0.9033203125, "objective/train/value_reward_corr": 0.7975348639951059, "objective/train/value_std": 0.0218658447265625, "objective/train/weight_avg": 0.9996815323829651, "objective/train/weighted_lm_loss": 1.1802136898040771, "objective/train/weights_max": 1.263445258140564, "objective/train/weights_min": 0.4122066795825958, "theoretical_loss": 3.3413034813633447, "tokens_seen": 3095003136 }, { "epoch": 0.88, "learning_rate": 6.274572735296477e-05, "loss": 0.0623, "theoretical_loss": 3.3413034813633447, "tokens_seen": 3095003136 }, { "epoch": 0.88, "learning_rate": 6.270560860146033e-05, "loss": 0.068, "theoretical_loss": 3.3412927628466145, "tokens_seen": 3095134208 }, { "epoch": 0.88, "learning_rate": 6.266548984995587e-05, "loss": 0.0598, "theoretical_loss": 3.3412820449108676, "tokens_seen": 3095265280 }, { "epoch": 0.88, "learning_rate": 6.262537109845141e-05, "loss": 0.0658, "theoretical_loss": 3.341271327556047, "tokens_seen": 3095396352 }, { "epoch": 0.88, "learning_rate": 6.258525234694697e-05, "loss": 0.0649, "theoretical_loss": 3.341260610782098, "tokens_seen": 3095527424 }, { "epoch": 0.88, "learning_rate": 6.254513359544251e-05, "loss": 0.0643, "theoretical_loss": 3.341249894588963, "tokens_seen": 3095658496 }, { "epoch": 0.88, "learning_rate": 6.250501484393806e-05, "loss": 0.0621, "theoretical_loss": 3.3412391789765863, "tokens_seen": 3095789568 }, { "epoch": 0.88, "learning_rate": 6.24648960924336e-05, "loss": 0.063, "theoretical_loss": 3.3412284639449124, "tokens_seen": 3095920640 }, { "epoch": 0.88, "learning_rate": 6.242477734092916e-05, "loss": 0.0651, "theoretical_loss": 3.341217749493885, "tokens_seen": 3096051712 }, { "epoch": 0.88, "learning_rate": 6.23846585894247e-05, "loss": 0.063, "theoretical_loss": 3.341207035623448, "tokens_seen": 3096182784 }, { "epoch": 0.88, "learning_rate": 6.234453983792024e-05, "loss": 0.0631, "theoretical_loss": 3.3411963223335452, "tokens_seen": 3096313856 }, { "epoch": 0.88, "learning_rate": 6.23044210864158e-05, "loss": 0.0625, "theoretical_loss": 3.341185609624121, "tokens_seen": 3096444928 }, { "epoch": 0.88, "learning_rate": 6.226430233491134e-05, "loss": 0.0622, "theoretical_loss": 3.3411748974951188, "tokens_seen": 3096576000 }, { "epoch": 0.88, "learning_rate": 6.222418358340689e-05, "loss": 0.0623, "theoretical_loss": 3.341164185946483, "tokens_seen": 3096707072 }, { "epoch": 0.88, "learning_rate": 6.218406483190243e-05, "loss": 0.0605, "theoretical_loss": 3.3411534749781575, "tokens_seen": 3096838144 }, { "epoch": 0.88, "learning_rate": 6.214394608039797e-05, "loss": 0.0671, "theoretical_loss": 3.3411427645900864, "tokens_seen": 3096969216 }, { "epoch": 0.88, "learning_rate": 6.210382732889353e-05, "loss": 0.0632, "theoretical_loss": 3.3411320547822134, "tokens_seen": 3097100288 }, { "epoch": 0.88, "learning_rate": 6.206370857738907e-05, "loss": 0.0636, "theoretical_loss": 3.341121345554483, "tokens_seen": 3097231360 }, { "epoch": 0.88, "learning_rate": 6.202358982588463e-05, "loss": 0.0642, "theoretical_loss": 3.341110636906839, "tokens_seen": 3097362432 }, { "epoch": 0.88, "learning_rate": 6.198347107438017e-05, "loss": 0.0666, "theoretical_loss": 3.341099928839225, "tokens_seen": 3097493504 }, { "epoch": 0.88, "learning_rate": 6.194335232287571e-05, "loss": 0.063, "theoretical_loss": 3.3410892213515853, "tokens_seen": 3097624576 }, { "epoch": 0.88, "learning_rate": 6.190323357137126e-05, "loss": 0.0605, "theoretical_loss": 3.3410785144438644, "tokens_seen": 3097755648 }, { "epoch": 0.88, "learning_rate": 6.18631148198668e-05, "loss": 0.0626, "theoretical_loss": 3.341067808116006, "tokens_seen": 3097886720 }, { "epoch": 0.88, "learning_rate": 6.182299606836236e-05, "loss": 0.0616, "theoretical_loss": 3.3410571023679543, "tokens_seen": 3098017792 }, { "epoch": 0.88, "learning_rate": 6.17828773168579e-05, "loss": 0.0675, "theoretical_loss": 3.341046397199653, "tokens_seen": 3098148864 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.0002857371873687953, "objective/train/docs_used": 1125757, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3028098344802856, "objective/train/original_loss": 1.3028099536895752, "objective/train/theoretical_loss": 3.3410356926110465, "objective/train/tokens_used": 1468804576, "objective/train/value_avg": -0.006511688232421875, "objective/train/value_loss": 0.00010136462515220046, "objective/train/value_max": -3.3974647521972656e-05, "objective/train/value_min": -0.6494140625, "objective/train/value_reward_corr": 0.7335392780457014, "objective/train/value_std": 0.01220703125, "objective/train/weight_avg": 1.0003368854522705, "objective/train/weighted_lm_loss": 1.3038235902786255, "objective/train/weights_max": 1.7527374029159546, "objective/train/weights_min": 0.7189736366271973, "theoretical_loss": 3.3410356926110465, "tokens_seen": 3098279936 }, { "epoch": 0.88, "learning_rate": 6.174275856535344e-05, "loss": 0.0654, "theoretical_loss": 3.3410356926110465, "tokens_seen": 3098279936 }, { "epoch": 0.88, "learning_rate": 6.1702639813849e-05, "loss": 0.0631, "theoretical_loss": 3.341024988602079, "tokens_seen": 3098411008 }, { "epoch": 0.88, "learning_rate": 6.166252106234454e-05, "loss": 0.0647, "theoretical_loss": 3.3410142851726943, "tokens_seen": 3098542080 }, { "epoch": 0.88, "learning_rate": 6.162240231084009e-05, "loss": 0.065, "theoretical_loss": 3.3410035823228363, "tokens_seen": 3098673152 }, { "epoch": 0.88, "learning_rate": 6.158228355933563e-05, "loss": 0.0628, "theoretical_loss": 3.3409928800524495, "tokens_seen": 3098804224 }, { "epoch": 0.88, "learning_rate": 6.154216480783119e-05, "loss": 0.0637, "theoretical_loss": 3.3409821783614784, "tokens_seen": 3098935296 }, { "epoch": 0.88, "learning_rate": 6.150204605632673e-05, "loss": 0.0668, "theoretical_loss": 3.3409714772498664, "tokens_seen": 3099066368 }, { "epoch": 0.88, "learning_rate": 6.146192730482227e-05, "loss": 0.0642, "theoretical_loss": 3.340960776717558, "tokens_seen": 3099197440 }, { "epoch": 0.88, "learning_rate": 6.142180855331783e-05, "loss": 0.0625, "theoretical_loss": 3.3409500767644973, "tokens_seen": 3099328512 }, { "epoch": 0.88, "learning_rate": 6.138168980181337e-05, "loss": 0.0665, "theoretical_loss": 3.340939377390628, "tokens_seen": 3099459584 }, { "epoch": 0.88, "learning_rate": 6.134157105030892e-05, "loss": 0.065, "theoretical_loss": 3.340928678595895, "tokens_seen": 3099590656 }, { "epoch": 0.88, "learning_rate": 6.130145229880446e-05, "loss": 0.0657, "theoretical_loss": 3.340917980380242, "tokens_seen": 3099721728 }, { "epoch": 0.88, "learning_rate": 6.12613335473e-05, "loss": 0.0637, "theoretical_loss": 3.3409072827436135, "tokens_seen": 3099852800 }, { "epoch": 0.88, "learning_rate": 6.122121479579556e-05, "loss": 0.0626, "theoretical_loss": 3.3408965856859534, "tokens_seen": 3099983872 }, { "epoch": 0.88, "learning_rate": 6.11810960442911e-05, "loss": 0.0652, "theoretical_loss": 3.3408858892072058, "tokens_seen": 3100114944 }, { "epoch": 0.88, "learning_rate": 6.114097729278666e-05, "loss": 0.0635, "theoretical_loss": 3.340875193307315, "tokens_seen": 3100246016 }, { "epoch": 0.88, "learning_rate": 6.11008585412822e-05, "loss": 0.065, "theoretical_loss": 3.3408644979862254, "tokens_seen": 3100377088 }, { "epoch": 0.88, "learning_rate": 6.106073978977774e-05, "loss": 0.0618, "theoretical_loss": 3.3408538032438813, "tokens_seen": 3100508160 }, { "epoch": 0.88, "learning_rate": 6.102062103827329e-05, "loss": 0.063, "theoretical_loss": 3.340843109080226, "tokens_seen": 3100639232 }, { "epoch": 0.88, "learning_rate": 6.098050228676884e-05, "loss": 0.0623, "theoretical_loss": 3.340832415495205, "tokens_seen": 3100770304 }, { "epoch": 0.88, "learning_rate": 6.094038353526438e-05, "loss": 0.0646, "theoretical_loss": 3.340821722488762, "tokens_seen": 3100901376 }, { "epoch": 0.88, "learning_rate": 6.090026478375993e-05, "loss": 0.0652, "theoretical_loss": 3.340811030060841, "tokens_seen": 3101032448 }, { "epoch": 0.88, "learning_rate": 6.086014603225547e-05, "loss": 0.0618, "theoretical_loss": 3.3408003382113862, "tokens_seen": 3101163520 }, { "epoch": 0.88, "learning_rate": 6.0820027280751026e-05, "loss": 0.0599, "theoretical_loss": 3.3407896469403426, "tokens_seen": 3101294592 }, { "epoch": 0.88, "learning_rate": 6.0779908529246574e-05, "loss": 0.0647, "theoretical_loss": 3.3407789562476538, "tokens_seen": 3101425664 }, { "epoch": 0.88, "objective/train/advantage_avg": 7.757762796245515e-05, "objective/train/docs_used": 1126928, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.281265139579773, "objective/train/original_loss": 1.2812650203704834, "objective/train/theoretical_loss": 3.340768266133264, "objective/train/tokens_used": 1472081376, "objective/train/value_avg": -0.00724029541015625, "objective/train/value_loss": 0.00013835018035024405, "objective/train/value_max": -1.9252300262451172e-05, "objective/train/value_min": -0.611328125, "objective/train/value_reward_corr": 0.7891277166741453, "objective/train/value_std": 0.01474761962890625, "objective/train/weight_avg": 1.000144600868225, "objective/train/weighted_lm_loss": 1.2810375690460205, "objective/train/weights_max": 1.2373887300491333, "objective/train/weights_min": 0.608086347579956, "theoretical_loss": 3.340768266133264, "tokens_seen": 3101556736 }, { "epoch": 0.88, "learning_rate": 6.0739789777742116e-05, "loss": 0.0656, "theoretical_loss": 3.340768266133264, "tokens_seen": 3101556736 }, { "epoch": 0.88, "learning_rate": 6.0699671026237664e-05, "loss": 0.0636, "theoretical_loss": 3.3407575765971176, "tokens_seen": 3101687808 }, { "epoch": 0.88, "learning_rate": 6.0659552274733205e-05, "loss": 0.0651, "theoretical_loss": 3.3407468876391593, "tokens_seen": 3101818880 }, { "epoch": 0.88, "learning_rate": 6.061943352322876e-05, "loss": 0.0633, "theoretical_loss": 3.340736199259333, "tokens_seen": 3101949952 }, { "epoch": 0.88, "learning_rate": 6.057931477172431e-05, "loss": 0.0616, "theoretical_loss": 3.3407255114575833, "tokens_seen": 3102081024 }, { "epoch": 0.88, "learning_rate": 6.053919602021985e-05, "loss": 0.0608, "theoretical_loss": 3.3407148242338542, "tokens_seen": 3102212096 }, { "epoch": 0.88, "learning_rate": 6.04990772687154e-05, "loss": 0.0631, "theoretical_loss": 3.34070413758809, "tokens_seen": 3102343168 }, { "epoch": 0.88, "learning_rate": 6.045895851721095e-05, "loss": 0.0642, "theoretical_loss": 3.3406934515202353, "tokens_seen": 3102474240 }, { "epoch": 0.88, "learning_rate": 6.041883976570649e-05, "loss": 0.064, "theoretical_loss": 3.3406827660302345, "tokens_seen": 3102605312 }, { "epoch": 0.88, "learning_rate": 6.037872101420204e-05, "loss": 0.0638, "theoretical_loss": 3.3406720811180315, "tokens_seen": 3102736384 }, { "epoch": 0.88, "learning_rate": 6.033860226269758e-05, "loss": 0.0655, "theoretical_loss": 3.3406613967835708, "tokens_seen": 3102867456 }, { "epoch": 0.88, "learning_rate": 6.029848351119313e-05, "loss": 0.0593, "theoretical_loss": 3.340650713026797, "tokens_seen": 3102998528 }, { "epoch": 0.88, "learning_rate": 6.0258364759688686e-05, "loss": 0.0614, "theoretical_loss": 3.340640029847654, "tokens_seen": 3103129600 }, { "epoch": 0.88, "learning_rate": 6.021824600818423e-05, "loss": 0.0658, "theoretical_loss": 3.340629347246087, "tokens_seen": 3103260672 }, { "epoch": 0.88, "learning_rate": 6.0178127256679775e-05, "loss": 0.0636, "theoretical_loss": 3.3406186652220398, "tokens_seen": 3103391744 }, { "epoch": 0.88, "learning_rate": 6.0138008505175316e-05, "loss": 0.0603, "theoretical_loss": 3.3406079837754565, "tokens_seen": 3103522816 }, { "epoch": 0.88, "learning_rate": 6.0097889753670864e-05, "loss": 0.0657, "theoretical_loss": 3.340597302906282, "tokens_seen": 3103653888 }, { "epoch": 0.88, "learning_rate": 6.005777100216642e-05, "loss": 0.0616, "theoretical_loss": 3.3405866226144605, "tokens_seen": 3103784960 }, { "epoch": 0.88, "learning_rate": 6.001765225066196e-05, "loss": 0.0637, "theoretical_loss": 3.3405759428999366, "tokens_seen": 3103916032 }, { "epoch": 0.88, "learning_rate": 5.997753349915751e-05, "loss": 0.0641, "theoretical_loss": 3.3405652637626546, "tokens_seen": 3104047104 }, { "epoch": 0.88, "learning_rate": 5.9937414747653056e-05, "loss": 0.0624, "theoretical_loss": 3.340554585202559, "tokens_seen": 3104178176 }, { "epoch": 0.88, "learning_rate": 5.98972959961486e-05, "loss": 0.0651, "theoretical_loss": 3.340543907219594, "tokens_seen": 3104309248 }, { "epoch": 0.88, "learning_rate": 5.985717724464415e-05, "loss": 0.0641, "theoretical_loss": 3.3405332298137043, "tokens_seen": 3104440320 }, { "epoch": 0.88, "learning_rate": 5.9817058493139694e-05, "loss": 0.0677, "theoretical_loss": 3.340522552984834, "tokens_seen": 3104571392 }, { "epoch": 0.88, "learning_rate": 5.977693974163524e-05, "loss": 0.0632, "theoretical_loss": 3.340511876732928, "tokens_seen": 3104702464 }, { "epoch": 0.88, "objective/train/advantage_avg": 0.0008081789710558951, "objective/train/docs_used": 1128234, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.206903338432312, "objective/train/original_loss": 1.2069032192230225, "objective/train/theoretical_loss": 3.3405012010579305, "objective/train/tokens_used": 1475358176, "objective/train/value_avg": -0.0097503662109375, "objective/train/value_loss": 0.0003384881420060992, "objective/train/value_max": -3.451108932495117e-05, "objective/train/value_min": -0.98486328125, "objective/train/value_reward_corr": 0.8347299117086568, "objective/train/value_std": 0.02685546875, "objective/train/weight_avg": 1.0009649991989136, "objective/train/weighted_lm_loss": 1.2073787450790405, "objective/train/weights_max": 1.951137661933899, "objective/train/weights_min": 0.36996933817863464, "theoretical_loss": 3.3405012010579305, "tokens_seen": 3104833536 }, { "epoch": 0.88, "learning_rate": 5.973682099013079e-05, "loss": 0.0638, "theoretical_loss": 3.3405012010579305, "tokens_seen": 3104833536 }, { "epoch": 0.88, "learning_rate": 5.969670223862633e-05, "loss": 0.0642, "theoretical_loss": 3.340490525959786, "tokens_seen": 3104964608 }, { "epoch": 0.88, "learning_rate": 5.9656583487121886e-05, "loss": 0.0627, "theoretical_loss": 3.340479851438439, "tokens_seen": 3105095680 }, { "epoch": 0.88, "learning_rate": 5.961646473561743e-05, "loss": 0.0651, "theoretical_loss": 3.3404691774938344, "tokens_seen": 3105226752 }, { "epoch": 0.88, "learning_rate": 5.9576345984112975e-05, "loss": 0.0628, "theoretical_loss": 3.340458504125916, "tokens_seen": 3105357824 }, { "epoch": 0.88, "learning_rate": 5.9536227232608524e-05, "loss": 0.0643, "theoretical_loss": 3.3404478313346284, "tokens_seen": 3105488896 }, { "epoch": 0.88, "learning_rate": 5.9496108481104065e-05, "loss": 0.0649, "theoretical_loss": 3.3404371591199165, "tokens_seen": 3105619968 }, { "epoch": 0.88, "learning_rate": 5.945598972959962e-05, "loss": 0.0627, "theoretical_loss": 3.3404264874817247, "tokens_seen": 3105751040 }, { "epoch": 0.88, "learning_rate": 5.941587097809516e-05, "loss": 0.0642, "theoretical_loss": 3.3404158164199975, "tokens_seen": 3105882112 }, { "epoch": 0.88, "learning_rate": 5.937575222659071e-05, "loss": 0.0641, "theoretical_loss": 3.3404051459346795, "tokens_seen": 3106013184 }, { "epoch": 0.88, "learning_rate": 5.933563347508626e-05, "loss": 0.0691, "theoretical_loss": 3.3403944760257147, "tokens_seen": 3106144256 }, { "epoch": 0.88, "learning_rate": 5.92955147235818e-05, "loss": 0.0671, "theoretical_loss": 3.3403838066930485, "tokens_seen": 3106275328 }, { "epoch": 0.88, "learning_rate": 5.925539597207735e-05, "loss": 0.0647, "theoretical_loss": 3.340373137936625, "tokens_seen": 3106406400 }, { "epoch": 0.88, "learning_rate": 5.92152772205729e-05, "loss": 0.0661, "theoretical_loss": 3.3403624697563887, "tokens_seen": 3106537472 }, { "epoch": 0.88, "learning_rate": 5.917515846906844e-05, "loss": 0.0677, "theoretical_loss": 3.340351802152284, "tokens_seen": 3106668544 }, { "epoch": 0.88, "learning_rate": 5.913503971756399e-05, "loss": 0.0661, "theoretical_loss": 3.3403411351242567, "tokens_seen": 3106799616 }, { "epoch": 0.88, "learning_rate": 5.909492096605953e-05, "loss": 0.0634, "theoretical_loss": 3.3403304686722497, "tokens_seen": 3106930688 }, { "epoch": 0.88, "learning_rate": 5.905480221455509e-05, "loss": 0.0654, "theoretical_loss": 3.3403198027962087, "tokens_seen": 3107061760 }, { "epoch": 0.88, "learning_rate": 5.9014683463050635e-05, "loss": 0.0669, "theoretical_loss": 3.3403091374960776, "tokens_seen": 3107192832 }, { "epoch": 0.88, "learning_rate": 5.8974564711546176e-05, "loss": 0.0666, "theoretical_loss": 3.3402984727718015, "tokens_seen": 3107323904 }, { "epoch": 0.88, "learning_rate": 5.8934445960041724e-05, "loss": 0.0664, "theoretical_loss": 3.340287808623325, "tokens_seen": 3107454976 }, { "epoch": 0.88, "learning_rate": 5.8894327208537265e-05, "loss": 0.0665, "theoretical_loss": 3.3402771450505924, "tokens_seen": 3107586048 }, { "epoch": 0.88, "learning_rate": 5.885420845703282e-05, "loss": 0.0607, "theoretical_loss": 3.340266482053549, "tokens_seen": 3107717120 }, { "epoch": 0.88, "learning_rate": 5.881408970552837e-05, "loss": 0.0681, "theoretical_loss": 3.340255819632139, "tokens_seen": 3107848192 }, { "epoch": 0.88, "learning_rate": 5.877397095402391e-05, "loss": 0.0647, "theoretical_loss": 3.3402451577863066, "tokens_seen": 3107979264 }, { "epoch": 0.88, "objective/train/advantage_avg": -0.0004493003070820123, "objective/train/docs_used": 1129377, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3612782955169678, "objective/train/original_loss": 1.3612782955169678, "objective/train/theoretical_loss": 3.340234496515997, "objective/train/tokens_used": 1478634976, "objective/train/value_avg": -0.00789642333984375, "objective/train/value_loss": 0.0002992206427734345, "objective/train/value_max": -3.8504600524902344e-05, "objective/train/value_min": -0.3837890625, "objective/train/value_reward_corr": 0.7492048547373404, "objective/train/value_std": 0.0171356201171875, "objective/train/weight_avg": 0.9996810555458069, "objective/train/weighted_lm_loss": 1.3593413829803467, "objective/train/weights_max": 1.1778565645217896, "objective/train/weights_min": 0.36833932995796204, "theoretical_loss": 3.340234496515997, "tokens_seen": 3108110336 }, { "epoch": 0.88, "learning_rate": 5.873385220251946e-05, "loss": 0.0639, "theoretical_loss": 3.340234496515997, "tokens_seen": 3108110336 }, { "epoch": 0.88, "learning_rate": 5.8693733451015006e-05, "loss": 0.059, "theoretical_loss": 3.340223835821155, "tokens_seen": 3108241408 }, { "epoch": 0.88, "learning_rate": 5.8653614699510554e-05, "loss": 0.0657, "theoretical_loss": 3.3402131757017246, "tokens_seen": 3108372480 }, { "epoch": 0.88, "learning_rate": 5.86134959480061e-05, "loss": 0.0638, "theoretical_loss": 3.3402025161576514, "tokens_seen": 3108503552 }, { "epoch": 0.88, "learning_rate": 5.857337719650164e-05, "loss": 0.0682, "theoretical_loss": 3.3401918571888793, "tokens_seen": 3108634624 }, { "epoch": 0.88, "learning_rate": 5.853325844499719e-05, "loss": 0.0645, "theoretical_loss": 3.340181198795354, "tokens_seen": 3108765696 }, { "epoch": 0.88, "learning_rate": 5.8493139693492746e-05, "loss": 0.0655, "theoretical_loss": 3.3401705409770184, "tokens_seen": 3108896768 }, { "epoch": 0.88, "learning_rate": 5.845302094198829e-05, "loss": 0.0634, "theoretical_loss": 3.340159883733819, "tokens_seen": 3109027840 }, { "epoch": 0.88, "learning_rate": 5.8412902190483835e-05, "loss": 0.0656, "theoretical_loss": 3.3401492270657, "tokens_seen": 3109158912 }, { "epoch": 0.88, "learning_rate": 5.8372783438979377e-05, "loss": 0.0652, "theoretical_loss": 3.3401385709726052, "tokens_seen": 3109289984 }, { "epoch": 0.88, "learning_rate": 5.8332664687474925e-05, "loss": 0.0668, "theoretical_loss": 3.340127915454481, "tokens_seen": 3109421056 }, { "epoch": 0.88, "learning_rate": 5.829254593597048e-05, "loss": 0.064, "theoretical_loss": 3.3401172605112706, "tokens_seen": 3109552128 }, { "epoch": 0.88, "learning_rate": 5.825242718446602e-05, "loss": 0.0618, "theoretical_loss": 3.3401066061429194, "tokens_seen": 3109683200 }, { "epoch": 0.88, "learning_rate": 5.821230843296157e-05, "loss": 0.0639, "theoretical_loss": 3.3400959523493725, "tokens_seen": 3109814272 }, { "epoch": 0.88, "learning_rate": 5.817218968145711e-05, "loss": 0.0652, "theoretical_loss": 3.340085299130574, "tokens_seen": 3109945344 }, { "epoch": 0.88, "learning_rate": 5.813207092995266e-05, "loss": 0.0653, "theoretical_loss": 3.3400746464864692, "tokens_seen": 3110076416 }, { "epoch": 0.88, "learning_rate": 5.809195217844821e-05, "loss": 0.0652, "theoretical_loss": 3.3400639944170027, "tokens_seen": 3110207488 }, { "epoch": 0.89, "learning_rate": 5.8051833426943754e-05, "loss": 0.0637, "theoretical_loss": 3.340053342922119, "tokens_seen": 3110338560 }, { "epoch": 0.89, "learning_rate": 5.80117146754393e-05, "loss": 0.0654, "theoretical_loss": 3.3400426920017634, "tokens_seen": 3110469632 }, { "epoch": 0.89, "learning_rate": 5.797159592393485e-05, "loss": 0.0668, "theoretical_loss": 3.3400320416558804, "tokens_seen": 3110600704 }, { "epoch": 0.89, "learning_rate": 5.793147717243039e-05, "loss": 0.0613, "theoretical_loss": 3.340021391884415, "tokens_seen": 3110731776 }, { "epoch": 0.89, "learning_rate": 5.7891358420925947e-05, "loss": 0.068, "theoretical_loss": 3.340010742687311, "tokens_seen": 3110862848 }, { "epoch": 0.89, "learning_rate": 5.785123966942149e-05, "loss": 0.0695, "theoretical_loss": 3.340000094064515, "tokens_seen": 3110993920 }, { "epoch": 0.89, "learning_rate": 5.7811120917917036e-05, "loss": 0.0612, "theoretical_loss": 3.339989446015971, "tokens_seen": 3111124992 }, { "epoch": 0.89, "learning_rate": 5.7771002166412584e-05, "loss": 0.0656, "theoretical_loss": 3.3399787985416234, "tokens_seen": 3111256064 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.0011575415264815092, "objective/train/docs_used": 1130437, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3548108339309692, "objective/train/original_loss": 1.3548104763031006, "objective/train/theoretical_loss": 3.3399681516414175, "objective/train/tokens_used": 1481911776, "objective/train/value_avg": -0.01018524169921875, "objective/train/value_loss": 0.0003381419228389859, "objective/train/value_max": -3.3736228942871094e-05, "objective/train/value_min": -0.7265625, "objective/train/value_reward_corr": 0.7548006412151438, "objective/train/value_std": 0.0216522216796875, "objective/train/weight_avg": 1.001311182975769, "objective/train/weighted_lm_loss": 1.355912446975708, "objective/train/weights_max": 1.5961527824401855, "objective/train/weights_min": 0.3835744857788086, "theoretical_loss": 3.3399681516414175, "tokens_seen": 3111387136 }, { "epoch": 0.89, "learning_rate": 5.7730883414908125e-05, "loss": 0.0645, "theoretical_loss": 3.3399681516414175, "tokens_seen": 3111387136 }, { "epoch": 0.89, "learning_rate": 5.769076466340368e-05, "loss": 0.0683, "theoretical_loss": 3.339957505315298, "tokens_seen": 3111518208 }, { "epoch": 0.89, "learning_rate": 5.765064591189922e-05, "loss": 0.0632, "theoretical_loss": 3.33994685956321, "tokens_seen": 3111649280 }, { "epoch": 0.89, "learning_rate": 5.761052716039477e-05, "loss": 0.0629, "theoretical_loss": 3.339936214385098, "tokens_seen": 3111780352 }, { "epoch": 0.89, "learning_rate": 5.757040840889032e-05, "loss": 0.0603, "theoretical_loss": 3.3399255697809074, "tokens_seen": 3111911424 }, { "epoch": 0.89, "learning_rate": 5.753028965738586e-05, "loss": 0.0666, "theoretical_loss": 3.3399149257505822, "tokens_seen": 3112042496 }, { "epoch": 0.89, "learning_rate": 5.7490170905881414e-05, "loss": 0.0648, "theoretical_loss": 3.3399042822940688, "tokens_seen": 3112173568 }, { "epoch": 0.89, "learning_rate": 5.745005215437696e-05, "loss": 0.0632, "theoretical_loss": 3.3398936394113106, "tokens_seen": 3112304640 }, { "epoch": 0.89, "learning_rate": 5.74099334028725e-05, "loss": 0.0644, "theoretical_loss": 3.339882997102253, "tokens_seen": 3112435712 }, { "epoch": 0.89, "learning_rate": 5.736981465136805e-05, "loss": 0.065, "theoretical_loss": 3.339872355366841, "tokens_seen": 3112566784 }, { "epoch": 0.89, "learning_rate": 5.732969589986359e-05, "loss": 0.0641, "theoretical_loss": 3.3398617142050195, "tokens_seen": 3112697856 }, { "epoch": 0.89, "learning_rate": 5.728957714835915e-05, "loss": 0.0648, "theoretical_loss": 3.339851073616734, "tokens_seen": 3112828928 }, { "epoch": 0.89, "learning_rate": 5.7249458396854695e-05, "loss": 0.0661, "theoretical_loss": 3.3398404336019283, "tokens_seen": 3112960000 }, { "epoch": 0.89, "learning_rate": 5.7209339645350236e-05, "loss": 0.0638, "theoretical_loss": 3.3398297941605484, "tokens_seen": 3113091072 }, { "epoch": 0.89, "learning_rate": 5.7169220893845784e-05, "loss": 0.0648, "theoretical_loss": 3.3398191552925383, "tokens_seen": 3113222144 }, { "epoch": 0.89, "learning_rate": 5.7129102142341326e-05, "loss": 0.0632, "theoretical_loss": 3.339808516997844, "tokens_seen": 3113353216 }, { "epoch": 0.89, "learning_rate": 5.708898339083688e-05, "loss": 0.0623, "theoretical_loss": 3.33979787927641, "tokens_seen": 3113484288 }, { "epoch": 0.89, "learning_rate": 5.704886463933243e-05, "loss": 0.0649, "theoretical_loss": 3.3397872421281805, "tokens_seen": 3113615360 }, { "epoch": 0.89, "learning_rate": 5.700874588782797e-05, "loss": 0.0639, "theoretical_loss": 3.3397766055531015, "tokens_seen": 3113746432 }, { "epoch": 0.89, "learning_rate": 5.696862713632352e-05, "loss": 0.0614, "theoretical_loss": 3.339765969551118, "tokens_seen": 3113877504 }, { "epoch": 0.89, "learning_rate": 5.692850838481906e-05, "loss": 0.0644, "theoretical_loss": 3.3397553341221746, "tokens_seen": 3114008576 }, { "epoch": 0.89, "learning_rate": 5.6888389633314614e-05, "loss": 0.0691, "theoretical_loss": 3.339744699266216, "tokens_seen": 3114139648 }, { "epoch": 0.89, "learning_rate": 5.684827088181016e-05, "loss": 0.0662, "theoretical_loss": 3.3397340649831877, "tokens_seen": 3114270720 }, { "epoch": 0.89, "learning_rate": 5.6808152130305703e-05, "loss": 0.0644, "theoretical_loss": 3.339723431273035, "tokens_seen": 3114401792 }, { "epoch": 0.89, "learning_rate": 5.676803337880125e-05, "loss": 0.068, "theoretical_loss": 3.3397127981357024, "tokens_seen": 3114532864 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.0012461742153391242, "objective/train/docs_used": 1131665, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.347029209136963, "objective/train/original_loss": 1.3470290899276733, "objective/train/theoretical_loss": 3.339702165571135, "objective/train/tokens_used": 1485188576, "objective/train/value_avg": -0.00799560546875, "objective/train/value_loss": 0.00019136874470859766, "objective/train/value_max": -2.3365020751953125e-05, "objective/train/value_min": -0.29443359375, "objective/train/value_reward_corr": 0.786960842850529, "objective/train/value_std": 0.017974853515625, "objective/train/weight_avg": 1.0013327598571777, "objective/train/weighted_lm_loss": 1.3490720987319946, "objective/train/weights_max": 1.153391718864441, "objective/train/weights_min": 0.3725000023841858, "theoretical_loss": 3.339702165571135, "tokens_seen": 3114663936 }, { "epoch": 0.89, "learning_rate": 5.67279146272968e-05, "loss": 0.0645, "theoretical_loss": 3.339702165571135, "tokens_seen": 3114663936 }, { "epoch": 0.89, "learning_rate": 5.668779587579235e-05, "loss": 0.0634, "theoretical_loss": 3.339691533579278, "tokens_seen": 3114795008 }, { "epoch": 0.89, "learning_rate": 5.6647677124287896e-05, "loss": 0.0673, "theoretical_loss": 3.3396809021600764, "tokens_seen": 3114926080 }, { "epoch": 0.89, "learning_rate": 5.660755837278344e-05, "loss": 0.0599, "theoretical_loss": 3.3396702713134756, "tokens_seen": 3115057152 }, { "epoch": 0.89, "learning_rate": 5.6567439621278985e-05, "loss": 0.0639, "theoretical_loss": 3.33965964103942, "tokens_seen": 3115188224 }, { "epoch": 0.89, "learning_rate": 5.652732086977454e-05, "loss": 0.0618, "theoretical_loss": 3.3396490113378547, "tokens_seen": 3115319296 }, { "epoch": 0.89, "learning_rate": 5.648720211827008e-05, "loss": 0.0686, "theoretical_loss": 3.339638382208726, "tokens_seen": 3115450368 }, { "epoch": 0.89, "learning_rate": 5.644708336676563e-05, "loss": 0.0629, "theoretical_loss": 3.3396277536519774, "tokens_seen": 3115581440 }, { "epoch": 0.89, "learning_rate": 5.640696461526117e-05, "loss": 0.0649, "theoretical_loss": 3.3396171256675546, "tokens_seen": 3115712512 }, { "epoch": 0.89, "learning_rate": 5.636684586375672e-05, "loss": 0.0675, "theoretical_loss": 3.339606498255403, "tokens_seen": 3115843584 }, { "epoch": 0.89, "learning_rate": 5.632672711225227e-05, "loss": 0.0657, "theoretical_loss": 3.339595871415468, "tokens_seen": 3115974656 }, { "epoch": 0.89, "learning_rate": 5.6286608360747815e-05, "loss": 0.0681, "theoretical_loss": 3.3395852451476937, "tokens_seen": 3116105728 }, { "epoch": 0.89, "learning_rate": 5.624648960924336e-05, "loss": 0.0634, "theoretical_loss": 3.3395746194520255, "tokens_seen": 3116236800 }, { "epoch": 0.89, "learning_rate": 5.620637085773891e-05, "loss": 0.062, "theoretical_loss": 3.339563994328409, "tokens_seen": 3116367872 }, { "epoch": 0.89, "learning_rate": 5.616625210623445e-05, "loss": 0.0639, "theoretical_loss": 3.3395533697767896, "tokens_seen": 3116498944 }, { "epoch": 0.89, "learning_rate": 5.612613335473001e-05, "loss": 0.0654, "theoretical_loss": 3.339542745797112, "tokens_seen": 3116630016 }, { "epoch": 0.89, "learning_rate": 5.608601460322555e-05, "loss": 0.0643, "theoretical_loss": 3.3395321223893206, "tokens_seen": 3116761088 }, { "epoch": 0.89, "learning_rate": 5.6045895851721096e-05, "loss": 0.065, "theoretical_loss": 3.3395214995533617, "tokens_seen": 3116892160 }, { "epoch": 0.89, "learning_rate": 5.6005777100216644e-05, "loss": 0.0629, "theoretical_loss": 3.33951087728918, "tokens_seen": 3117023232 }, { "epoch": 0.89, "learning_rate": 5.5965658348712186e-05, "loss": 0.0683, "theoretical_loss": 3.339500255596721, "tokens_seen": 3117154304 }, { "epoch": 0.89, "learning_rate": 5.592553959720774e-05, "loss": 0.0616, "theoretical_loss": 3.3394896344759295, "tokens_seen": 3117285376 }, { "epoch": 0.89, "learning_rate": 5.588542084570328e-05, "loss": 0.0651, "theoretical_loss": 3.339479013926751, "tokens_seen": 3117416448 }, { "epoch": 0.89, "learning_rate": 5.584530209419883e-05, "loss": 0.0648, "theoretical_loss": 3.3394683939491308, "tokens_seen": 3117547520 }, { "epoch": 0.89, "learning_rate": 5.580518334269438e-05, "loss": 0.0647, "theoretical_loss": 3.3394577745430136, "tokens_seen": 3117678592 }, { "epoch": 0.89, "learning_rate": 5.576506459118992e-05, "loss": 0.0642, "theoretical_loss": 3.3394471557083447, "tokens_seen": 3117809664 }, { "epoch": 0.89, "objective/train/advantage_avg": 4.530885234999005e-06, "objective/train/docs_used": 1132901, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2882665395736694, "objective/train/original_loss": 1.2882664203643799, "objective/train/theoretical_loss": 3.33943653744507, "objective/train/tokens_used": 1488465376, "objective/train/value_avg": -0.0103912353515625, "objective/train/value_loss": 0.0003230677102692425, "objective/train/value_max": -2.5451183319091797e-05, "objective/train/value_min": -0.66552734375, "objective/train/value_reward_corr": 0.824176312825324, "objective/train/value_std": 0.025604248046875, "objective/train/weight_avg": 1.0001542568206787, "objective/train/weighted_lm_loss": 1.2888431549072266, "objective/train/weights_max": 1.8980669975280762, "objective/train/weights_min": 0.38783547282218933, "theoretical_loss": 3.33943653744507, "tokens_seen": 3117940736 }, { "epoch": 0.89, "learning_rate": 5.5724945839685474e-05, "loss": 0.0658, "theoretical_loss": 3.33943653744507, "tokens_seen": 3117940736 }, { "epoch": 0.89, "learning_rate": 5.5684827088181015e-05, "loss": 0.0639, "theoretical_loss": 3.3394259197531335, "tokens_seen": 3118071808 }, { "epoch": 0.89, "learning_rate": 5.564470833667656e-05, "loss": 0.0682, "theoretical_loss": 3.339415302632482, "tokens_seen": 3118202880 }, { "epoch": 0.89, "learning_rate": 5.560458958517211e-05, "loss": 0.0612, "theoretical_loss": 3.3394046860830597, "tokens_seen": 3118333952 }, { "epoch": 0.89, "learning_rate": 5.556447083366765e-05, "loss": 0.0672, "theoretical_loss": 3.339394070104812, "tokens_seen": 3118465024 }, { "epoch": 0.89, "learning_rate": 5.552435208216321e-05, "loss": 0.066, "theoretical_loss": 3.3393834546976846, "tokens_seen": 3118596096 }, { "epoch": 0.89, "learning_rate": 5.5484233330658756e-05, "loss": 0.0662, "theoretical_loss": 3.339372839861622, "tokens_seen": 3118727168 }, { "epoch": 0.89, "learning_rate": 5.54441145791543e-05, "loss": 0.0672, "theoretical_loss": 3.3393622255965703, "tokens_seen": 3118858240 }, { "epoch": 0.89, "learning_rate": 5.5403995827649845e-05, "loss": 0.0664, "theoretical_loss": 3.339351611902474, "tokens_seen": 3118989312 }, { "epoch": 0.89, "learning_rate": 5.5363877076145386e-05, "loss": 0.0661, "theoretical_loss": 3.3393409987792793, "tokens_seen": 3119120384 }, { "epoch": 0.89, "learning_rate": 5.532375832464094e-05, "loss": 0.0653, "theoretical_loss": 3.339330386226931, "tokens_seen": 3119251456 }, { "epoch": 0.89, "learning_rate": 5.528363957313649e-05, "loss": 0.0682, "theoretical_loss": 3.339319774245374, "tokens_seen": 3119382528 }, { "epoch": 0.89, "learning_rate": 5.524352082163203e-05, "loss": 0.0675, "theoretical_loss": 3.3393091628345544, "tokens_seen": 3119513600 }, { "epoch": 0.89, "learning_rate": 5.520340207012758e-05, "loss": 0.0666, "theoretical_loss": 3.339298551994417, "tokens_seen": 3119644672 }, { "epoch": 0.89, "learning_rate": 5.516328331862312e-05, "loss": 0.0649, "theoretical_loss": 3.3392879417249075, "tokens_seen": 3119775744 }, { "epoch": 0.89, "learning_rate": 5.5123164567118674e-05, "loss": 0.0668, "theoretical_loss": 3.339277332025971, "tokens_seen": 3119906816 }, { "epoch": 0.89, "learning_rate": 5.508304581561422e-05, "loss": 0.0669, "theoretical_loss": 3.3392667228975528, "tokens_seen": 3120037888 }, { "epoch": 0.89, "learning_rate": 5.5042927064109764e-05, "loss": 0.069, "theoretical_loss": 3.3392561143395985, "tokens_seen": 3120168960 }, { "epoch": 0.89, "learning_rate": 5.500280831260531e-05, "loss": 0.0709, "theoretical_loss": 3.339245506352053, "tokens_seen": 3120300032 }, { "epoch": 0.89, "learning_rate": 5.496268956110085e-05, "loss": 0.0654, "theoretical_loss": 3.339234898934863, "tokens_seen": 3120431104 }, { "epoch": 0.89, "learning_rate": 5.492257080959641e-05, "loss": 0.0683, "theoretical_loss": 3.339224292087972, "tokens_seen": 3120562176 }, { "epoch": 0.89, "learning_rate": 5.4882452058091956e-05, "loss": 0.0645, "theoretical_loss": 3.339213685811326, "tokens_seen": 3120693248 }, { "epoch": 0.89, "learning_rate": 5.48423333065875e-05, "loss": 0.0693, "theoretical_loss": 3.339203080104871, "tokens_seen": 3120824320 }, { "epoch": 0.89, "learning_rate": 5.4802214555083045e-05, "loss": 0.0649, "theoretical_loss": 3.339192474968552, "tokens_seen": 3120955392 }, { "epoch": 0.89, "learning_rate": 5.4762095803578593e-05, "loss": 0.0634, "theoretical_loss": 3.3391818704023146, "tokens_seen": 3121086464 }, { "epoch": 0.89, "objective/train/advantage_avg": 0.0010572571773082018, "objective/train/docs_used": 1133992, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2817366123199463, "objective/train/original_loss": 1.2817363739013672, "objective/train/theoretical_loss": 3.3391712664061037, "objective/train/tokens_used": 1491742176, "objective/train/value_avg": -0.007259368896484375, "objective/train/value_loss": 0.0001490499562351033, "objective/train/value_max": -3.916025161743164e-05, "objective/train/value_min": -0.2391357421875, "objective/train/value_reward_corr": 0.7399782498451701, "objective/train/value_std": 0.01389312744140625, "objective/train/weight_avg": 1.001127004623413, "objective/train/weighted_lm_loss": 1.282719373703003, "objective/train/weights_max": 1.1131260395050049, "objective/train/weights_min": 0.3696533143520355, "theoretical_loss": 3.3391712664061037, "tokens_seen": 3121217536 }, { "epoch": 0.89, "learning_rate": 5.472197705207414e-05, "loss": 0.0652, "theoretical_loss": 3.3391712664061037, "tokens_seen": 3121217536 }, { "epoch": 0.89, "learning_rate": 5.468185830056969e-05, "loss": 0.0668, "theoretical_loss": 3.339160662979866, "tokens_seen": 3121348608 }, { "epoch": 0.89, "learning_rate": 5.464173954906523e-05, "loss": 0.0635, "theoretical_loss": 3.3391500601235453, "tokens_seen": 3121479680 }, { "epoch": 0.89, "learning_rate": 5.460162079756078e-05, "loss": 0.0701, "theoretical_loss": 3.3391394578370877, "tokens_seen": 3121610752 }, { "epoch": 0.89, "learning_rate": 5.4561502046056334e-05, "loss": 0.0658, "theoretical_loss": 3.339128856120439, "tokens_seen": 3121741824 }, { "epoch": 0.89, "learning_rate": 5.4521383294551875e-05, "loss": 0.0621, "theoretical_loss": 3.3391182549735445, "tokens_seen": 3121872896 }, { "epoch": 0.89, "learning_rate": 5.448126454304742e-05, "loss": 0.064, "theoretical_loss": 3.339107654396349, "tokens_seen": 3122003968 }, { "epoch": 0.89, "learning_rate": 5.4441145791542964e-05, "loss": 0.072, "theoretical_loss": 3.339097054388799, "tokens_seen": 3122135040 }, { "epoch": 0.89, "learning_rate": 5.440102704003851e-05, "loss": 0.0685, "theoretical_loss": 3.3390864549508392, "tokens_seen": 3122266112 }, { "epoch": 0.89, "learning_rate": 5.436090828853407e-05, "loss": 0.068, "theoretical_loss": 3.3390758560824154, "tokens_seen": 3122397184 }, { "epoch": 0.89, "learning_rate": 5.432078953702961e-05, "loss": 0.0686, "theoretical_loss": 3.3390652577834734, "tokens_seen": 3122528256 }, { "epoch": 0.89, "learning_rate": 5.428067078552516e-05, "loss": 0.0644, "theoretical_loss": 3.3390546600539577, "tokens_seen": 3122659328 }, { "epoch": 0.89, "learning_rate": 5.4240552034020705e-05, "loss": 0.0652, "theoretical_loss": 3.339044062893815, "tokens_seen": 3122790400 }, { "epoch": 0.89, "learning_rate": 5.4200433282516246e-05, "loss": 0.0676, "theoretical_loss": 3.33903346630299, "tokens_seen": 3122921472 }, { "epoch": 0.89, "learning_rate": 5.41603145310118e-05, "loss": 0.0658, "theoretical_loss": 3.3390228702814286, "tokens_seen": 3123052544 }, { "epoch": 0.89, "learning_rate": 5.412019577950734e-05, "loss": 0.0665, "theoretical_loss": 3.3390122748290763, "tokens_seen": 3123183616 }, { "epoch": 0.89, "learning_rate": 5.408007702800289e-05, "loss": 0.0735, "theoretical_loss": 3.3390016799458784, "tokens_seen": 3123314688 }, { "epoch": 0.89, "learning_rate": 5.403995827649844e-05, "loss": 0.0644, "theoretical_loss": 3.338991085631781, "tokens_seen": 3123445760 }, { "epoch": 0.89, "learning_rate": 5.399983952499398e-05, "loss": 0.0677, "theoretical_loss": 3.3389804918867285, "tokens_seen": 3123576832 }, { "epoch": 0.89, "learning_rate": 5.3959720773489534e-05, "loss": 0.0674, "theoretical_loss": 3.3389698987106673, "tokens_seen": 3123707904 }, { "epoch": 0.89, "learning_rate": 5.3919602021985076e-05, "loss": 0.0653, "theoretical_loss": 3.3389593061035434, "tokens_seen": 3123838976 }, { "epoch": 0.89, "learning_rate": 5.3879483270480624e-05, "loss": 0.0664, "theoretical_loss": 3.338948714065302, "tokens_seen": 3123970048 }, { "epoch": 0.89, "learning_rate": 5.383936451897617e-05, "loss": 0.0679, "theoretical_loss": 3.338938122595888, "tokens_seen": 3124101120 }, { "epoch": 0.89, "learning_rate": 5.379924576747171e-05, "loss": 0.065, "theoretical_loss": 3.3389275316952474, "tokens_seen": 3124232192 }, { "epoch": 0.89, "learning_rate": 5.375912701596727e-05, "loss": 0.0627, "theoretical_loss": 3.338916941363326, "tokens_seen": 3124363264 }, { "epoch": 0.89, "objective/train/advantage_avg": -0.00043085598736070096, "objective/train/docs_used": 1135223, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3350822925567627, "objective/train/original_loss": 1.3350821733474731, "objective/train/theoretical_loss": 3.3389063516000697, "objective/train/tokens_used": 1495018976, "objective/train/value_avg": -0.01036834716796875, "objective/train/value_loss": 0.00045806754496879876, "objective/train/value_max": -4.947185516357422e-05, "objective/train/value_min": -0.81591796875, "objective/train/value_reward_corr": 0.7178445065800154, "objective/train/value_std": 0.0217132568359375, "objective/train/weight_avg": 0.9997718334197998, "objective/train/weighted_lm_loss": 1.3338640928268433, "objective/train/weights_max": 1.5731110572814941, "objective/train/weights_min": 0.382114052772522, "theoretical_loss": 3.3389063516000697, "tokens_seen": 3124494336 }, { "epoch": 0.89, "learning_rate": 5.3719008264462816e-05, "loss": 0.0651, "theoretical_loss": 3.3389063516000697, "tokens_seen": 3124494336 }, { "epoch": 0.89, "learning_rate": 5.367888951295836e-05, "loss": 0.0664, "theoretical_loss": 3.3388957624054236, "tokens_seen": 3124625408 }, { "epoch": 0.89, "learning_rate": 5.3638770761453905e-05, "loss": 0.0651, "theoretical_loss": 3.3388851737793335, "tokens_seen": 3124756480 }, { "epoch": 0.89, "learning_rate": 5.3598652009949447e-05, "loss": 0.0602, "theoretical_loss": 3.338874585721745, "tokens_seen": 3124887552 }, { "epoch": 0.89, "learning_rate": 5.3558533258445e-05, "loss": 0.0637, "theoretical_loss": 3.338863998232603, "tokens_seen": 3125018624 }, { "epoch": 0.89, "learning_rate": 5.351841450694055e-05, "loss": 0.0628, "theoretical_loss": 3.3388534113118546, "tokens_seen": 3125149696 }, { "epoch": 0.89, "learning_rate": 5.347829575543609e-05, "loss": 0.0693, "theoretical_loss": 3.338842824959445, "tokens_seen": 3125280768 }, { "epoch": 0.89, "learning_rate": 5.343817700393164e-05, "loss": 0.0642, "theoretical_loss": 3.3388322391753187, "tokens_seen": 3125411840 }, { "epoch": 0.89, "learning_rate": 5.339805825242718e-05, "loss": 0.0627, "theoretical_loss": 3.338821653959423, "tokens_seen": 3125542912 }, { "epoch": 0.89, "learning_rate": 5.3357939500922735e-05, "loss": 0.0651, "theoretical_loss": 3.338811069311702, "tokens_seen": 3125673984 }, { "epoch": 0.89, "learning_rate": 5.331782074941828e-05, "loss": 0.0624, "theoretical_loss": 3.3388004852321025, "tokens_seen": 3125805056 }, { "epoch": 0.89, "learning_rate": 5.3277701997913824e-05, "loss": 0.0677, "theoretical_loss": 3.33878990172057, "tokens_seen": 3125936128 }, { "epoch": 0.89, "learning_rate": 5.323758324640937e-05, "loss": 0.0696, "theoretical_loss": 3.33877931877705, "tokens_seen": 3126067200 }, { "epoch": 0.89, "learning_rate": 5.3197464494904914e-05, "loss": 0.0673, "theoretical_loss": 3.338768736401488, "tokens_seen": 3126198272 }, { "epoch": 0.89, "learning_rate": 5.315734574340047e-05, "loss": 0.0669, "theoretical_loss": 3.3387581545938305, "tokens_seen": 3126329344 }, { "epoch": 0.89, "learning_rate": 5.3117226991896016e-05, "loss": 0.0637, "theoretical_loss": 3.338747573354022, "tokens_seen": 3126460416 }, { "epoch": 0.89, "learning_rate": 5.307710824039156e-05, "loss": 0.0672, "theoretical_loss": 3.3387369926820094, "tokens_seen": 3126591488 }, { "epoch": 0.89, "learning_rate": 5.3036989488887106e-05, "loss": 0.0632, "theoretical_loss": 3.3387264125777376, "tokens_seen": 3126722560 }, { "epoch": 0.9, "learning_rate": 5.2996870737382654e-05, "loss": 0.0656, "theoretical_loss": 3.338715833041153, "tokens_seen": 3126853632 }, { "epoch": 0.9, "learning_rate": 5.29567519858782e-05, "loss": 0.0641, "theoretical_loss": 3.3387052540722006, "tokens_seen": 3126984704 }, { "epoch": 0.9, "learning_rate": 5.291663323437375e-05, "loss": 0.0623, "theoretical_loss": 3.338694675670827, "tokens_seen": 3127115776 }, { "epoch": 0.9, "learning_rate": 5.287651448286929e-05, "loss": 0.0643, "theoretical_loss": 3.3386840978369774, "tokens_seen": 3127246848 }, { "epoch": 0.9, "learning_rate": 5.283639573136484e-05, "loss": 0.0624, "theoretical_loss": 3.3386735205705977, "tokens_seen": 3127377920 }, { "epoch": 0.9, "learning_rate": 5.279627697986039e-05, "loss": 0.0667, "theoretical_loss": 3.3386629438716335, "tokens_seen": 3127508992 }, { "epoch": 0.9, "learning_rate": 5.2756158228355935e-05, "loss": 0.0645, "theoretical_loss": 3.338652367740031, "tokens_seen": 3127640064 }, { "epoch": 0.9, "objective/train/advantage_avg": -0.00014268378436099738, "objective/train/docs_used": 1136308, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.444013237953186, "objective/train/original_loss": 1.4440133571624756, "objective/train/theoretical_loss": 3.3386417921757356, "objective/train/tokens_used": 1498295776, "objective/train/value_avg": -0.0087127685546875, "objective/train/value_loss": 0.00013681019481737167, "objective/train/value_max": -1.9073486328125e-05, "objective/train/value_min": -0.20166015625, "objective/train/value_reward_corr": 0.8200716321043808, "objective/train/value_std": 0.01580810546875, "objective/train/weight_avg": 0.9999222159385681, "objective/train/weighted_lm_loss": 1.442944049835205, "objective/train/weights_max": 1.1450825929641724, "objective/train/weights_min": 0.4142749011516571, "theoretical_loss": 3.3386417921757356, "tokens_seen": 3127771136 }, { "epoch": 0.9, "learning_rate": 5.2716039476851484e-05, "loss": 0.0725, "theoretical_loss": 3.3386417921757356, "tokens_seen": 3127771136 }, { "epoch": 0.9, "learning_rate": 5.2675920725347025e-05, "loss": 0.0639, "theoretical_loss": 3.338631217178693, "tokens_seen": 3127902208 }, { "epoch": 0.9, "learning_rate": 5.263580197384257e-05, "loss": 0.0622, "theoretical_loss": 3.3386206427488494, "tokens_seen": 3128033280 }, { "epoch": 0.9, "learning_rate": 5.259568322233812e-05, "loss": 0.065, "theoretical_loss": 3.3386100688861506, "tokens_seen": 3128164352 }, { "epoch": 0.9, "learning_rate": 5.255556447083367e-05, "loss": 0.0664, "theoretical_loss": 3.3385994955905423, "tokens_seen": 3128295424 }, { "epoch": 0.9, "learning_rate": 5.251544571932922e-05, "loss": 0.0661, "theoretical_loss": 3.33858892286197, "tokens_seen": 3128426496 }, { "epoch": 0.9, "learning_rate": 5.247532696782476e-05, "loss": 0.0646, "theoretical_loss": 3.3385783507003803, "tokens_seen": 3128557568 }, { "epoch": 0.9, "learning_rate": 5.2435208216320306e-05, "loss": 0.0647, "theoretical_loss": 3.338567779105718, "tokens_seen": 3128688640 }, { "epoch": 0.9, "learning_rate": 5.239508946481586e-05, "loss": 0.068, "theoretical_loss": 3.3385572080779298, "tokens_seen": 3128819712 }, { "epoch": 0.9, "learning_rate": 5.23549707133114e-05, "loss": 0.0622, "theoretical_loss": 3.3385466376169615, "tokens_seen": 3128950784 }, { "epoch": 0.9, "learning_rate": 5.231485196180695e-05, "loss": 0.0638, "theoretical_loss": 3.3385360677227585, "tokens_seen": 3129081856 }, { "epoch": 0.9, "learning_rate": 5.22747332103025e-05, "loss": 0.0631, "theoretical_loss": 3.338525498395267, "tokens_seen": 3129212928 }, { "epoch": 0.9, "learning_rate": 5.223461445879804e-05, "loss": 0.0652, "theoretical_loss": 3.338514929634433, "tokens_seen": 3129344000 }, { "epoch": 0.9, "learning_rate": 5.2194495707293595e-05, "loss": 0.0648, "theoretical_loss": 3.338504361440202, "tokens_seen": 3129475072 }, { "epoch": 0.9, "learning_rate": 5.2154376955789136e-05, "loss": 0.0667, "theoretical_loss": 3.3384937938125203, "tokens_seen": 3129606144 }, { "epoch": 0.9, "learning_rate": 5.2114258204284684e-05, "loss": 0.0649, "theoretical_loss": 3.3384832267513334, "tokens_seen": 3129737216 }, { "epoch": 0.9, "learning_rate": 5.207413945278023e-05, "loss": 0.0661, "theoretical_loss": 3.338472660256587, "tokens_seen": 3129868288 }, { "epoch": 0.9, "learning_rate": 5.2034020701275773e-05, "loss": 0.0649, "theoretical_loss": 3.338462094328228, "tokens_seen": 3129999360 }, { "epoch": 0.9, "learning_rate": 5.199390194977133e-05, "loss": 0.0662, "theoretical_loss": 3.3384515289662016, "tokens_seen": 3130130432 }, { "epoch": 0.9, "learning_rate": 5.195378319826687e-05, "loss": 0.0648, "theoretical_loss": 3.338440964170454, "tokens_seen": 3130261504 }, { "epoch": 0.9, "learning_rate": 5.191366444676242e-05, "loss": 0.0668, "theoretical_loss": 3.338430399940931, "tokens_seen": 3130392576 }, { "epoch": 0.9, "learning_rate": 5.1873545695257966e-05, "loss": 0.0658, "theoretical_loss": 3.338419836277578, "tokens_seen": 3130523648 }, { "epoch": 0.9, "learning_rate": 5.183342694375351e-05, "loss": 0.0632, "theoretical_loss": 3.338409273180342, "tokens_seen": 3130654720 }, { "epoch": 0.9, "learning_rate": 5.179330819224906e-05, "loss": 0.0632, "theoretical_loss": 3.338398710649168, "tokens_seen": 3130785792 }, { "epoch": 0.9, "learning_rate": 5.175318944074461e-05, "loss": 0.0694, "theoretical_loss": 3.338388148684003, "tokens_seen": 3130916864 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.0007294545648619533, "objective/train/docs_used": 1137606, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.237131953239441, "objective/train/original_loss": 1.2371318340301514, "objective/train/theoretical_loss": 3.3383775872847923, "objective/train/tokens_used": 1501572576, "objective/train/value_avg": -0.006778717041015625, "objective/train/value_loss": 0.00010735885007306933, "objective/train/value_max": -3.11732292175293e-05, "objective/train/value_min": -0.2149658203125, "objective/train/value_reward_corr": 0.7595226229100402, "objective/train/value_std": 0.0117950439453125, "objective/train/weight_avg": 1.0007823705673218, "objective/train/weighted_lm_loss": 1.237913727760315, "objective/train/weights_max": 1.1265970468521118, "objective/train/weights_min": 0.8036206960678101, "theoretical_loss": 3.3383775872847923, "tokens_seen": 3131047936 }, { "epoch": 0.9, "learning_rate": 5.171307068924015e-05, "loss": 0.0676, "theoretical_loss": 3.3383775872847923, "tokens_seen": 3131047936 }, { "epoch": 0.9, "learning_rate": 5.16729519377357e-05, "loss": 0.0623, "theoretical_loss": 3.3383670264514818, "tokens_seen": 3131179008 }, { "epoch": 0.9, "learning_rate": 5.163283318623124e-05, "loss": 0.0631, "theoretical_loss": 3.3383564661840177, "tokens_seen": 3131310080 }, { "epoch": 0.9, "learning_rate": 5.1592714434726795e-05, "loss": 0.065, "theoretical_loss": 3.3383459064823464, "tokens_seen": 3131441152 }, { "epoch": 0.9, "learning_rate": 5.155259568322234e-05, "loss": 0.0657, "theoretical_loss": 3.3383353473464132, "tokens_seen": 3131572224 }, { "epoch": 0.9, "learning_rate": 5.1512476931717885e-05, "loss": 0.0632, "theoretical_loss": 3.3383247887761645, "tokens_seen": 3131703296 }, { "epoch": 0.9, "learning_rate": 5.147235818021343e-05, "loss": 0.0651, "theoretical_loss": 3.3383142307715463, "tokens_seen": 3131834368 }, { "epoch": 0.9, "learning_rate": 5.1432239428708974e-05, "loss": 0.0632, "theoretical_loss": 3.338303673332504, "tokens_seen": 3131965440 }, { "epoch": 0.9, "learning_rate": 5.139212067720453e-05, "loss": 0.0659, "theoretical_loss": 3.338293116458985, "tokens_seen": 3132096512 }, { "epoch": 0.9, "learning_rate": 5.135200192570008e-05, "loss": 0.065, "theoretical_loss": 3.338282560150934, "tokens_seen": 3132227584 }, { "epoch": 0.9, "learning_rate": 5.131188317419562e-05, "loss": 0.0649, "theoretical_loss": 3.3382720044082976, "tokens_seen": 3132358656 }, { "epoch": 0.9, "learning_rate": 5.1271764422691166e-05, "loss": 0.0649, "theoretical_loss": 3.3382614492310223, "tokens_seen": 3132489728 }, { "epoch": 0.9, "learning_rate": 5.123164567118671e-05, "loss": 0.0678, "theoretical_loss": 3.3382508946190534, "tokens_seen": 3132620800 }, { "epoch": 0.9, "learning_rate": 5.119152691968226e-05, "loss": 0.0686, "theoretical_loss": 3.3382403405723373, "tokens_seen": 3132751872 }, { "epoch": 0.9, "learning_rate": 5.115140816817781e-05, "loss": 0.065, "theoretical_loss": 3.33822978709082, "tokens_seen": 3132882944 }, { "epoch": 0.9, "learning_rate": 5.111128941667335e-05, "loss": 0.0659, "theoretical_loss": 3.338219234174448, "tokens_seen": 3133014016 }, { "epoch": 0.9, "learning_rate": 5.10711706651689e-05, "loss": 0.0605, "theoretical_loss": 3.3382086818231667, "tokens_seen": 3133145088 }, { "epoch": 0.9, "learning_rate": 5.103105191366445e-05, "loss": 0.0621, "theoretical_loss": 3.3381981300369223, "tokens_seen": 3133276160 }, { "epoch": 0.9, "learning_rate": 5.0990933162159996e-05, "loss": 0.0672, "theoretical_loss": 3.3381875788156616, "tokens_seen": 3133407232 }, { "epoch": 0.9, "learning_rate": 5.0950814410655544e-05, "loss": 0.0637, "theoretical_loss": 3.33817702815933, "tokens_seen": 3133538304 }, { "epoch": 0.9, "learning_rate": 5.0910695659151085e-05, "loss": 0.0641, "theoretical_loss": 3.3381664780678744, "tokens_seen": 3133669376 }, { "epoch": 0.9, "learning_rate": 5.087057690764663e-05, "loss": 0.0679, "theoretical_loss": 3.33815592854124, "tokens_seen": 3133800448 }, { "epoch": 0.9, "learning_rate": 5.083045815614218e-05, "loss": 0.0657, "theoretical_loss": 3.338145379579373, "tokens_seen": 3133931520 }, { "epoch": 0.9, "learning_rate": 5.079033940463773e-05, "loss": 0.065, "theoretical_loss": 3.3381348311822205, "tokens_seen": 3134062592 }, { "epoch": 0.9, "learning_rate": 5.075022065313328e-05, "loss": 0.0661, "theoretical_loss": 3.338124283349728, "tokens_seen": 3134193664 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.0006889239302836359, "objective/train/docs_used": 1138754, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2543506622314453, "objective/train/original_loss": 1.2543506622314453, "objective/train/theoretical_loss": 3.3381137360818416, "objective/train/tokens_used": 1504849376, "objective/train/value_avg": -0.007259368896484375, "objective/train/value_loss": 9.801622945815325e-05, "objective/train/value_max": -2.568960189819336e-05, "objective/train/value_min": -0.2069091796875, "objective/train/value_reward_corr": 0.7786969949174956, "objective/train/value_std": 0.0132293701171875, "objective/train/weight_avg": 1.0007376670837402, "objective/train/weighted_lm_loss": 1.2552211284637451, "objective/train/weights_max": 1.1807441711425781, "objective/train/weights_min": 0.7883899211883545, "theoretical_loss": 3.3381137360818416, "tokens_seen": 3134324736 }, { "epoch": 0.9, "learning_rate": 5.071010190162882e-05, "loss": 0.0629, "theoretical_loss": 3.3381137360818416, "tokens_seen": 3134324736 }, { "epoch": 0.9, "learning_rate": 5.066998315012437e-05, "loss": 0.0624, "theoretical_loss": 3.338103189378508, "tokens_seen": 3134455808 }, { "epoch": 0.9, "learning_rate": 5.0629864398619915e-05, "loss": 0.0621, "theoretical_loss": 3.3380926432396727, "tokens_seen": 3134586880 }, { "epoch": 0.9, "learning_rate": 5.058974564711546e-05, "loss": 0.0624, "theoretical_loss": 3.338082097665282, "tokens_seen": 3134717952 }, { "epoch": 0.9, "learning_rate": 5.054962689561101e-05, "loss": 0.0665, "theoretical_loss": 3.338071552655282, "tokens_seen": 3134849024 }, { "epoch": 0.9, "learning_rate": 5.050950814410656e-05, "loss": 0.0648, "theoretical_loss": 3.33806100820962, "tokens_seen": 3134980096 }, { "epoch": 0.9, "learning_rate": 5.04693893926021e-05, "loss": 0.0717, "theoretical_loss": 3.3380504643282407, "tokens_seen": 3135111168 }, { "epoch": 0.9, "learning_rate": 5.0429270641097655e-05, "loss": 0.064, "theoretical_loss": 3.338039921011091, "tokens_seen": 3135242240 }, { "epoch": 0.9, "learning_rate": 5.0389151889593196e-05, "loss": 0.0633, "theoretical_loss": 3.3380293782581174, "tokens_seen": 3135373312 }, { "epoch": 0.9, "learning_rate": 5.0349033138088744e-05, "loss": 0.0681, "theoretical_loss": 3.3380188360692657, "tokens_seen": 3135504384 }, { "epoch": 0.9, "learning_rate": 5.030891438658429e-05, "loss": 0.0666, "theoretical_loss": 3.3380082944444824, "tokens_seen": 3135635456 }, { "epoch": 0.9, "learning_rate": 5.0268795635079834e-05, "loss": 0.0647, "theoretical_loss": 3.3379977533837133, "tokens_seen": 3135766528 }, { "epoch": 0.9, "learning_rate": 5.022867688357539e-05, "loss": 0.0632, "theoretical_loss": 3.337987212886905, "tokens_seen": 3135897600 }, { "epoch": 0.9, "learning_rate": 5.018855813207093e-05, "loss": 0.0622, "theoretical_loss": 3.3379766729540035, "tokens_seen": 3136028672 }, { "epoch": 0.9, "learning_rate": 5.014843938056648e-05, "loss": 0.0656, "theoretical_loss": 3.3379661335849558, "tokens_seen": 3136159744 }, { "epoch": 0.9, "learning_rate": 5.0108320629062026e-05, "loss": 0.0673, "theoretical_loss": 3.3379555947797073, "tokens_seen": 3136290816 }, { "epoch": 0.9, "learning_rate": 5.006820187755757e-05, "loss": 0.0629, "theoretical_loss": 3.3379450565382047, "tokens_seen": 3136421888 }, { "epoch": 0.9, "learning_rate": 5.002808312605312e-05, "loss": 0.0624, "theoretical_loss": 3.337934518860394, "tokens_seen": 3136552960 }, { "epoch": 0.9, "learning_rate": 4.9987964374548663e-05, "loss": 0.0602, "theoretical_loss": 3.337923981746222, "tokens_seen": 3136684032 }, { "epoch": 0.9, "learning_rate": 4.994784562304421e-05, "loss": 0.0648, "theoretical_loss": 3.3379134451956345, "tokens_seen": 3136815104 }, { "epoch": 0.9, "learning_rate": 4.990772687153976e-05, "loss": 0.0646, "theoretical_loss": 3.3379029092085784, "tokens_seen": 3136946176 }, { "epoch": 0.9, "learning_rate": 4.98676081200353e-05, "loss": 0.0607, "theoretical_loss": 3.337892373784999, "tokens_seen": 3137077248 }, { "epoch": 0.9, "learning_rate": 4.9827489368530856e-05, "loss": 0.0656, "theoretical_loss": 3.3378818389248437, "tokens_seen": 3137208320 }, { "epoch": 0.9, "learning_rate": 4.9787370617026404e-05, "loss": 0.0622, "theoretical_loss": 3.337871304628058, "tokens_seen": 3137339392 }, { "epoch": 0.9, "learning_rate": 4.9747251865521945e-05, "loss": 0.0664, "theoretical_loss": 3.337860770894589, "tokens_seen": 3137470464 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.0001839887409005314, "objective/train/docs_used": 1140026, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3836036920547485, "objective/train/original_loss": 1.383603572845459, "objective/train/theoretical_loss": 3.3378502377243824, "objective/train/tokens_used": 1508126176, "objective/train/value_avg": -0.00811767578125, "objective/train/value_loss": 0.000382947240723297, "objective/train/value_max": -3.170967102050781e-05, "objective/train/value_min": -0.98046875, "objective/train/value_reward_corr": 0.8541675327201566, "objective/train/value_std": 0.0295562744140625, "objective/train/weight_avg": 1.0003573894500732, "objective/train/weighted_lm_loss": 1.3843170404434204, "objective/train/weights_max": 1.5233733654022217, "objective/train/weights_min": 0.38269758224487305, "theoretical_loss": 3.3378502377243824, "tokens_seen": 3137601536 }, { "epoch": 0.9, "learning_rate": 4.970713311401749e-05, "loss": 0.067, "theoretical_loss": 3.3378502377243824, "tokens_seen": 3137601536 }, { "epoch": 0.9, "learning_rate": 4.9667014362513034e-05, "loss": 0.0649, "theoretical_loss": 3.3378397051173847, "tokens_seen": 3137732608 }, { "epoch": 0.9, "learning_rate": 4.962689561100859e-05, "loss": 0.0695, "theoretical_loss": 3.3378291730735428, "tokens_seen": 3137863680 }, { "epoch": 0.9, "learning_rate": 4.958677685950414e-05, "loss": 0.0607, "theoretical_loss": 3.3378186415928024, "tokens_seen": 3137994752 }, { "epoch": 0.9, "learning_rate": 4.954665810799968e-05, "loss": 0.0646, "theoretical_loss": 3.3378081106751103, "tokens_seen": 3138125824 }, { "epoch": 0.9, "learning_rate": 4.9506539356495227e-05, "loss": 0.0657, "theoretical_loss": 3.3377975803204123, "tokens_seen": 3138256896 }, { "epoch": 0.9, "learning_rate": 4.946642060499077e-05, "loss": 0.0609, "theoretical_loss": 3.3377870505286555, "tokens_seen": 3138387968 }, { "epoch": 0.9, "learning_rate": 4.942630185348632e-05, "loss": 0.0632, "theoretical_loss": 3.3377765212997863, "tokens_seen": 3138519040 }, { "epoch": 0.9, "learning_rate": 4.938618310198187e-05, "loss": 0.066, "theoretical_loss": 3.3377659926337504, "tokens_seen": 3138650112 }, { "epoch": 0.9, "learning_rate": 4.934606435047741e-05, "loss": 0.0603, "theoretical_loss": 3.3377554645304945, "tokens_seen": 3138781184 }, { "epoch": 0.9, "learning_rate": 4.930594559897296e-05, "loss": 0.0631, "theoretical_loss": 3.3377449369899654, "tokens_seen": 3138912256 }, { "epoch": 0.9, "learning_rate": 4.926582684746851e-05, "loss": 0.0627, "theoretical_loss": 3.337734410012109, "tokens_seen": 3139043328 }, { "epoch": 0.9, "learning_rate": 4.9225708095964056e-05, "loss": 0.0648, "theoretical_loss": 3.337723883596872, "tokens_seen": 3139174400 }, { "epoch": 0.9, "learning_rate": 4.9185589344459604e-05, "loss": 0.0608, "theoretical_loss": 3.337713357744201, "tokens_seen": 3139305472 }, { "epoch": 0.9, "learning_rate": 4.9145470592955146e-05, "loss": 0.0635, "theoretical_loss": 3.337702832454042, "tokens_seen": 3139436544 }, { "epoch": 0.9, "learning_rate": 4.9105351841450694e-05, "loss": 0.0641, "theoretical_loss": 3.337692307726342, "tokens_seen": 3139567616 }, { "epoch": 0.9, "learning_rate": 4.906523308994624e-05, "loss": 0.0664, "theoretical_loss": 3.337681783561047, "tokens_seen": 3139698688 }, { "epoch": 0.9, "learning_rate": 4.902511433844179e-05, "loss": 0.064, "theoretical_loss": 3.3376712599581038, "tokens_seen": 3139829760 }, { "epoch": 0.9, "learning_rate": 4.898499558693734e-05, "loss": 0.0645, "theoretical_loss": 3.3376607369174587, "tokens_seen": 3139960832 }, { "epoch": 0.9, "learning_rate": 4.894487683543288e-05, "loss": 0.0627, "theoretical_loss": 3.337650214439058, "tokens_seen": 3140091904 }, { "epoch": 0.9, "learning_rate": 4.890475808392843e-05, "loss": 0.0669, "theoretical_loss": 3.337639692522849, "tokens_seen": 3140222976 }, { "epoch": 0.9, "learning_rate": 4.8864639332423975e-05, "loss": 0.0634, "theoretical_loss": 3.337629171168777, "tokens_seen": 3140354048 }, { "epoch": 0.9, "learning_rate": 4.882452058091952e-05, "loss": 0.0603, "theoretical_loss": 3.3376186503767897, "tokens_seen": 3140485120 }, { "epoch": 0.9, "learning_rate": 4.878440182941507e-05, "loss": 0.0635, "theoretical_loss": 3.3376081301468323, "tokens_seen": 3140616192 }, { "epoch": 0.9, "learning_rate": 4.874428307791061e-05, "loss": 0.0666, "theoretical_loss": 3.3375976104788525, "tokens_seen": 3140747264 }, { "epoch": 0.9, "objective/train/advantage_avg": 0.0008113420335575938, "objective/train/docs_used": 1141163, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2303107976913452, "objective/train/original_loss": 1.2303106784820557, "objective/train/theoretical_loss": 3.3375870913727965, "objective/train/tokens_used": 1511402976, "objective/train/value_avg": -0.006992340087890625, "objective/train/value_loss": 0.00011325669038342312, "objective/train/value_max": -1.9371509552001953e-05, "objective/train/value_min": -0.9658203125, "objective/train/value_reward_corr": 0.8638190163641304, "objective/train/value_std": 0.0181427001953125, "objective/train/weight_avg": 1.0008670091629028, "objective/train/weighted_lm_loss": 1.2315560579299927, "objective/train/weights_max": 1.3383455276489258, "objective/train/weights_min": 0.5817304849624634, "theoretical_loss": 3.3375870913727965, "tokens_seen": 3140878336 }, { "epoch": 0.9, "learning_rate": 4.870416432640616e-05, "loss": 0.0634, "theoretical_loss": 3.3375870913727965, "tokens_seen": 3140878336 }, { "epoch": 0.9, "learning_rate": 4.866404557490171e-05, "loss": 0.062, "theoretical_loss": 3.3375765728286106, "tokens_seen": 3141009408 }, { "epoch": 0.9, "learning_rate": 4.862392682339726e-05, "loss": 0.0651, "theoretical_loss": 3.3375660548462416, "tokens_seen": 3141140480 }, { "epoch": 0.9, "learning_rate": 4.8583808071892805e-05, "loss": 0.0632, "theoretical_loss": 3.3375555374256356, "tokens_seen": 3141271552 }, { "epoch": 0.9, "learning_rate": 4.854368932038835e-05, "loss": 0.0661, "theoretical_loss": 3.33754502056674, "tokens_seen": 3141402624 }, { "epoch": 0.9, "learning_rate": 4.8503570568883894e-05, "loss": 0.0589, "theoretical_loss": 3.3375345042695, "tokens_seen": 3141533696 }, { "epoch": 0.9, "learning_rate": 4.846345181737945e-05, "loss": 0.0616, "theoretical_loss": 3.3375239885338637, "tokens_seen": 3141664768 }, { "epoch": 0.9, "learning_rate": 4.842333306587499e-05, "loss": 0.0637, "theoretical_loss": 3.3375134733597767, "tokens_seen": 3141795840 }, { "epoch": 0.9, "learning_rate": 4.838321431437054e-05, "loss": 0.0651, "theoretical_loss": 3.337502958747186, "tokens_seen": 3141926912 }, { "epoch": 0.9, "learning_rate": 4.8343095562866086e-05, "loss": 0.0616, "theoretical_loss": 3.337492444696038, "tokens_seen": 3142057984 }, { "epoch": 0.9, "learning_rate": 4.830297681136163e-05, "loss": 0.0676, "theoretical_loss": 3.3374819312062796, "tokens_seen": 3142189056 }, { "epoch": 0.9, "learning_rate": 4.826285805985718e-05, "loss": 0.0683, "theoretical_loss": 3.3374714182778566, "tokens_seen": 3142320128 }, { "epoch": 0.9, "learning_rate": 4.8222739308352724e-05, "loss": 0.0614, "theoretical_loss": 3.337460905910717, "tokens_seen": 3142451200 }, { "epoch": 0.9, "learning_rate": 4.818262055684827e-05, "loss": 0.0689, "theoretical_loss": 3.337450394104806, "tokens_seen": 3142582272 }, { "epoch": 0.9, "learning_rate": 4.814250180534382e-05, "loss": 0.0627, "theoretical_loss": 3.337439882860071, "tokens_seen": 3142713344 }, { "epoch": 0.9, "learning_rate": 4.810238305383936e-05, "loss": 0.0628, "theoretical_loss": 3.3374293721764587, "tokens_seen": 3142844416 }, { "epoch": 0.9, "learning_rate": 4.8062264302334916e-05, "loss": 0.0627, "theoretical_loss": 3.337418862053916, "tokens_seen": 3142975488 }, { "epoch": 0.9, "learning_rate": 4.8022145550830464e-05, "loss": 0.0632, "theoretical_loss": 3.337408352492388, "tokens_seen": 3143106560 }, { "epoch": 0.9, "learning_rate": 4.7982026799326005e-05, "loss": 0.0646, "theoretical_loss": 3.3373978434918232, "tokens_seen": 3143237632 }, { "epoch": 0.91, "learning_rate": 4.7941908047821553e-05, "loss": 0.0626, "theoretical_loss": 3.3373873350521674, "tokens_seen": 3143368704 }, { "epoch": 0.91, "learning_rate": 4.7901789296317095e-05, "loss": 0.0608, "theoretical_loss": 3.337376827173367, "tokens_seen": 3143499776 }, { "epoch": 0.91, "learning_rate": 4.786167054481265e-05, "loss": 0.0617, "theoretical_loss": 3.3373663198553696, "tokens_seen": 3143630848 }, { "epoch": 0.91, "learning_rate": 4.78215517933082e-05, "loss": 0.0614, "theoretical_loss": 3.337355813098121, "tokens_seen": 3143761920 }, { "epoch": 0.91, "learning_rate": 4.778143304180374e-05, "loss": 0.0632, "theoretical_loss": 3.3373453069015686, "tokens_seen": 3143892992 }, { "epoch": 0.91, "learning_rate": 4.774131429029929e-05, "loss": 0.0645, "theoretical_loss": 3.3373348012656585, "tokens_seen": 3144024064 }, { "epoch": 0.91, "objective/train/advantage_avg": -0.0007176327053457499, "objective/train/docs_used": 1142279, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2468386888504028, "objective/train/original_loss": 1.2468385696411133, "objective/train/theoretical_loss": 3.337324296190338, "objective/train/tokens_used": 1514679776, "objective/train/value_avg": -0.01171875, "objective/train/value_loss": 0.0003149180265609175, "objective/train/value_max": -6.300210952758789e-05, "objective/train/value_min": -0.81103515625, "objective/train/value_reward_corr": 0.7251921697742879, "objective/train/value_std": 0.0181732177734375, "objective/train/weight_avg": 0.999424397945404, "objective/train/weighted_lm_loss": 1.245508074760437, "objective/train/weights_max": 1.4532164335250854, "objective/train/weights_min": 0.3975783586502075, "theoretical_loss": 3.337324296190338, "tokens_seen": 3144155136 }, { "epoch": 0.91, "learning_rate": 4.770119553879483e-05, "loss": 0.066, "theoretical_loss": 3.337324296190338, "tokens_seen": 3144155136 }, { "epoch": 0.91, "learning_rate": 4.766107678729038e-05, "loss": 0.0643, "theoretical_loss": 3.3373137916755535, "tokens_seen": 3144286208 }, { "epoch": 0.91, "learning_rate": 4.762095803578593e-05, "loss": 0.0686, "theoretical_loss": 3.3373032877212516, "tokens_seen": 3144417280 }, { "epoch": 0.91, "learning_rate": 4.758083928428147e-05, "loss": 0.0629, "theoretical_loss": 3.3372927843273787, "tokens_seen": 3144548352 }, { "epoch": 0.91, "learning_rate": 4.754072053277702e-05, "loss": 0.0675, "theoretical_loss": 3.3372822814938825, "tokens_seen": 3144679424 }, { "epoch": 0.91, "learning_rate": 4.750060178127256e-05, "loss": 0.0667, "theoretical_loss": 3.3372717792207096, "tokens_seen": 3144810496 }, { "epoch": 0.91, "learning_rate": 4.7460483029768117e-05, "loss": 0.0622, "theoretical_loss": 3.337261277507806, "tokens_seen": 3144941568 }, { "epoch": 0.91, "learning_rate": 4.7420364278263665e-05, "loss": 0.0656, "theoretical_loss": 3.3372507763551185, "tokens_seen": 3145072640 }, { "epoch": 0.91, "learning_rate": 4.7380245526759206e-05, "loss": 0.0618, "theoretical_loss": 3.3372402757625945, "tokens_seen": 3145203712 }, { "epoch": 0.91, "learning_rate": 4.7340126775254754e-05, "loss": 0.0618, "theoretical_loss": 3.3372297757301808, "tokens_seen": 3145334784 }, { "epoch": 0.91, "learning_rate": 4.73000080237503e-05, "loss": 0.0629, "theoretical_loss": 3.337219276257824, "tokens_seen": 3145465856 }, { "epoch": 0.91, "learning_rate": 4.725988927224585e-05, "loss": 0.0649, "theoretical_loss": 3.33720877734547, "tokens_seen": 3145596928 }, { "epoch": 0.91, "learning_rate": 4.72197705207414e-05, "loss": 0.0587, "theoretical_loss": 3.3371982789930668, "tokens_seen": 3145728000 }, { "epoch": 0.91, "learning_rate": 4.717965176923694e-05, "loss": 0.0663, "theoretical_loss": 3.3371877812005613, "tokens_seen": 3145859072 }, { "epoch": 0.91, "learning_rate": 4.713953301773249e-05, "loss": 0.0657, "theoretical_loss": 3.3371772839678995, "tokens_seen": 3145990144 }, { "epoch": 0.91, "learning_rate": 4.7099414266228036e-05, "loss": 0.0635, "theoretical_loss": 3.337166787295028, "tokens_seen": 3146121216 }, { "epoch": 0.91, "learning_rate": 4.7059295514723584e-05, "loss": 0.0648, "theoretical_loss": 3.337156291181895, "tokens_seen": 3146252288 }, { "epoch": 0.91, "learning_rate": 4.701917676321913e-05, "loss": 0.0626, "theoretical_loss": 3.337145795628446, "tokens_seen": 3146383360 }, { "epoch": 0.91, "learning_rate": 4.697905801171467e-05, "loss": 0.065, "theoretical_loss": 3.3371353006346283, "tokens_seen": 3146514432 }, { "epoch": 0.91, "learning_rate": 4.693893926021022e-05, "loss": 0.0606, "theoretical_loss": 3.337124806200389, "tokens_seen": 3146645504 }, { "epoch": 0.91, "learning_rate": 4.689882050870577e-05, "loss": 0.0654, "theoretical_loss": 3.3371143123256743, "tokens_seen": 3146776576 }, { "epoch": 0.91, "learning_rate": 4.685870175720132e-05, "loss": 0.0661, "theoretical_loss": 3.337103819010432, "tokens_seen": 3146907648 }, { "epoch": 0.91, "learning_rate": 4.6818583005696865e-05, "loss": 0.0642, "theoretical_loss": 3.3370933262546085, "tokens_seen": 3147038720 }, { "epoch": 0.91, "learning_rate": 4.677846425419241e-05, "loss": 0.0662, "theoretical_loss": 3.33708283405815, "tokens_seen": 3147169792 }, { "epoch": 0.91, "learning_rate": 4.6738345502687955e-05, "loss": 0.0675, "theoretical_loss": 3.337072342421005, "tokens_seen": 3147300864 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.00021485566685441881, "objective/train/docs_used": 1143525, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1653653383255005, "objective/train/original_loss": 1.165365219116211, "objective/train/theoretical_loss": 3.337061851343119, "objective/train/tokens_used": 1517956576, "objective/train/value_avg": -0.00940704345703125, "objective/train/value_loss": 0.00019293001969344914, "objective/train/value_max": -2.9802322387695312e-05, "objective/train/value_min": -0.716796875, "objective/train/value_reward_corr": 0.87562644138771, "objective/train/value_std": 0.0253753662109375, "objective/train/weight_avg": 1.0003098249435425, "objective/train/weighted_lm_loss": 1.1653449535369873, "objective/train/weights_max": 1.8681029081344604, "objective/train/weights_min": 0.5569349527359009, "theoretical_loss": 3.337061851343119, "tokens_seen": 3147431936 }, { "epoch": 0.91, "learning_rate": 4.66982267511835e-05, "loss": 0.0626, "theoretical_loss": 3.337061851343119, "tokens_seen": 3147431936 }, { "epoch": 0.91, "learning_rate": 4.665810799967905e-05, "loss": 0.0687, "theoretical_loss": 3.3370513608244394, "tokens_seen": 3147563008 }, { "epoch": 0.91, "learning_rate": 4.66179892481746e-05, "loss": 0.0628, "theoretical_loss": 3.3370408708649126, "tokens_seen": 3147694080 }, { "epoch": 0.91, "learning_rate": 4.657787049667015e-05, "loss": 0.0631, "theoretical_loss": 3.3370303814644866, "tokens_seen": 3147825152 }, { "epoch": 0.91, "learning_rate": 4.653775174516569e-05, "loss": 0.0641, "theoretical_loss": 3.3370198926231076, "tokens_seen": 3147956224 }, { "epoch": 0.91, "learning_rate": 4.649763299366124e-05, "loss": 0.0623, "theoretical_loss": 3.337009404340722, "tokens_seen": 3148087296 }, { "epoch": 0.91, "learning_rate": 4.6457514242156784e-05, "loss": 0.0647, "theoretical_loss": 3.336998916617278, "tokens_seen": 3148218368 }, { "epoch": 0.91, "learning_rate": 4.641739549065233e-05, "loss": 0.0639, "theoretical_loss": 3.3369884294527217, "tokens_seen": 3148349440 }, { "epoch": 0.91, "learning_rate": 4.637727673914788e-05, "loss": 0.0673, "theoretical_loss": 3.336977942847, "tokens_seen": 3148480512 }, { "epoch": 0.91, "learning_rate": 4.633715798764342e-05, "loss": 0.0626, "theoretical_loss": 3.3369674568000605, "tokens_seen": 3148611584 }, { "epoch": 0.91, "learning_rate": 4.6297039236138976e-05, "loss": 0.064, "theoretical_loss": 3.3369569713118494, "tokens_seen": 3148742656 }, { "epoch": 0.91, "learning_rate": 4.625692048463452e-05, "loss": 0.0663, "theoretical_loss": 3.336946486382314, "tokens_seen": 3148873728 }, { "epoch": 0.91, "learning_rate": 4.6216801733130066e-05, "loss": 0.0607, "theoretical_loss": 3.336936002011402, "tokens_seen": 3149004800 }, { "epoch": 0.91, "learning_rate": 4.6176682981625614e-05, "loss": 0.0709, "theoretical_loss": 3.336925518199059, "tokens_seen": 3149135872 }, { "epoch": 0.91, "learning_rate": 4.6136564230121155e-05, "loss": 0.0653, "theoretical_loss": 3.3369150349452332, "tokens_seen": 3149266944 }, { "epoch": 0.91, "learning_rate": 4.609644547861671e-05, "loss": 0.0655, "theoretical_loss": 3.336904552249871, "tokens_seen": 3149398016 }, { "epoch": 0.91, "learning_rate": 4.605632672711226e-05, "loss": 0.0635, "theoretical_loss": 3.336894070112919, "tokens_seen": 3149529088 }, { "epoch": 0.91, "learning_rate": 4.60162079756078e-05, "loss": 0.0635, "theoretical_loss": 3.3368835885343255, "tokens_seen": 3149660160 }, { "epoch": 0.91, "learning_rate": 4.597608922410335e-05, "loss": 0.0644, "theoretical_loss": 3.3368731075140365, "tokens_seen": 3149791232 }, { "epoch": 0.91, "learning_rate": 4.593597047259889e-05, "loss": 0.0637, "theoretical_loss": 3.3368626270519988, "tokens_seen": 3149922304 }, { "epoch": 0.91, "learning_rate": 4.5895851721094443e-05, "loss": 0.0632, "theoretical_loss": 3.3368521471481603, "tokens_seen": 3150053376 }, { "epoch": 0.91, "learning_rate": 4.585573296958999e-05, "loss": 0.0682, "theoretical_loss": 3.3368416678024677, "tokens_seen": 3150184448 }, { "epoch": 0.91, "learning_rate": 4.581561421808553e-05, "loss": 0.0621, "theoretical_loss": 3.336831189014868, "tokens_seen": 3150315520 }, { "epoch": 0.91, "learning_rate": 4.577549546658108e-05, "loss": 0.0617, "theoretical_loss": 3.3368207107853083, "tokens_seen": 3150446592 }, { "epoch": 0.91, "learning_rate": 4.573537671507662e-05, "loss": 0.066, "theoretical_loss": 3.3368102331137357, "tokens_seen": 3150577664 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.000617370882537216, "objective/train/docs_used": 1144741, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3537697792053223, "objective/train/original_loss": 1.3537700176239014, "objective/train/theoretical_loss": 3.3367997560000973, "objective/train/tokens_used": 1521233376, "objective/train/value_avg": -0.007633209228515625, "objective/train/value_loss": 0.00023306567163672298, "objective/train/value_max": -2.777576446533203e-05, "objective/train/value_min": -0.369140625, "objective/train/value_reward_corr": 0.6356369808532786, "objective/train/value_std": 0.01352691650390625, "objective/train/weight_avg": 1.0007227659225464, "objective/train/weighted_lm_loss": 1.354282259941101, "objective/train/weights_max": 1.2928329706192017, "objective/train/weights_min": 0.37639662623405457, "theoretical_loss": 3.3367997560000973, "tokens_seen": 3150708736 }, { "epoch": 0.91, "learning_rate": 4.569525796357218e-05, "loss": 0.0649, "theoretical_loss": 3.3367997560000973, "tokens_seen": 3150708736 }, { "epoch": 0.91, "learning_rate": 4.5655139212067725e-05, "loss": 0.0629, "theoretical_loss": 3.3367892794443397, "tokens_seen": 3150839808 }, { "epoch": 0.91, "learning_rate": 4.5615020460563266e-05, "loss": 0.0621, "theoretical_loss": 3.3367788034464105, "tokens_seen": 3150970880 }, { "epoch": 0.91, "learning_rate": 4.5574901709058814e-05, "loss": 0.0657, "theoretical_loss": 3.336768328006257, "tokens_seen": 3151101952 }, { "epoch": 0.91, "learning_rate": 4.5534782957554356e-05, "loss": 0.0634, "theoretical_loss": 3.336757853123826, "tokens_seen": 3151233024 }, { "epoch": 0.91, "learning_rate": 4.549466420604991e-05, "loss": 0.0645, "theoretical_loss": 3.336747378799064, "tokens_seen": 3151364096 }, { "epoch": 0.91, "learning_rate": 4.545454545454546e-05, "loss": 0.06, "theoretical_loss": 3.3367369050319193, "tokens_seen": 3151495168 }, { "epoch": 0.91, "learning_rate": 4.5414426703041e-05, "loss": 0.0673, "theoretical_loss": 3.3367264318223384, "tokens_seen": 3151626240 }, { "epoch": 0.91, "learning_rate": 4.537430795153655e-05, "loss": 0.0632, "theoretical_loss": 3.3367159591702684, "tokens_seen": 3151757312 }, { "epoch": 0.91, "learning_rate": 4.5334189200032096e-05, "loss": 0.0624, "theoretical_loss": 3.3367054870756565, "tokens_seen": 3151888384 }, { "epoch": 0.91, "learning_rate": 4.5294070448527644e-05, "loss": 0.0628, "theoretical_loss": 3.33669501553845, "tokens_seen": 3152019456 }, { "epoch": 0.91, "learning_rate": 4.525395169702319e-05, "loss": 0.0611, "theoretical_loss": 3.3366845445585955, "tokens_seen": 3152150528 }, { "epoch": 0.91, "learning_rate": 4.521383294551873e-05, "loss": 0.0634, "theoretical_loss": 3.336674074136041, "tokens_seen": 3152281600 }, { "epoch": 0.91, "learning_rate": 4.517371419401428e-05, "loss": 0.0651, "theoretical_loss": 3.3366636042707327, "tokens_seen": 3152412672 }, { "epoch": 0.91, "learning_rate": 4.513359544250983e-05, "loss": 0.0637, "theoretical_loss": 3.336653134962619, "tokens_seen": 3152543744 }, { "epoch": 0.91, "learning_rate": 4.509347669100538e-05, "loss": 0.0616, "theoretical_loss": 3.336642666211646, "tokens_seen": 3152674816 }, { "epoch": 0.91, "learning_rate": 4.5053357939500926e-05, "loss": 0.0651, "theoretical_loss": 3.3366321980177616, "tokens_seen": 3152805888 }, { "epoch": 0.91, "learning_rate": 4.501323918799647e-05, "loss": 0.0675, "theoretical_loss": 3.3366217303809123, "tokens_seen": 3152936960 }, { "epoch": 0.91, "learning_rate": 4.4973120436492015e-05, "loss": 0.0644, "theoretical_loss": 3.3366112633010463, "tokens_seen": 3153068032 }, { "epoch": 0.91, "learning_rate": 4.493300168498756e-05, "loss": 0.0669, "theoretical_loss": 3.3366007967781095, "tokens_seen": 3153199104 }, { "epoch": 0.91, "learning_rate": 4.489288293348311e-05, "loss": 0.0587, "theoretical_loss": 3.3365903308120504, "tokens_seen": 3153330176 }, { "epoch": 0.91, "learning_rate": 4.485276418197866e-05, "loss": 0.06, "theoretical_loss": 3.336579865402815, "tokens_seen": 3153461248 }, { "epoch": 0.91, "learning_rate": 4.481264543047421e-05, "loss": 0.0652, "theoretical_loss": 3.336569400550352, "tokens_seen": 3153592320 }, { "epoch": 0.91, "learning_rate": 4.477252667896975e-05, "loss": 0.0631, "theoretical_loss": 3.3365589362546073, "tokens_seen": 3153723392 }, { "epoch": 0.91, "learning_rate": 4.4732407927465297e-05, "loss": 0.0652, "theoretical_loss": 3.3365484725155286, "tokens_seen": 3153854464 }, { "epoch": 0.91, "objective/train/advantage_avg": 0.000531767844222486, "objective/train/docs_used": 1145990, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.247833251953125, "objective/train/original_loss": 1.247833490371704, "objective/train/theoretical_loss": 3.336538009333063, "objective/train/tokens_used": 1524510176, "objective/train/value_avg": -0.005474090576171875, "objective/train/value_loss": 0.0001445947855245322, "objective/train/value_max": -4.756450653076172e-05, "objective/train/value_min": -0.2178955078125, "objective/train/value_reward_corr": 0.628519641483854, "objective/train/value_std": 0.0100250244140625, "objective/train/weight_avg": 1.0005933046340942, "objective/train/weighted_lm_loss": 1.2484912872314453, "objective/train/weights_max": 1.1078227758407593, "objective/train/weights_min": 0.38186925649642944, "theoretical_loss": 3.336538009333063, "tokens_seen": 3153985536 }, { "epoch": 0.91, "learning_rate": 4.4692289175960845e-05, "loss": 0.0624, "theoretical_loss": 3.336538009333063, "tokens_seen": 3153985536 }, { "epoch": 0.91, "learning_rate": 4.465217042445639e-05, "loss": 0.0656, "theoretical_loss": 3.3365275467071585, "tokens_seen": 3154116608 }, { "epoch": 0.91, "learning_rate": 4.461205167295194e-05, "loss": 0.0671, "theoretical_loss": 3.336517084637762, "tokens_seen": 3154247680 }, { "epoch": 0.91, "learning_rate": 4.457193292144748e-05, "loss": 0.0655, "theoretical_loss": 3.33650662312482, "tokens_seen": 3154378752 }, { "epoch": 0.91, "learning_rate": 4.453181416994304e-05, "loss": 0.0665, "theoretical_loss": 3.336496162168281, "tokens_seen": 3154509824 }, { "epoch": 0.91, "learning_rate": 4.449169541843858e-05, "loss": 0.0662, "theoretical_loss": 3.3364857017680913, "tokens_seen": 3154640896 }, { "epoch": 0.91, "learning_rate": 4.4451576666934126e-05, "loss": 0.0703, "theoretical_loss": 3.336475241924199, "tokens_seen": 3154771968 }, { "epoch": 0.91, "learning_rate": 4.4411457915429674e-05, "loss": 0.0651, "theoretical_loss": 3.3364647826365506, "tokens_seen": 3154903040 }, { "epoch": 0.91, "learning_rate": 4.4371339163925216e-05, "loss": 0.0652, "theoretical_loss": 3.3364543239050937, "tokens_seen": 3155034112 }, { "epoch": 0.91, "learning_rate": 4.433122041242077e-05, "loss": 0.0562, "theoretical_loss": 3.3364438657297764, "tokens_seen": 3155165184 }, { "epoch": 0.91, "learning_rate": 4.429110166091632e-05, "loss": 0.0638, "theoretical_loss": 3.336433408110545, "tokens_seen": 3155296256 }, { "epoch": 0.91, "learning_rate": 4.425098290941186e-05, "loss": 0.0639, "theoretical_loss": 3.3364229510473473, "tokens_seen": 3155427328 }, { "epoch": 0.91, "learning_rate": 4.421086415790741e-05, "loss": 0.0663, "theoretical_loss": 3.33641249454013, "tokens_seen": 3155558400 }, { "epoch": 0.91, "learning_rate": 4.417074540640295e-05, "loss": 0.0631, "theoretical_loss": 3.3364020385888415, "tokens_seen": 3155689472 }, { "epoch": 0.91, "learning_rate": 4.4130626654898504e-05, "loss": 0.0631, "theoretical_loss": 3.3363915831934285, "tokens_seen": 3155820544 }, { "epoch": 0.91, "learning_rate": 4.409050790339405e-05, "loss": 0.061, "theoretical_loss": 3.3363811283538385, "tokens_seen": 3155951616 }, { "epoch": 0.91, "learning_rate": 4.405038915188959e-05, "loss": 0.0642, "theoretical_loss": 3.336370674070019, "tokens_seen": 3156082688 }, { "epoch": 0.91, "learning_rate": 4.401027040038514e-05, "loss": 0.0629, "theoretical_loss": 3.336360220341917, "tokens_seen": 3156213760 }, { "epoch": 0.91, "learning_rate": 4.397015164888068e-05, "loss": 0.0655, "theoretical_loss": 3.3363497671694806, "tokens_seen": 3156344832 }, { "epoch": 0.91, "learning_rate": 4.393003289737624e-05, "loss": 0.0666, "theoretical_loss": 3.3363393145526565, "tokens_seen": 3156475904 }, { "epoch": 0.91, "learning_rate": 4.3889914145871785e-05, "loss": 0.0633, "theoretical_loss": 3.336328862491392, "tokens_seen": 3156606976 }, { "epoch": 0.91, "learning_rate": 4.384979539436733e-05, "loss": 0.0629, "theoretical_loss": 3.336318410985635, "tokens_seen": 3156738048 }, { "epoch": 0.91, "learning_rate": 4.3809676642862875e-05, "loss": 0.0647, "theoretical_loss": 3.3363079600353327, "tokens_seen": 3156869120 }, { "epoch": 0.91, "learning_rate": 4.3769557891358416e-05, "loss": 0.0729, "theoretical_loss": 3.336297509640433, "tokens_seen": 3157000192 }, { "epoch": 0.91, "learning_rate": 4.372943913985397e-05, "loss": 0.068, "theoretical_loss": 3.336287059800882, "tokens_seen": 3157131264 }, { "epoch": 0.91, "objective/train/advantage_avg": -0.00012268703721929342, "objective/train/docs_used": 1147172, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.238295555114746, "objective/train/original_loss": 1.2382956743240356, "objective/train/theoretical_loss": 3.336276610516629, "objective/train/tokens_used": 1527786976, "objective/train/value_avg": -0.005504608154296875, "objective/train/value_loss": 0.00010320972796762362, "objective/train/value_max": -3.916025161743164e-05, "objective/train/value_min": -0.2166748046875, "objective/train/value_reward_corr": 0.796647075404727, "objective/train/value_std": 0.0107269287109375, "objective/train/weight_avg": 0.9999280571937561, "objective/train/weighted_lm_loss": 1.2378548383712769, "objective/train/weights_max": 1.198367953300476, "objective/train/weights_min": 0.822647213935852, "theoretical_loss": 3.336276610516629, "tokens_seen": 3157262336 }, { "epoch": 0.91, "learning_rate": 4.368932038834952e-05, "loss": 0.0634, "theoretical_loss": 3.336276610516629, "tokens_seen": 3157262336 }, { "epoch": 0.91, "learning_rate": 4.364920163684506e-05, "loss": 0.0636, "theoretical_loss": 3.33626616178762, "tokens_seen": 3157393408 }, { "epoch": 0.91, "learning_rate": 4.360908288534061e-05, "loss": 0.0665, "theoretical_loss": 3.3362557136138027, "tokens_seen": 3157524480 }, { "epoch": 0.91, "learning_rate": 4.3568964133836156e-05, "loss": 0.064, "theoretical_loss": 3.336245265995125, "tokens_seen": 3157655552 }, { "epoch": 0.91, "learning_rate": 4.3528845382331704e-05, "loss": 0.0602, "theoretical_loss": 3.3362348189315343, "tokens_seen": 3157786624 }, { "epoch": 0.91, "learning_rate": 4.348872663082725e-05, "loss": 0.0607, "theoretical_loss": 3.3362243724229774, "tokens_seen": 3157917696 }, { "epoch": 0.91, "learning_rate": 4.3448607879322794e-05, "loss": 0.0657, "theoretical_loss": 3.3362139264694024, "tokens_seen": 3158048768 }, { "epoch": 0.91, "learning_rate": 4.340848912781834e-05, "loss": 0.0627, "theoretical_loss": 3.336203481070757, "tokens_seen": 3158179840 }, { "epoch": 0.91, "learning_rate": 4.336837037631389e-05, "loss": 0.064, "theoretical_loss": 3.3361930362269883, "tokens_seen": 3158310912 }, { "epoch": 0.91, "learning_rate": 4.332825162480944e-05, "loss": 0.0605, "theoretical_loss": 3.3361825919380435, "tokens_seen": 3158441984 }, { "epoch": 0.91, "learning_rate": 4.3288132873304986e-05, "loss": 0.0598, "theoretical_loss": 3.336172148203871, "tokens_seen": 3158573056 }, { "epoch": 0.91, "learning_rate": 4.324801412180053e-05, "loss": 0.0639, "theoretical_loss": 3.3361617050244172, "tokens_seen": 3158704128 }, { "epoch": 0.91, "learning_rate": 4.3207895370296075e-05, "loss": 0.0616, "theoretical_loss": 3.33615126239963, "tokens_seen": 3158835200 }, { "epoch": 0.91, "learning_rate": 4.3167776618791623e-05, "loss": 0.067, "theoretical_loss": 3.3361408203294576, "tokens_seen": 3158966272 }, { "epoch": 0.91, "learning_rate": 4.312765786728717e-05, "loss": 0.0653, "theoretical_loss": 3.336130378813847, "tokens_seen": 3159097344 }, { "epoch": 0.91, "learning_rate": 4.308753911578272e-05, "loss": 0.0635, "theoretical_loss": 3.3361199378527457, "tokens_seen": 3159228416 }, { "epoch": 0.91, "learning_rate": 4.304742036427826e-05, "loss": 0.0644, "theoretical_loss": 3.3361094974461016, "tokens_seen": 3159359488 }, { "epoch": 0.91, "learning_rate": 4.300730161277381e-05, "loss": 0.0626, "theoretical_loss": 3.3360990575938616, "tokens_seen": 3159490560 }, { "epoch": 0.91, "learning_rate": 4.296718286126936e-05, "loss": 0.0668, "theoretical_loss": 3.3360886182959737, "tokens_seen": 3159621632 }, { "epoch": 0.92, "learning_rate": 4.2927064109764905e-05, "loss": 0.0662, "theoretical_loss": 3.3360781795523855, "tokens_seen": 3159752704 }, { "epoch": 0.92, "learning_rate": 4.288694535826045e-05, "loss": 0.0627, "theoretical_loss": 3.3360677413630446, "tokens_seen": 3159883776 }, { "epoch": 0.92, "learning_rate": 4.2846826606756e-05, "loss": 0.0605, "theoretical_loss": 3.336057303727898, "tokens_seen": 3160014848 }, { "epoch": 0.92, "learning_rate": 4.280670785525154e-05, "loss": 0.0623, "theoretical_loss": 3.3360468666468943, "tokens_seen": 3160145920 }, { "epoch": 0.92, "learning_rate": 4.276658910374709e-05, "loss": 0.0666, "theoretical_loss": 3.3360364301199805, "tokens_seen": 3160276992 }, { "epoch": 0.92, "learning_rate": 4.272647035224264e-05, "loss": 0.0576, "theoretical_loss": 3.336025994147104, "tokens_seen": 3160408064 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.0003389687626622617, "objective/train/docs_used": 1148479, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4082382917404175, "objective/train/original_loss": 1.408238172531128, "objective/train/theoretical_loss": 3.3360155587282128, "objective/train/tokens_used": 1531063776, "objective/train/value_avg": -0.00614166259765625, "objective/train/value_loss": 0.00010328338976250961, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.2178955078125, "objective/train/value_reward_corr": 0.6826682776339061, "objective/train/value_std": 0.009429931640625, "objective/train/weight_avg": 1.000386118888855, "objective/train/weighted_lm_loss": 1.4084773063659668, "objective/train/weights_max": 1.1547927856445312, "objective/train/weights_min": 0.3694151043891907, "theoretical_loss": 3.3360155587282128, "tokens_seen": 3160539136 }, { "epoch": 0.92, "learning_rate": 4.2686351600738187e-05, "loss": 0.0703, "theoretical_loss": 3.3360155587282128, "tokens_seen": 3160539136 }, { "epoch": 0.92, "learning_rate": 4.2646232849233735e-05, "loss": 0.0678, "theoretical_loss": 3.3360051238632544, "tokens_seen": 3160670208 }, { "epoch": 0.92, "learning_rate": 4.2606114097729276e-05, "loss": 0.0596, "theoretical_loss": 3.3359946895521766, "tokens_seen": 3160801280 }, { "epoch": 0.92, "learning_rate": 4.256599534622483e-05, "loss": 0.0609, "theoretical_loss": 3.3359842557949264, "tokens_seen": 3160932352 }, { "epoch": 0.92, "learning_rate": 4.252587659472037e-05, "loss": 0.0608, "theoretical_loss": 3.335973822591453, "tokens_seen": 3161063424 }, { "epoch": 0.92, "learning_rate": 4.248575784321592e-05, "loss": 0.0644, "theoretical_loss": 3.3359633899417016, "tokens_seen": 3161194496 }, { "epoch": 0.92, "learning_rate": 4.244563909171147e-05, "loss": 0.064, "theoretical_loss": 3.3359529578456217, "tokens_seen": 3161325568 }, { "epoch": 0.92, "learning_rate": 4.240552034020701e-05, "loss": 0.0626, "theoretical_loss": 3.335942526303161, "tokens_seen": 3161456640 }, { "epoch": 0.92, "learning_rate": 4.2365401588702564e-05, "loss": 0.063, "theoretical_loss": 3.335932095314266, "tokens_seen": 3161587712 }, { "epoch": 0.92, "learning_rate": 4.232528283719811e-05, "loss": 0.0693, "theoretical_loss": 3.335921664878885, "tokens_seen": 3161718784 }, { "epoch": 0.92, "learning_rate": 4.2285164085693654e-05, "loss": 0.065, "theoretical_loss": 3.335911234996966, "tokens_seen": 3161849856 }, { "epoch": 0.92, "learning_rate": 4.22450453341892e-05, "loss": 0.0677, "theoretical_loss": 3.3359008056684565, "tokens_seen": 3161980928 }, { "epoch": 0.92, "learning_rate": 4.220492658268474e-05, "loss": 0.0684, "theoretical_loss": 3.3358903768933037, "tokens_seen": 3162112000 }, { "epoch": 0.92, "learning_rate": 4.21648078311803e-05, "loss": 0.0612, "theoretical_loss": 3.335879948671456, "tokens_seen": 3162243072 }, { "epoch": 0.92, "learning_rate": 4.2124689079675846e-05, "loss": 0.0638, "theoretical_loss": 3.3358695210028606, "tokens_seen": 3162374144 }, { "epoch": 0.92, "learning_rate": 4.208457032817139e-05, "loss": 0.0652, "theoretical_loss": 3.3358590938874655, "tokens_seen": 3162505216 }, { "epoch": 0.92, "learning_rate": 4.2044451576666935e-05, "loss": 0.0641, "theoretical_loss": 3.3358486673252186, "tokens_seen": 3162636288 }, { "epoch": 0.92, "learning_rate": 4.2004332825162476e-05, "loss": 0.0628, "theoretical_loss": 3.335838241316067, "tokens_seen": 3162767360 }, { "epoch": 0.92, "learning_rate": 4.196421407365803e-05, "loss": 0.0606, "theoretical_loss": 3.335827815859959, "tokens_seen": 3162898432 }, { "epoch": 0.92, "learning_rate": 4.192409532215358e-05, "loss": 0.0649, "theoretical_loss": 3.335817390956842, "tokens_seen": 3163029504 }, { "epoch": 0.92, "learning_rate": 4.188397657064912e-05, "loss": 0.0661, "theoretical_loss": 3.335806966606664, "tokens_seen": 3163160576 }, { "epoch": 0.92, "learning_rate": 4.184385781914467e-05, "loss": 0.0622, "theoretical_loss": 3.3357965428093728, "tokens_seen": 3163291648 }, { "epoch": 0.92, "learning_rate": 4.180373906764021e-05, "loss": 0.0668, "theoretical_loss": 3.335786119564916, "tokens_seen": 3163422720 }, { "epoch": 0.92, "learning_rate": 4.1763620316135765e-05, "loss": 0.0641, "theoretical_loss": 3.3357756968732413, "tokens_seen": 3163553792 }, { "epoch": 0.92, "learning_rate": 4.172350156463131e-05, "loss": 0.0699, "theoretical_loss": 3.335765274734297, "tokens_seen": 3163684864 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.0007287923363037407, "objective/train/docs_used": 1149760, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.371077299118042, "objective/train/original_loss": 1.371077060699463, "objective/train/theoretical_loss": 3.33575485314803, "objective/train/tokens_used": 1534340576, "objective/train/value_avg": -0.00528717041015625, "objective/train/value_loss": 0.00014300165639724582, "objective/train/value_max": -4.07099723815918e-05, "objective/train/value_min": -0.230712890625, "objective/train/value_reward_corr": 0.6362017862976356, "objective/train/value_std": 0.009857177734375, "objective/train/weight_avg": 1.0007941722869873, "objective/train/weighted_lm_loss": 1.3718750476837158, "objective/train/weights_max": 1.161043405532837, "objective/train/weights_min": 0.37251704931259155, "theoretical_loss": 3.33575485314803, "tokens_seen": 3163815936 }, { "epoch": 0.92, "learning_rate": 4.1683382813126854e-05, "loss": 0.0615, "theoretical_loss": 3.33575485314803, "tokens_seen": 3163815936 }, { "epoch": 0.92, "learning_rate": 4.16432640616224e-05, "loss": 0.067, "theoretical_loss": 3.3357444321143888, "tokens_seen": 3163947008 }, { "epoch": 0.92, "learning_rate": 4.160314531011795e-05, "loss": 0.0634, "theoretical_loss": 3.335734011633321, "tokens_seen": 3164078080 }, { "epoch": 0.92, "learning_rate": 4.15630265586135e-05, "loss": 0.0611, "theoretical_loss": 3.3357235917047747, "tokens_seen": 3164209152 }, { "epoch": 0.92, "learning_rate": 4.1522907807109046e-05, "loss": 0.0607, "theoretical_loss": 3.3357131723286972, "tokens_seen": 3164340224 }, { "epoch": 0.92, "learning_rate": 4.148278905560459e-05, "loss": 0.0647, "theoretical_loss": 3.3357027535050365, "tokens_seen": 3164471296 }, { "epoch": 0.92, "learning_rate": 4.1442670304100136e-05, "loss": 0.0641, "theoretical_loss": 3.33569233523374, "tokens_seen": 3164602368 }, { "epoch": 0.92, "learning_rate": 4.1402551552595684e-05, "loss": 0.062, "theoretical_loss": 3.3356819175147567, "tokens_seen": 3164733440 }, { "epoch": 0.92, "learning_rate": 4.136243280109123e-05, "loss": 0.0634, "theoretical_loss": 3.3356715003480337, "tokens_seen": 3164864512 }, { "epoch": 0.92, "learning_rate": 4.132231404958678e-05, "loss": 0.0616, "theoretical_loss": 3.3356610837335188, "tokens_seen": 3164995584 }, { "epoch": 0.92, "learning_rate": 4.128219529808232e-05, "loss": 0.0647, "theoretical_loss": 3.3356506676711604, "tokens_seen": 3165126656 }, { "epoch": 0.92, "learning_rate": 4.124207654657787e-05, "loss": 0.0632, "theoretical_loss": 3.3356402521609056, "tokens_seen": 3165257728 }, { "epoch": 0.92, "learning_rate": 4.120195779507342e-05, "loss": 0.0637, "theoretical_loss": 3.3356298372027027, "tokens_seen": 3165388800 }, { "epoch": 0.92, "learning_rate": 4.1161839043568965e-05, "loss": 0.0602, "theoretical_loss": 3.3356194227964995, "tokens_seen": 3165519872 }, { "epoch": 0.92, "learning_rate": 4.1121720292064513e-05, "loss": 0.066, "theoretical_loss": 3.335609008942244, "tokens_seen": 3165650944 }, { "epoch": 0.92, "learning_rate": 4.108160154056006e-05, "loss": 0.0632, "theoretical_loss": 3.3355985956398837, "tokens_seen": 3165782016 }, { "epoch": 0.92, "learning_rate": 4.10414827890556e-05, "loss": 0.0608, "theoretical_loss": 3.335588182889367, "tokens_seen": 3165913088 }, { "epoch": 0.92, "learning_rate": 4.100136403755115e-05, "loss": 0.0612, "theoretical_loss": 3.3355777706906418, "tokens_seen": 3166044160 }, { "epoch": 0.92, "learning_rate": 4.09612452860467e-05, "loss": 0.0641, "theoretical_loss": 3.3355673590436554, "tokens_seen": 3166175232 }, { "epoch": 0.92, "learning_rate": 4.092112653454225e-05, "loss": 0.0664, "theoretical_loss": 3.3355569479483567, "tokens_seen": 3166306304 }, { "epoch": 0.92, "learning_rate": 4.0881007783037795e-05, "loss": 0.0647, "theoretical_loss": 3.3355465374046926, "tokens_seen": 3166437376 }, { "epoch": 0.92, "learning_rate": 4.0840889031533336e-05, "loss": 0.065, "theoretical_loss": 3.335536127412612, "tokens_seen": 3166568448 }, { "epoch": 0.92, "learning_rate": 4.0800770280028884e-05, "loss": 0.0637, "theoretical_loss": 3.335525717972062, "tokens_seen": 3166699520 }, { "epoch": 0.92, "learning_rate": 4.076065152852443e-05, "loss": 0.0662, "theoretical_loss": 3.335515309082991, "tokens_seen": 3166830592 }, { "epoch": 0.92, "learning_rate": 4.072053277701998e-05, "loss": 0.0625, "theoretical_loss": 3.335504900745347, "tokens_seen": 3166961664 }, { "epoch": 0.92, "objective/train/advantage_avg": 0.00021225240197964013, "objective/train/docs_used": 1150570, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1777477264404297, "objective/train/original_loss": 1.1777477264404297, "objective/train/theoretical_loss": 3.3354944929590777, "objective/train/tokens_used": 1537617376, "objective/train/value_avg": -0.006900787353515625, "objective/train/value_loss": 0.00015564082423225045, "objective/train/value_max": -3.707408905029297e-05, "objective/train/value_min": -0.286376953125, "objective/train/value_reward_corr": 0.7048592027047307, "objective/train/value_std": 0.01236724853515625, "objective/train/weight_avg": 1.0002844333648682, "objective/train/weighted_lm_loss": 1.1773625612258911, "objective/train/weights_max": 1.1423146724700928, "objective/train/weights_min": 0.38794201612472534, "theoretical_loss": 3.3354944929590777, "tokens_seen": 3167092736 }, { "epoch": 0.92, "learning_rate": 4.068041402551553e-05, "loss": 0.0588, "theoretical_loss": 3.3354944929590777, "tokens_seen": 3167092736 }, { "epoch": 0.92, "learning_rate": 4.064029527401107e-05, "loss": 0.0683, "theoretical_loss": 3.335484085724132, "tokens_seen": 3167223808 }, { "epoch": 0.92, "learning_rate": 4.0600176522506625e-05, "loss": 0.0601, "theoretical_loss": 3.335473679040456, "tokens_seen": 3167354880 }, { "epoch": 0.92, "learning_rate": 4.0560057771002166e-05, "loss": 0.0623, "theoretical_loss": 3.335463272907999, "tokens_seen": 3167485952 }, { "epoch": 0.92, "learning_rate": 4.0519939019497714e-05, "loss": 0.0595, "theoretical_loss": 3.335452867326709, "tokens_seen": 3167617024 }, { "epoch": 0.92, "learning_rate": 4.047982026799326e-05, "loss": 0.0615, "theoretical_loss": 3.335442462296534, "tokens_seen": 3167748096 }, { "epoch": 0.92, "learning_rate": 4.04397015164888e-05, "loss": 0.0606, "theoretical_loss": 3.335432057817422, "tokens_seen": 3167879168 }, { "epoch": 0.92, "learning_rate": 4.039958276498436e-05, "loss": 0.0622, "theoretical_loss": 3.3354216538893207, "tokens_seen": 3168010240 }, { "epoch": 0.92, "learning_rate": 4.0359464013479906e-05, "loss": 0.0677, "theoretical_loss": 3.335411250512178, "tokens_seen": 3168141312 }, { "epoch": 0.92, "learning_rate": 4.031934526197545e-05, "loss": 0.0652, "theoretical_loss": 3.335400847685942, "tokens_seen": 3168272384 }, { "epoch": 0.92, "learning_rate": 4.0279226510470996e-05, "loss": 0.0615, "theoretical_loss": 3.335390445410561, "tokens_seen": 3168403456 }, { "epoch": 0.92, "learning_rate": 4.023910775896654e-05, "loss": 0.0662, "theoretical_loss": 3.335380043685983, "tokens_seen": 3168534528 }, { "epoch": 0.92, "learning_rate": 4.019898900746209e-05, "loss": 0.0665, "theoretical_loss": 3.3353696425121564, "tokens_seen": 3168665600 }, { "epoch": 0.92, "learning_rate": 4.015887025595764e-05, "loss": 0.0638, "theoretical_loss": 3.3353592418890283, "tokens_seen": 3168796672 }, { "epoch": 0.92, "learning_rate": 4.011875150445318e-05, "loss": 0.0642, "theoretical_loss": 3.335348841816548, "tokens_seen": 3168927744 }, { "epoch": 0.92, "learning_rate": 4.007863275294873e-05, "loss": 0.0662, "theoretical_loss": 3.3353384422946624, "tokens_seen": 3169058816 }, { "epoch": 0.92, "learning_rate": 4.003851400144427e-05, "loss": 0.0682, "theoretical_loss": 3.33532804332332, "tokens_seen": 3169189888 }, { "epoch": 0.92, "learning_rate": 3.9998395249939825e-05, "loss": 0.0629, "theoretical_loss": 3.335317644902469, "tokens_seen": 3169320960 }, { "epoch": 0.92, "learning_rate": 3.995827649843537e-05, "loss": 0.0627, "theoretical_loss": 3.335307247032058, "tokens_seen": 3169452032 }, { "epoch": 0.92, "learning_rate": 3.9918157746930915e-05, "loss": 0.066, "theoretical_loss": 3.335296849712034, "tokens_seen": 3169583104 }, { "epoch": 0.92, "learning_rate": 3.987803899542646e-05, "loss": 0.0636, "theoretical_loss": 3.3352864529423463, "tokens_seen": 3169714176 }, { "epoch": 0.92, "learning_rate": 3.983792024392201e-05, "loss": 0.0608, "theoretical_loss": 3.335276056722942, "tokens_seen": 3169845248 }, { "epoch": 0.92, "learning_rate": 3.979780149241756e-05, "loss": 0.0677, "theoretical_loss": 3.3352656610537696, "tokens_seen": 3169976320 }, { "epoch": 0.92, "learning_rate": 3.975768274091311e-05, "loss": 0.0636, "theoretical_loss": 3.3352552659347774, "tokens_seen": 3170107392 }, { "epoch": 0.92, "learning_rate": 3.971756398940865e-05, "loss": 0.0626, "theoretical_loss": 3.335244871365913, "tokens_seen": 3170238464 }, { "epoch": 0.92, "objective/train/advantage_avg": -0.000273563084192574, "objective/train/docs_used": 1151776, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4039186239242554, "objective/train/original_loss": 1.4039186239242554, "objective/train/theoretical_loss": 3.335234477347125, "objective/train/tokens_used": 1540894176, "objective/train/value_avg": -0.01062774658203125, "objective/train/value_loss": 0.00029328561504371464, "objective/train/value_max": -4.2319297790527344e-05, "objective/train/value_min": -0.5283203125, "objective/train/value_reward_corr": 0.7513325032789062, "objective/train/value_std": 0.018707275390625, "objective/train/weight_avg": 0.9998619556427002, "objective/train/weighted_lm_loss": 1.402757167816162, "objective/train/weights_max": 1.2395168542861938, "objective/train/weights_min": 0.3713989555835724, "theoretical_loss": 3.335234477347125, "tokens_seen": 3170369536 }, { "epoch": 0.92, "learning_rate": 3.9677445237904196e-05, "loss": 0.0679, "theoretical_loss": 3.335234477347125, "tokens_seen": 3170369536 }, { "epoch": 0.92, "learning_rate": 3.9637326486399744e-05, "loss": 0.0661, "theoretical_loss": 3.335224083878362, "tokens_seen": 3170500608 }, { "epoch": 0.92, "learning_rate": 3.959720773489529e-05, "loss": 0.0681, "theoretical_loss": 3.3352136909595713, "tokens_seen": 3170631680 }, { "epoch": 0.92, "learning_rate": 3.955708898339084e-05, "loss": 0.066, "theoretical_loss": 3.3352032985907014, "tokens_seen": 3170762752 }, { "epoch": 0.92, "learning_rate": 3.951697023188638e-05, "loss": 0.0612, "theoretical_loss": 3.3351929067717005, "tokens_seen": 3170893824 }, { "epoch": 0.92, "learning_rate": 3.947685148038193e-05, "loss": 0.0633, "theoretical_loss": 3.3351825155025163, "tokens_seen": 3171024896 }, { "epoch": 0.92, "learning_rate": 3.943673272887748e-05, "loss": 0.0638, "theoretical_loss": 3.335172124783098, "tokens_seen": 3171155968 }, { "epoch": 0.92, "learning_rate": 3.9396613977373026e-05, "loss": 0.0631, "theoretical_loss": 3.335161734613393, "tokens_seen": 3171287040 }, { "epoch": 0.92, "learning_rate": 3.9356495225868574e-05, "loss": 0.0626, "theoretical_loss": 3.33515134499335, "tokens_seen": 3171418112 }, { "epoch": 0.92, "learning_rate": 3.9316376474364115e-05, "loss": 0.0632, "theoretical_loss": 3.335140955922917, "tokens_seen": 3171549184 }, { "epoch": 0.92, "learning_rate": 3.927625772285966e-05, "loss": 0.0614, "theoretical_loss": 3.3351305674020417, "tokens_seen": 3171680256 }, { "epoch": 0.92, "learning_rate": 3.923613897135521e-05, "loss": 0.0663, "theoretical_loss": 3.335120179430673, "tokens_seen": 3171811328 }, { "epoch": 0.92, "learning_rate": 3.919602021985076e-05, "loss": 0.0627, "theoretical_loss": 3.3351097920087587, "tokens_seen": 3171942400 }, { "epoch": 0.92, "learning_rate": 3.915590146834631e-05, "loss": 0.0611, "theoretical_loss": 3.3350994051362473, "tokens_seen": 3172073472 }, { "epoch": 0.92, "learning_rate": 3.9115782716841855e-05, "loss": 0.0659, "theoretical_loss": 3.335089018813087, "tokens_seen": 3172204544 }, { "epoch": 0.92, "learning_rate": 3.90756639653374e-05, "loss": 0.0605, "theoretical_loss": 3.335078633039226, "tokens_seen": 3172335616 }, { "epoch": 0.92, "learning_rate": 3.9035545213832945e-05, "loss": 0.0639, "theoretical_loss": 3.3350682478146125, "tokens_seen": 3172466688 }, { "epoch": 0.92, "learning_rate": 3.899542646232849e-05, "loss": 0.0625, "theoretical_loss": 3.335057863139195, "tokens_seen": 3172597760 }, { "epoch": 0.92, "learning_rate": 3.895530771082404e-05, "loss": 0.0628, "theoretical_loss": 3.3350474790129216, "tokens_seen": 3172728832 }, { "epoch": 0.92, "learning_rate": 3.891518895931959e-05, "loss": 0.0662, "theoretical_loss": 3.3350370954357405, "tokens_seen": 3172859904 }, { "epoch": 0.92, "learning_rate": 3.887507020781513e-05, "loss": 0.0614, "theoretical_loss": 3.3350267124076, "tokens_seen": 3172990976 }, { "epoch": 0.92, "learning_rate": 3.883495145631068e-05, "loss": 0.0658, "theoretical_loss": 3.3350163299284485, "tokens_seen": 3173122048 }, { "epoch": 0.92, "learning_rate": 3.8794832704806226e-05, "loss": 0.0662, "theoretical_loss": 3.335005947998235, "tokens_seen": 3173253120 }, { "epoch": 0.92, "learning_rate": 3.8754713953301774e-05, "loss": 0.0678, "theoretical_loss": 3.334995566616906, "tokens_seen": 3173384192 }, { "epoch": 0.92, "learning_rate": 3.871459520179732e-05, "loss": 0.0642, "theoretical_loss": 3.3349851857844115, "tokens_seen": 3173515264 }, { "epoch": 0.92, "objective/train/advantage_avg": -0.0005601816810667515, "objective/train/docs_used": 1153146, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2333840131759644, "objective/train/original_loss": 1.2333840131759644, "objective/train/theoretical_loss": 3.334974805500699, "objective/train/tokens_used": 1544170976, "objective/train/value_avg": -0.009613037109375, "objective/train/value_loss": 0.00024637652677483857, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.355224609375, "objective/train/value_reward_corr": 0.7266098019754481, "objective/train/value_std": 0.0164337158203125, "objective/train/weight_avg": 0.9995500445365906, "objective/train/weighted_lm_loss": 1.2322841882705688, "objective/train/weights_max": 1.2056413888931274, "objective/train/weights_min": 0.3680877387523651, "theoretical_loss": 3.334974805500699, "tokens_seen": 3173646336 }, { "epoch": 0.92, "learning_rate": 3.8674476450292864e-05, "loss": 0.0616, "theoretical_loss": 3.334974805500699, "tokens_seen": 3173646336 }, { "epoch": 0.92, "learning_rate": 3.863435769878842e-05, "loss": 0.0628, "theoretical_loss": 3.3349644257657167, "tokens_seen": 3173777408 }, { "epoch": 0.92, "learning_rate": 3.8594238947283967e-05, "loss": 0.0614, "theoretical_loss": 3.3349540465794134, "tokens_seen": 3173908480 }, { "epoch": 0.92, "learning_rate": 3.855412019577951e-05, "loss": 0.0612, "theoretical_loss": 3.3349436679417375, "tokens_seen": 3174039552 }, { "epoch": 0.92, "learning_rate": 3.8514001444275056e-05, "loss": 0.0649, "theoretical_loss": 3.334933289852637, "tokens_seen": 3174170624 }, { "epoch": 0.92, "learning_rate": 3.84738826927706e-05, "loss": 0.0636, "theoretical_loss": 3.3349229123120603, "tokens_seen": 3174301696 }, { "epoch": 0.92, "learning_rate": 3.843376394126615e-05, "loss": 0.0655, "theoretical_loss": 3.334912535319956, "tokens_seen": 3174432768 }, { "epoch": 0.92, "learning_rate": 3.83936451897617e-05, "loss": 0.0658, "theoretical_loss": 3.334902158876272, "tokens_seen": 3174563840 }, { "epoch": 0.92, "learning_rate": 3.835352643825724e-05, "loss": 0.0671, "theoretical_loss": 3.3348917829809577, "tokens_seen": 3174694912 }, { "epoch": 0.92, "learning_rate": 3.831340768675279e-05, "loss": 0.0644, "theoretical_loss": 3.33488140763396, "tokens_seen": 3174825984 }, { "epoch": 0.92, "learning_rate": 3.827328893524833e-05, "loss": 0.0671, "theoretical_loss": 3.3348710328352285, "tokens_seen": 3174957056 }, { "epoch": 0.92, "learning_rate": 3.8233170183743886e-05, "loss": 0.0646, "theoretical_loss": 3.334860658584711, "tokens_seen": 3175088128 }, { "epoch": 0.92, "learning_rate": 3.8193051432239434e-05, "loss": 0.0628, "theoretical_loss": 3.3348502848823562, "tokens_seen": 3175219200 }, { "epoch": 0.92, "learning_rate": 3.8152932680734975e-05, "loss": 0.0634, "theoretical_loss": 3.3348399117281122, "tokens_seen": 3175350272 }, { "epoch": 0.92, "learning_rate": 3.811281392923052e-05, "loss": 0.0624, "theoretical_loss": 3.334829539121928, "tokens_seen": 3175481344 }, { "epoch": 0.92, "learning_rate": 3.8072695177726064e-05, "loss": 0.0613, "theoretical_loss": 3.3348191670637513, "tokens_seen": 3175612416 }, { "epoch": 0.92, "learning_rate": 3.803257642622162e-05, "loss": 0.065, "theoretical_loss": 3.334808795553531, "tokens_seen": 3175743488 }, { "epoch": 0.92, "learning_rate": 3.799245767471717e-05, "loss": 0.0628, "theoretical_loss": 3.334798424591215, "tokens_seen": 3175874560 }, { "epoch": 0.92, "learning_rate": 3.795233892321271e-05, "loss": 0.0653, "theoretical_loss": 3.3347880541767525, "tokens_seen": 3176005632 }, { "epoch": 0.92, "learning_rate": 3.7912220171708257e-05, "loss": 0.0602, "theoretical_loss": 3.334777684310091, "tokens_seen": 3176136704 }, { "epoch": 0.93, "learning_rate": 3.7872101420203805e-05, "loss": 0.063, "theoretical_loss": 3.33476731499118, "tokens_seen": 3176267776 }, { "epoch": 0.93, "learning_rate": 3.783198266869935e-05, "loss": 0.0675, "theoretical_loss": 3.3347569462199678, "tokens_seen": 3176398848 }, { "epoch": 0.93, "learning_rate": 3.77918639171949e-05, "loss": 0.0658, "theoretical_loss": 3.334746577996402, "tokens_seen": 3176529920 }, { "epoch": 0.93, "learning_rate": 3.775174516569044e-05, "loss": 0.066, "theoretical_loss": 3.3347362103204317, "tokens_seen": 3176660992 }, { "epoch": 0.93, "learning_rate": 3.771162641418599e-05, "loss": 0.0644, "theoretical_loss": 3.3347258431920057, "tokens_seen": 3176792064 }, { "epoch": 0.93, "objective/train/advantage_avg": -0.00021951283270027488, "objective/train/docs_used": 1154349, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3528246879577637, "objective/train/original_loss": 1.3528245687484741, "objective/train/theoretical_loss": 3.3347154766110716, "objective/train/tokens_used": 1547447776, "objective/train/value_avg": -0.005321502685546875, "objective/train/value_loss": 0.00011804938549175858, "objective/train/value_max": -1.9371509552001953e-05, "objective/train/value_min": -0.2105712890625, "objective/train/value_reward_corr": 0.7405519278879418, "objective/train/value_std": 0.01055145263671875, "objective/train/weight_avg": 0.9998354315757751, "objective/train/weighted_lm_loss": 1.3518730401992798, "objective/train/weights_max": 1.1222734451293945, "objective/train/weights_min": 0.38968625664711, "theoretical_loss": 3.3347154766110716, "tokens_seen": 3176923136 }, { "epoch": 0.93, "learning_rate": 3.767150766268154e-05, "loss": 0.0674, "theoretical_loss": 3.3347154766110716, "tokens_seen": 3176923136 }, { "epoch": 0.93, "learning_rate": 3.7631388911177086e-05, "loss": 0.064, "theoretical_loss": 3.3347051105775787, "tokens_seen": 3177054208 }, { "epoch": 0.93, "learning_rate": 3.7591270159672634e-05, "loss": 0.0641, "theoretical_loss": 3.3346947450914755, "tokens_seen": 3177185280 }, { "epoch": 0.93, "learning_rate": 3.7551151408168175e-05, "loss": 0.0666, "theoretical_loss": 3.33468438015271, "tokens_seen": 3177316352 }, { "epoch": 0.93, "learning_rate": 3.7511032656663724e-05, "loss": 0.0661, "theoretical_loss": 3.334674015761231, "tokens_seen": 3177447424 }, { "epoch": 0.93, "learning_rate": 3.747091390515927e-05, "loss": 0.0637, "theoretical_loss": 3.334663651916987, "tokens_seen": 3177578496 }, { "epoch": 0.93, "learning_rate": 3.743079515365482e-05, "loss": 0.0611, "theoretical_loss": 3.3346532886199265, "tokens_seen": 3177709568 }, { "epoch": 0.93, "learning_rate": 3.739067640215037e-05, "loss": 0.0628, "theoretical_loss": 3.334642925869998, "tokens_seen": 3177840640 }, { "epoch": 0.93, "learning_rate": 3.7350557650645916e-05, "loss": 0.0666, "theoretical_loss": 3.3346325636671503, "tokens_seen": 3177971712 }, { "epoch": 0.93, "learning_rate": 3.731043889914146e-05, "loss": 0.0617, "theoretical_loss": 3.3346222020113316, "tokens_seen": 3178102784 }, { "epoch": 0.93, "learning_rate": 3.7270320147637005e-05, "loss": 0.0636, "theoretical_loss": 3.334611840902491, "tokens_seen": 3178233856 }, { "epoch": 0.93, "learning_rate": 3.723020139613255e-05, "loss": 0.0637, "theoretical_loss": 3.3346014803405764, "tokens_seen": 3178364928 }, { "epoch": 0.93, "learning_rate": 3.71900826446281e-05, "loss": 0.0675, "theoretical_loss": 3.3345911203255367, "tokens_seen": 3178496000 }, { "epoch": 0.93, "learning_rate": 3.714996389312365e-05, "loss": 0.0662, "theoretical_loss": 3.3345807608573206, "tokens_seen": 3178627072 }, { "epoch": 0.93, "learning_rate": 3.710984514161919e-05, "loss": 0.0615, "theoretical_loss": 3.334570401935877, "tokens_seen": 3178758144 }, { "epoch": 0.93, "learning_rate": 3.706972639011474e-05, "loss": 0.0655, "theoretical_loss": 3.334560043561153, "tokens_seen": 3178889216 }, { "epoch": 0.93, "learning_rate": 3.702960763861029e-05, "loss": 0.0664, "theoretical_loss": 3.334549685733099, "tokens_seen": 3179020288 }, { "epoch": 0.93, "learning_rate": 3.6989488887105835e-05, "loss": 0.0667, "theoretical_loss": 3.3345393284516627, "tokens_seen": 3179151360 }, { "epoch": 0.93, "learning_rate": 3.694937013560138e-05, "loss": 0.0659, "theoretical_loss": 3.334528971716793, "tokens_seen": 3179282432 }, { "epoch": 0.93, "learning_rate": 3.6909251384096924e-05, "loss": 0.0619, "theoretical_loss": 3.334518615528438, "tokens_seen": 3179413504 }, { "epoch": 0.93, "learning_rate": 3.686913263259247e-05, "loss": 0.0652, "theoretical_loss": 3.3345082598865474, "tokens_seen": 3179544576 }, { "epoch": 0.93, "learning_rate": 3.682901388108802e-05, "loss": 0.066, "theoretical_loss": 3.334497904791069, "tokens_seen": 3179675648 }, { "epoch": 0.93, "learning_rate": 3.678889512958357e-05, "loss": 0.0626, "theoretical_loss": 3.3344875502419513, "tokens_seen": 3179806720 }, { "epoch": 0.93, "learning_rate": 3.6748776378079116e-05, "loss": 0.0651, "theoretical_loss": 3.3344771962391437, "tokens_seen": 3179937792 }, { "epoch": 0.93, "learning_rate": 3.670865762657466e-05, "loss": 0.0655, "theoretical_loss": 3.3344668427825943, "tokens_seen": 3180068864 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.00013871349801775068, "objective/train/docs_used": 1155539, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2456732988357544, "objective/train/original_loss": 1.2456732988357544, "objective/train/theoretical_loss": 3.334456489872252, "objective/train/tokens_used": 1550724576, "objective/train/value_avg": -0.00746917724609375, "objective/train/value_loss": 0.00028417675639502704, "objective/train/value_max": -1.3113021850585938e-05, "objective/train/value_min": -0.33447265625, "objective/train/value_reward_corr": 0.6574814075489908, "objective/train/value_std": 0.014801025390625, "objective/train/weight_avg": 1.0002650022506714, "objective/train/weighted_lm_loss": 1.2451051473617554, "objective/train/weights_max": 1.29947829246521, "objective/train/weights_min": 0.37699440121650696, "theoretical_loss": 3.334456489872252, "tokens_seen": 3180199936 }, { "epoch": 0.93, "learning_rate": 3.666853887507021e-05, "loss": 0.0628, "theoretical_loss": 3.334456489872252, "tokens_seen": 3180199936 }, { "epoch": 0.93, "learning_rate": 3.662842012356576e-05, "loss": 0.0643, "theoretical_loss": 3.334446137508065, "tokens_seen": 3180331008 }, { "epoch": 0.93, "learning_rate": 3.65883013720613e-05, "loss": 0.0603, "theoretical_loss": 3.334435785689983, "tokens_seen": 3180462080 }, { "epoch": 0.93, "learning_rate": 3.654818262055685e-05, "loss": 0.0668, "theoretical_loss": 3.3344254344179536, "tokens_seen": 3180593152 }, { "epoch": 0.93, "learning_rate": 3.650806386905239e-05, "loss": 0.0667, "theoretical_loss": 3.334415083691926, "tokens_seen": 3180724224 }, { "epoch": 0.93, "learning_rate": 3.6467945117547946e-05, "loss": 0.0671, "theoretical_loss": 3.334404733511849, "tokens_seen": 3180855296 }, { "epoch": 0.93, "learning_rate": 3.6427826366043494e-05, "loss": 0.0652, "theoretical_loss": 3.3343943838776715, "tokens_seen": 3180986368 }, { "epoch": 0.93, "learning_rate": 3.6387707614539035e-05, "loss": 0.0652, "theoretical_loss": 3.334384034789341, "tokens_seen": 3181117440 }, { "epoch": 0.93, "learning_rate": 3.634758886303458e-05, "loss": 0.0654, "theoretical_loss": 3.334373686246808, "tokens_seen": 3181248512 }, { "epoch": 0.93, "learning_rate": 3.6307470111530125e-05, "loss": 0.0655, "theoretical_loss": 3.33436333825002, "tokens_seen": 3181379584 }, { "epoch": 0.93, "learning_rate": 3.626735136002568e-05, "loss": 0.0654, "theoretical_loss": 3.3343529907989264, "tokens_seen": 3181510656 }, { "epoch": 0.93, "learning_rate": 3.622723260852123e-05, "loss": 0.0649, "theoretical_loss": 3.334342643893475, "tokens_seen": 3181641728 }, { "epoch": 0.93, "learning_rate": 3.618711385701677e-05, "loss": 0.0651, "theoretical_loss": 3.3343322975336154, "tokens_seen": 3181772800 }, { "epoch": 0.93, "learning_rate": 3.614699510551232e-05, "loss": 0.0639, "theoretical_loss": 3.3343219517192964, "tokens_seen": 3181903872 }, { "epoch": 0.93, "learning_rate": 3.610687635400786e-05, "loss": 0.0655, "theoretical_loss": 3.3343116064504663, "tokens_seen": 3182034944 }, { "epoch": 0.93, "learning_rate": 3.606675760250341e-05, "loss": 0.0648, "theoretical_loss": 3.334301261727074, "tokens_seen": 3182166016 }, { "epoch": 0.93, "learning_rate": 3.602663885099896e-05, "loss": 0.0647, "theoretical_loss": 3.3342909175490685, "tokens_seen": 3182297088 }, { "epoch": 0.93, "learning_rate": 3.59865200994945e-05, "loss": 0.0607, "theoretical_loss": 3.3342805739163985, "tokens_seen": 3182428160 }, { "epoch": 0.93, "learning_rate": 3.594640134799005e-05, "loss": 0.0678, "theoretical_loss": 3.3342702308290124, "tokens_seen": 3182559232 }, { "epoch": 0.93, "learning_rate": 3.59062825964856e-05, "loss": 0.0649, "theoretical_loss": 3.3342598882868595, "tokens_seen": 3182690304 }, { "epoch": 0.93, "learning_rate": 3.5866163844981147e-05, "loss": 0.0638, "theoretical_loss": 3.3342495462898882, "tokens_seen": 3182821376 }, { "epoch": 0.93, "learning_rate": 3.5826045093476695e-05, "loss": 0.0619, "theoretical_loss": 3.334239204838048, "tokens_seen": 3182952448 }, { "epoch": 0.93, "learning_rate": 3.5785926341972236e-05, "loss": 0.0674, "theoretical_loss": 3.334228863931287, "tokens_seen": 3183083520 }, { "epoch": 0.93, "learning_rate": 3.5745807590467784e-05, "loss": 0.0639, "theoretical_loss": 3.3342185235695543, "tokens_seen": 3183214592 }, { "epoch": 0.93, "learning_rate": 3.570568883896333e-05, "loss": 0.0642, "theoretical_loss": 3.3342081837527986, "tokens_seen": 3183345664 }, { "epoch": 0.93, "objective/train/advantage_avg": -0.00016063837392721325, "objective/train/docs_used": 1156704, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.230084776878357, "objective/train/original_loss": 1.2300846576690674, "objective/train/theoretical_loss": 3.334197844480969, "objective/train/tokens_used": 1554001376, "objective/train/value_avg": -0.005016326904296875, "objective/train/value_loss": 0.00011600170546444133, "objective/train/value_max": -3.319978713989258e-05, "objective/train/value_min": -0.5458984375, "objective/train/value_reward_corr": 0.758850845781315, "objective/train/value_std": 0.0106353759765625, "objective/train/weight_avg": 0.9998940229415894, "objective/train/weighted_lm_loss": 1.23052978515625, "objective/train/weights_max": 1.150712013244629, "objective/train/weights_min": 0.5409542918205261, "theoretical_loss": 3.334197844480969, "tokens_seen": 3183476736 }, { "epoch": 0.93, "learning_rate": 3.566557008745888e-05, "loss": 0.0659, "theoretical_loss": 3.334197844480969, "tokens_seen": 3183476736 }, { "epoch": 0.93, "learning_rate": 3.562545133595443e-05, "loss": 0.0659, "theoretical_loss": 3.334187505754014, "tokens_seen": 3183607808 }, { "epoch": 0.93, "learning_rate": 3.558533258444997e-05, "loss": 0.0642, "theoretical_loss": 3.334177167571883, "tokens_seen": 3183738880 }, { "epoch": 0.93, "learning_rate": 3.554521383294552e-05, "loss": 0.0644, "theoretical_loss": 3.3341668299345244, "tokens_seen": 3183869952 }, { "epoch": 0.93, "learning_rate": 3.5505095081441066e-05, "loss": 0.0635, "theoretical_loss": 3.334156492841887, "tokens_seen": 3184001024 }, { "epoch": 0.93, "learning_rate": 3.5464976329936614e-05, "loss": 0.0655, "theoretical_loss": 3.3341461562939196, "tokens_seen": 3184132096 }, { "epoch": 0.93, "learning_rate": 3.542485757843216e-05, "loss": 0.0644, "theoretical_loss": 3.3341358202905718, "tokens_seen": 3184263168 }, { "epoch": 0.93, "learning_rate": 3.538473882692771e-05, "loss": 0.0644, "theoretical_loss": 3.334125484831792, "tokens_seen": 3184394240 }, { "epoch": 0.93, "learning_rate": 3.534462007542325e-05, "loss": 0.0668, "theoretical_loss": 3.334115149917529, "tokens_seen": 3184525312 }, { "epoch": 0.93, "learning_rate": 3.53045013239188e-05, "loss": 0.0623, "theoretical_loss": 3.334104815547732, "tokens_seen": 3184656384 }, { "epoch": 0.93, "learning_rate": 3.526438257241435e-05, "loss": 0.0646, "theoretical_loss": 3.3340944817223495, "tokens_seen": 3184787456 }, { "epoch": 0.93, "learning_rate": 3.5224263820909895e-05, "loss": 0.0633, "theoretical_loss": 3.334084148441331, "tokens_seen": 3184918528 }, { "epoch": 0.93, "learning_rate": 3.518414506940544e-05, "loss": 0.0631, "theoretical_loss": 3.334073815704625, "tokens_seen": 3185049600 }, { "epoch": 0.93, "learning_rate": 3.5144026317900984e-05, "loss": 0.0662, "theoretical_loss": 3.33406348351218, "tokens_seen": 3185180672 }, { "epoch": 0.93, "learning_rate": 3.510390756639653e-05, "loss": 0.0672, "theoretical_loss": 3.3340531518639462, "tokens_seen": 3185311744 }, { "epoch": 0.93, "learning_rate": 3.506378881489208e-05, "loss": 0.0645, "theoretical_loss": 3.3340428207598714, "tokens_seen": 3185442816 }, { "epoch": 0.93, "learning_rate": 3.502367006338763e-05, "loss": 0.0612, "theoretical_loss": 3.3340324901999048, "tokens_seen": 3185573888 }, { "epoch": 0.93, "learning_rate": 3.498355131188318e-05, "loss": 0.0659, "theoretical_loss": 3.334022160183996, "tokens_seen": 3185704960 }, { "epoch": 0.93, "learning_rate": 3.494343256037872e-05, "loss": 0.064, "theoretical_loss": 3.3340118307120927, "tokens_seen": 3185836032 }, { "epoch": 0.93, "learning_rate": 3.4903313808874266e-05, "loss": 0.0646, "theoretical_loss": 3.334001501784145, "tokens_seen": 3185967104 }, { "epoch": 0.93, "learning_rate": 3.486319505736982e-05, "loss": 0.0625, "theoretical_loss": 3.3339911734001015, "tokens_seen": 3186098176 }, { "epoch": 0.93, "learning_rate": 3.482307630586536e-05, "loss": 0.0633, "theoretical_loss": 3.3339808455599114, "tokens_seen": 3186229248 }, { "epoch": 0.93, "learning_rate": 3.478295755436091e-05, "loss": 0.0637, "theoretical_loss": 3.3339705182635235, "tokens_seen": 3186360320 }, { "epoch": 0.93, "learning_rate": 3.474283880285645e-05, "loss": 0.0633, "theoretical_loss": 3.3339601915108865, "tokens_seen": 3186491392 }, { "epoch": 0.93, "learning_rate": 3.4702720051352006e-05, "loss": 0.0633, "theoretical_loss": 3.3339498653019497, "tokens_seen": 3186622464 }, { "epoch": 0.93, "objective/train/advantage_avg": 7.25181826055632e-06, "objective/train/docs_used": 1157890, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2783713340759277, "objective/train/original_loss": 1.2783712148666382, "objective/train/theoretical_loss": 3.3339395396366625, "objective/train/tokens_used": 1557278176, "objective/train/value_avg": -0.0081939697265625, "objective/train/value_loss": 0.00019008590606972575, "objective/train/value_max": -3.3974647521972656e-05, "objective/train/value_min": -0.474609375, "objective/train/value_reward_corr": 0.6885028968294706, "objective/train/value_std": 0.0141754150390625, "objective/train/weight_avg": 1.000093936920166, "objective/train/weighted_lm_loss": 1.2782567739486694, "objective/train/weights_max": 1.454423189163208, "objective/train/weights_min": 0.36904817819595337, "theoretical_loss": 3.3339395396366625, "tokens_seen": 3186753536 }, { "epoch": 0.93, "learning_rate": 3.4662601299847554e-05, "loss": 0.0613, "theoretical_loss": 3.3339395396366625, "tokens_seen": 3186753536 }, { "epoch": 0.93, "learning_rate": 3.4622482548343096e-05, "loss": 0.0633, "theoretical_loss": 3.3339292145149733, "tokens_seen": 3186884608 }, { "epoch": 0.93, "learning_rate": 3.4582363796838644e-05, "loss": 0.0654, "theoretical_loss": 3.333918889936831, "tokens_seen": 3187015680 }, { "epoch": 0.93, "learning_rate": 3.4542245045334185e-05, "loss": 0.0659, "theoretical_loss": 3.3339085659021857, "tokens_seen": 3187146752 }, { "epoch": 0.93, "learning_rate": 3.450212629382974e-05, "loss": 0.0618, "theoretical_loss": 3.333898242410985, "tokens_seen": 3187277824 }, { "epoch": 0.93, "learning_rate": 3.446200754232529e-05, "loss": 0.0663, "theoretical_loss": 3.333887919463179, "tokens_seen": 3187408896 }, { "epoch": 0.93, "learning_rate": 3.442188879082083e-05, "loss": 0.0648, "theoretical_loss": 3.3338775970587164, "tokens_seen": 3187539968 }, { "epoch": 0.93, "learning_rate": 3.438177003931638e-05, "loss": 0.0669, "theoretical_loss": 3.333867275197546, "tokens_seen": 3187671040 }, { "epoch": 0.93, "learning_rate": 3.434165128781192e-05, "loss": 0.0657, "theoretical_loss": 3.333856953879618, "tokens_seen": 3187802112 }, { "epoch": 0.93, "learning_rate": 3.4301532536307473e-05, "loss": 0.0677, "theoretical_loss": 3.33384663310488, "tokens_seen": 3187933184 }, { "epoch": 0.93, "learning_rate": 3.426141378480302e-05, "loss": 0.0629, "theoretical_loss": 3.3338363128732817, "tokens_seen": 3188064256 }, { "epoch": 0.93, "learning_rate": 3.422129503329856e-05, "loss": 0.0644, "theoretical_loss": 3.3338259931847727, "tokens_seen": 3188195328 }, { "epoch": 0.93, "learning_rate": 3.418117628179411e-05, "loss": 0.0624, "theoretical_loss": 3.3338156740393012, "tokens_seen": 3188326400 }, { "epoch": 0.93, "learning_rate": 3.414105753028966e-05, "loss": 0.0654, "theoretical_loss": 3.3338053554368163, "tokens_seen": 3188457472 }, { "epoch": 0.93, "learning_rate": 3.410093877878521e-05, "loss": 0.0639, "theoretical_loss": 3.333795037377268, "tokens_seen": 3188588544 }, { "epoch": 0.93, "learning_rate": 3.4060820027280755e-05, "loss": 0.0634, "theoretical_loss": 3.333784719860605, "tokens_seen": 3188719616 }, { "epoch": 0.93, "learning_rate": 3.4020701275776296e-05, "loss": 0.0665, "theoretical_loss": 3.333774402886776, "tokens_seen": 3188850688 }, { "epoch": 0.93, "learning_rate": 3.3980582524271844e-05, "loss": 0.0616, "theoretical_loss": 3.333764086455731, "tokens_seen": 3188981760 }, { "epoch": 0.93, "learning_rate": 3.394046377276739e-05, "loss": 0.0622, "theoretical_loss": 3.333753770567418, "tokens_seen": 3189112832 }, { "epoch": 0.93, "learning_rate": 3.390034502126294e-05, "loss": 0.0609, "theoretical_loss": 3.333743455221787, "tokens_seen": 3189243904 }, { "epoch": 0.93, "learning_rate": 3.386022626975849e-05, "loss": 0.0655, "theoretical_loss": 3.333733140418787, "tokens_seen": 3189374976 }, { "epoch": 0.93, "learning_rate": 3.382010751825403e-05, "loss": 0.0636, "theoretical_loss": 3.333722826158367, "tokens_seen": 3189506048 }, { "epoch": 0.93, "learning_rate": 3.377998876674958e-05, "loss": 0.0652, "theoretical_loss": 3.333712512440476, "tokens_seen": 3189637120 }, { "epoch": 0.93, "learning_rate": 3.3739870015245126e-05, "loss": 0.0676, "theoretical_loss": 3.3337021992650637, "tokens_seen": 3189768192 }, { "epoch": 0.93, "learning_rate": 3.3699751263740674e-05, "loss": 0.065, "theoretical_loss": 3.333691886632079, "tokens_seen": 3189899264 }, { "epoch": 0.93, "objective/train/advantage_avg": 0.000974375638179481, "objective/train/docs_used": 1159129, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5308678150177002, "objective/train/original_loss": 1.5308678150177002, "objective/train/theoretical_loss": 3.3336815745414707, "objective/train/tokens_used": 1560554976, "objective/train/value_avg": -0.00946044921875, "objective/train/value_loss": 0.0003855634422507137, "objective/train/value_max": -3.820657730102539e-05, "objective/train/value_min": -0.67041015625, "objective/train/value_reward_corr": 0.685462365296502, "objective/train/value_std": 0.020294189453125, "objective/train/weight_avg": 1.001145601272583, "objective/train/weighted_lm_loss": 1.5322003364562988, "objective/train/weights_max": 1.5188086032867432, "objective/train/weights_min": 0.3770807087421417, "theoretical_loss": 3.3336815745414707, "tokens_seen": 3190030336 }, { "epoch": 0.93, "learning_rate": 3.365963251223622e-05, "loss": 0.0675, "theoretical_loss": 3.3336815745414707, "tokens_seen": 3190030336 }, { "epoch": 0.93, "learning_rate": 3.361951376073176e-05, "loss": 0.0656, "theoretical_loss": 3.333671262993189, "tokens_seen": 3190161408 }, { "epoch": 0.93, "learning_rate": 3.357939500922731e-05, "loss": 0.0636, "theoretical_loss": 3.3336609519871816, "tokens_seen": 3190292480 }, { "epoch": 0.93, "learning_rate": 3.353927625772286e-05, "loss": 0.0661, "theoretical_loss": 3.333650641523399, "tokens_seen": 3190423552 }, { "epoch": 0.93, "learning_rate": 3.349915750621841e-05, "loss": 0.0645, "theoretical_loss": 3.33364033160179, "tokens_seen": 3190554624 }, { "epoch": 0.93, "learning_rate": 3.3459038754713956e-05, "loss": 0.0671, "theoretical_loss": 3.3336300222223034, "tokens_seen": 3190685696 }, { "epoch": 0.93, "learning_rate": 3.3418920003209504e-05, "loss": 0.063, "theoretical_loss": 3.3336197133848895, "tokens_seen": 3190816768 }, { "epoch": 0.93, "learning_rate": 3.3378801251705045e-05, "loss": 0.0644, "theoretical_loss": 3.333609405089496, "tokens_seen": 3190947840 }, { "epoch": 0.93, "learning_rate": 3.333868250020059e-05, "loss": 0.0644, "theoretical_loss": 3.333599097336074, "tokens_seen": 3191078912 }, { "epoch": 0.93, "learning_rate": 3.329856374869614e-05, "loss": 0.0641, "theoretical_loss": 3.333588790124571, "tokens_seen": 3191209984 }, { "epoch": 0.93, "learning_rate": 3.325844499719169e-05, "loss": 0.0639, "theoretical_loss": 3.3335784834549367, "tokens_seen": 3191341056 }, { "epoch": 0.93, "learning_rate": 3.321832624568724e-05, "loss": 0.0658, "theoretical_loss": 3.333568177327121, "tokens_seen": 3191472128 }, { "epoch": 0.93, "learning_rate": 3.317820749418278e-05, "loss": 0.0709, "theoretical_loss": 3.333557871741073, "tokens_seen": 3191603200 }, { "epoch": 0.93, "learning_rate": 3.3138088742678326e-05, "loss": 0.0661, "theoretical_loss": 3.333547566696742, "tokens_seen": 3191734272 }, { "epoch": 0.93, "learning_rate": 3.3097969991173875e-05, "loss": 0.0639, "theoretical_loss": 3.3335372621940764, "tokens_seen": 3191865344 }, { "epoch": 0.93, "learning_rate": 3.305785123966942e-05, "loss": 0.0636, "theoretical_loss": 3.3335269582330262, "tokens_seen": 3191996416 }, { "epoch": 0.93, "learning_rate": 3.301773248816497e-05, "loss": 0.063, "theoretical_loss": 3.333516654813541, "tokens_seen": 3192127488 }, { "epoch": 0.93, "learning_rate": 3.297761373666051e-05, "loss": 0.0667, "theoretical_loss": 3.3335063519355694, "tokens_seen": 3192258560 }, { "epoch": 0.93, "learning_rate": 3.293749498515606e-05, "loss": 0.0595, "theoretical_loss": 3.3334960495990615, "tokens_seen": 3192389632 }, { "epoch": 0.93, "learning_rate": 3.2897376233651615e-05, "loss": 0.0671, "theoretical_loss": 3.333485747803966, "tokens_seen": 3192520704 }, { "epoch": 0.93, "learning_rate": 3.2857257482147156e-05, "loss": 0.0642, "theoretical_loss": 3.3334754465502323, "tokens_seen": 3192651776 }, { "epoch": 0.94, "learning_rate": 3.2817138730642704e-05, "loss": 0.0636, "theoretical_loss": 3.3334651458378097, "tokens_seen": 3192782848 }, { "epoch": 0.94, "learning_rate": 3.2777019979138245e-05, "loss": 0.0615, "theoretical_loss": 3.3334548456666475, "tokens_seen": 3192913920 }, { "epoch": 0.94, "learning_rate": 3.27369012276338e-05, "loss": 0.0598, "theoretical_loss": 3.3334445460366955, "tokens_seen": 3193044992 }, { "epoch": 0.94, "learning_rate": 3.269678247612935e-05, "loss": 0.0645, "theoretical_loss": 3.3334342469479026, "tokens_seen": 3193176064 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.000863240915350616, "objective/train/docs_used": 1160256, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3098801374435425, "objective/train/original_loss": 1.309880018234253, "objective/train/theoretical_loss": 3.333423948400218, "objective/train/tokens_used": 1563831776, "objective/train/value_avg": -0.006763458251953125, "objective/train/value_loss": 0.00017729404498822987, "objective/train/value_max": -3.218650817871094e-05, "objective/train/value_min": -0.6640625, "objective/train/value_reward_corr": 0.7446319175958788, "objective/train/value_std": 0.01413726806640625, "objective/train/weight_avg": 1.0009465217590332, "objective/train/weighted_lm_loss": 1.311232089996338, "objective/train/weights_max": 1.8627805709838867, "objective/train/weights_min": 0.4655543565750122, "theoretical_loss": 3.333423948400218, "tokens_seen": 3193307136 }, { "epoch": 0.94, "learning_rate": 3.265666372462489e-05, "loss": 0.064, "theoretical_loss": 3.333423948400218, "tokens_seen": 3193307136 }, { "epoch": 0.94, "learning_rate": 3.261654497312044e-05, "loss": 0.062, "theoretical_loss": 3.3334136503935916, "tokens_seen": 3193438208 }, { "epoch": 0.94, "learning_rate": 3.257642622161598e-05, "loss": 0.0672, "theoretical_loss": 3.3334033529279727, "tokens_seen": 3193569280 }, { "epoch": 0.94, "learning_rate": 3.2536307470111534e-05, "loss": 0.0681, "theoretical_loss": 3.3333930560033105, "tokens_seen": 3193700352 }, { "epoch": 0.94, "learning_rate": 3.249618871860708e-05, "loss": 0.0623, "theoretical_loss": 3.333382759619554, "tokens_seen": 3193831424 }, { "epoch": 0.94, "learning_rate": 3.245606996710262e-05, "loss": 0.0625, "theoretical_loss": 3.3333724637766533, "tokens_seen": 3193962496 }, { "epoch": 0.94, "learning_rate": 3.241595121559817e-05, "loss": 0.0661, "theoretical_loss": 3.3333621684745576, "tokens_seen": 3194093568 }, { "epoch": 0.94, "learning_rate": 3.237583246409371e-05, "loss": 0.0646, "theoretical_loss": 3.333351873713216, "tokens_seen": 3194224640 }, { "epoch": 0.94, "learning_rate": 3.233571371258927e-05, "loss": 0.0661, "theoretical_loss": 3.333341579492578, "tokens_seen": 3194355712 }, { "epoch": 0.94, "learning_rate": 3.2295594961084815e-05, "loss": 0.0616, "theoretical_loss": 3.333331285812593, "tokens_seen": 3194486784 }, { "epoch": 0.94, "learning_rate": 3.225547620958036e-05, "loss": 0.0626, "theoretical_loss": 3.3333209926732104, "tokens_seen": 3194617856 }, { "epoch": 0.94, "learning_rate": 3.2215357458075905e-05, "loss": 0.065, "theoretical_loss": 3.33331070007438, "tokens_seen": 3194748928 }, { "epoch": 0.94, "learning_rate": 3.217523870657145e-05, "loss": 0.0658, "theoretical_loss": 3.3333004080160507, "tokens_seen": 3194880000 }, { "epoch": 0.94, "learning_rate": 3.2135119955067e-05, "loss": 0.0624, "theoretical_loss": 3.3332901164981728, "tokens_seen": 3195011072 }, { "epoch": 0.94, "learning_rate": 3.209500120356255e-05, "loss": 0.062, "theoretical_loss": 3.3332798255206946, "tokens_seen": 3195142144 }, { "epoch": 0.94, "learning_rate": 3.205488245205809e-05, "loss": 0.0651, "theoretical_loss": 3.3332695350835664, "tokens_seen": 3195273216 }, { "epoch": 0.94, "learning_rate": 3.201476370055364e-05, "loss": 0.0627, "theoretical_loss": 3.333259245186737, "tokens_seen": 3195404288 }, { "epoch": 0.94, "learning_rate": 3.1974644949049186e-05, "loss": 0.0654, "theoretical_loss": 3.333248955830157, "tokens_seen": 3195535360 }, { "epoch": 0.94, "learning_rate": 3.1934526197544734e-05, "loss": 0.0605, "theoretical_loss": 3.3332386670137746, "tokens_seen": 3195666432 }, { "epoch": 0.94, "learning_rate": 3.189440744604028e-05, "loss": 0.0657, "theoretical_loss": 3.33322837873754, "tokens_seen": 3195797504 }, { "epoch": 0.94, "learning_rate": 3.1854288694535824e-05, "loss": 0.0653, "theoretical_loss": 3.3332180910014024, "tokens_seen": 3195928576 }, { "epoch": 0.94, "learning_rate": 3.181416994303137e-05, "loss": 0.0628, "theoretical_loss": 3.3332078038053115, "tokens_seen": 3196059648 }, { "epoch": 0.94, "learning_rate": 3.177405119152692e-05, "loss": 0.0654, "theoretical_loss": 3.333197517149217, "tokens_seen": 3196190720 }, { "epoch": 0.94, "learning_rate": 3.173393244002247e-05, "loss": 0.0607, "theoretical_loss": 3.3331872310330675, "tokens_seen": 3196321792 }, { "epoch": 0.94, "learning_rate": 3.1693813688518016e-05, "loss": 0.0629, "theoretical_loss": 3.3331769454568136, "tokens_seen": 3196452864 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.00042727382970042527, "objective/train/docs_used": 1161393, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.253743290901184, "objective/train/original_loss": 1.2537431716918945, "objective/train/theoretical_loss": 3.333166660420404, "objective/train/tokens_used": 1567108576, "objective/train/value_avg": -0.005710601806640625, "objective/train/value_loss": 0.00014094072685111314, "objective/train/value_max": -5.143880844116211e-05, "objective/train/value_min": -0.2095947265625, "objective/train/value_reward_corr": 0.6463890238384815, "objective/train/value_std": 0.00963592529296875, "objective/train/weight_avg": 1.000489354133606, "objective/train/weighted_lm_loss": 1.2542307376861572, "objective/train/weights_max": 1.1178876161575317, "objective/train/weights_min": 0.3696194887161255, "theoretical_loss": 3.333166660420404, "tokens_seen": 3196583936 }, { "epoch": 0.94, "learning_rate": 3.1653694937013564e-05, "loss": 0.0648, "theoretical_loss": 3.333166660420404, "tokens_seen": 3196583936 }, { "epoch": 0.94, "learning_rate": 3.1613576185509105e-05, "loss": 0.0604, "theoretical_loss": 3.333156375923789, "tokens_seen": 3196715008 }, { "epoch": 0.94, "learning_rate": 3.157345743400465e-05, "loss": 0.0607, "theoretical_loss": 3.333146091966918, "tokens_seen": 3196846080 }, { "epoch": 0.94, "learning_rate": 3.15333386825002e-05, "loss": 0.0621, "theoretical_loss": 3.3331358085497396, "tokens_seen": 3196977152 }, { "epoch": 0.94, "learning_rate": 3.149321993099575e-05, "loss": 0.065, "theoretical_loss": 3.3331255256722043, "tokens_seen": 3197108224 }, { "epoch": 0.94, "learning_rate": 3.14531011794913e-05, "loss": 0.0626, "theoretical_loss": 3.3331152433342615, "tokens_seen": 3197239296 }, { "epoch": 0.94, "learning_rate": 3.141298242798684e-05, "loss": 0.0642, "theoretical_loss": 3.3331049615358608, "tokens_seen": 3197370368 }, { "epoch": 0.94, "learning_rate": 3.137286367648239e-05, "loss": 0.0638, "theoretical_loss": 3.333094680276951, "tokens_seen": 3197501440 }, { "epoch": 0.94, "learning_rate": 3.1332744924977935e-05, "loss": 0.0634, "theoretical_loss": 3.333084399557483, "tokens_seen": 3197632512 }, { "epoch": 0.94, "learning_rate": 3.129262617347348e-05, "loss": 0.0649, "theoretical_loss": 3.3330741193774056, "tokens_seen": 3197763584 }, { "epoch": 0.94, "learning_rate": 3.125250742196903e-05, "loss": 0.0646, "theoretical_loss": 3.3330638397366683, "tokens_seen": 3197894656 }, { "epoch": 0.94, "learning_rate": 3.121238867046458e-05, "loss": 0.0641, "theoretical_loss": 3.333053560635221, "tokens_seen": 3198025728 }, { "epoch": 0.94, "learning_rate": 3.117226991896012e-05, "loss": 0.0613, "theoretical_loss": 3.3330432820730134, "tokens_seen": 3198156800 }, { "epoch": 0.94, "learning_rate": 3.113215116745567e-05, "loss": 0.0612, "theoretical_loss": 3.3330330040499945, "tokens_seen": 3198287872 }, { "epoch": 0.94, "learning_rate": 3.1092032415951216e-05, "loss": 0.0663, "theoretical_loss": 3.3330227265661145, "tokens_seen": 3198418944 }, { "epoch": 0.94, "learning_rate": 3.1051913664446765e-05, "loss": 0.062, "theoretical_loss": 3.3330124496213234, "tokens_seen": 3198550016 }, { "epoch": 0.94, "learning_rate": 3.101179491294231e-05, "loss": 0.0652, "theoretical_loss": 3.3330021732155695, "tokens_seen": 3198681088 }, { "epoch": 0.94, "learning_rate": 3.0971676161437854e-05, "loss": 0.0627, "theoretical_loss": 3.332991897348804, "tokens_seen": 3198812160 }, { "epoch": 0.94, "learning_rate": 3.09315574099334e-05, "loss": 0.0603, "theoretical_loss": 3.332981622020975, "tokens_seen": 3198943232 }, { "epoch": 0.94, "learning_rate": 3.089143865842895e-05, "loss": 0.061, "theoretical_loss": 3.3329713472320335, "tokens_seen": 3199074304 }, { "epoch": 0.94, "learning_rate": 3.08513199069245e-05, "loss": 0.0649, "theoretical_loss": 3.332961072981928, "tokens_seen": 3199205376 }, { "epoch": 0.94, "learning_rate": 3.0811201155420046e-05, "loss": 0.066, "theoretical_loss": 3.3329507992706096, "tokens_seen": 3199336448 }, { "epoch": 0.94, "learning_rate": 3.0771082403915594e-05, "loss": 0.066, "theoretical_loss": 3.3329405260980267, "tokens_seen": 3199467520 }, { "epoch": 0.94, "learning_rate": 3.0730963652411135e-05, "loss": 0.0655, "theoretical_loss": 3.3329302534641294, "tokens_seen": 3199598592 }, { "epoch": 0.94, "learning_rate": 3.0690844900906684e-05, "loss": 0.0618, "theoretical_loss": 3.3329199813688675, "tokens_seen": 3199729664 }, { "epoch": 0.94, "objective/train/advantage_avg": 0.00018824238213710487, "objective/train/docs_used": 1162655, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.175961971282959, "objective/train/original_loss": 1.175961971282959, "objective/train/theoretical_loss": 3.3329097098121907, "objective/train/tokens_used": 1570385376, "objective/train/value_avg": -0.0081329345703125, "objective/train/value_loss": 0.00023304796195589006, "objective/train/value_max": -2.2292137145996094e-05, "objective/train/value_min": -0.6513671875, "objective/train/value_reward_corr": 0.7525685986401239, "objective/train/value_std": 0.0178985595703125, "objective/train/weight_avg": 1.0002979040145874, "objective/train/weighted_lm_loss": 1.175710916519165, "objective/train/weights_max": 1.7872517108917236, "objective/train/weights_min": 0.36827927827835083, "theoretical_loss": 3.3329097098121907, "tokens_seen": 3199860736 }, { "epoch": 0.94, "learning_rate": 3.065072614940223e-05, "loss": 0.0611, "theoretical_loss": 3.3329097098121907, "tokens_seen": 3199860736 }, { "epoch": 0.94, "learning_rate": 3.061060739789778e-05, "loss": 0.0651, "theoretical_loss": 3.3328994387940485, "tokens_seen": 3199991808 }, { "epoch": 0.94, "learning_rate": 3.057048864639333e-05, "loss": 0.0641, "theoretical_loss": 3.3328891683143906, "tokens_seen": 3200122880 }, { "epoch": 0.94, "learning_rate": 3.053036989488887e-05, "loss": 0.0614, "theoretical_loss": 3.332878898373167, "tokens_seen": 3200253952 }, { "epoch": 0.94, "learning_rate": 3.049025114338442e-05, "loss": 0.0634, "theoretical_loss": 3.3328686289703273, "tokens_seen": 3200385024 }, { "epoch": 0.94, "learning_rate": 3.0450132391879965e-05, "loss": 0.0657, "theoretical_loss": 3.332858360105821, "tokens_seen": 3200516096 }, { "epoch": 0.94, "learning_rate": 3.0410013640375513e-05, "loss": 0.0697, "theoretical_loss": 3.332848091779598, "tokens_seen": 3200647168 }, { "epoch": 0.94, "learning_rate": 3.0369894888871058e-05, "loss": 0.0628, "theoretical_loss": 3.3328378239916083, "tokens_seen": 3200778240 }, { "epoch": 0.94, "learning_rate": 3.0329776137366602e-05, "loss": 0.0614, "theoretical_loss": 3.332827556741801, "tokens_seen": 3200909312 }, { "epoch": 0.94, "learning_rate": 3.0289657385862154e-05, "loss": 0.0643, "theoretical_loss": 3.3328172900301265, "tokens_seen": 3201040384 }, { "epoch": 0.94, "learning_rate": 3.02495386343577e-05, "loss": 0.0652, "theoretical_loss": 3.332807023856535, "tokens_seen": 3201171456 }, { "epoch": 0.94, "learning_rate": 3.0209419882853247e-05, "loss": 0.0632, "theoretical_loss": 3.3327967582209745, "tokens_seen": 3201302528 }, { "epoch": 0.94, "learning_rate": 3.016930113134879e-05, "loss": 0.0632, "theoretical_loss": 3.3327864931233964, "tokens_seen": 3201433600 }, { "epoch": 0.94, "learning_rate": 3.0129182379844343e-05, "loss": 0.0599, "theoretical_loss": 3.33277622856375, "tokens_seen": 3201564672 }, { "epoch": 0.94, "learning_rate": 3.0089063628339887e-05, "loss": 0.0654, "theoretical_loss": 3.3327659645419847, "tokens_seen": 3201695744 }, { "epoch": 0.94, "learning_rate": 3.0048944876835432e-05, "loss": 0.063, "theoretical_loss": 3.332755701058051, "tokens_seen": 3201826816 }, { "epoch": 0.94, "learning_rate": 3.000882612533098e-05, "loss": 0.0623, "theoretical_loss": 3.3327454381118984, "tokens_seen": 3201957888 }, { "epoch": 0.94, "learning_rate": 2.9968707373826528e-05, "loss": 0.0657, "theoretical_loss": 3.332735175703476, "tokens_seen": 3202088960 }, { "epoch": 0.94, "learning_rate": 2.9928588622322076e-05, "loss": 0.0683, "theoretical_loss": 3.332724913832735, "tokens_seen": 3202220032 }, { "epoch": 0.94, "learning_rate": 2.988846987081762e-05, "loss": 0.0646, "theoretical_loss": 3.3327146524996243, "tokens_seen": 3202351104 }, { "epoch": 0.94, "learning_rate": 2.9848351119313166e-05, "loss": 0.0698, "theoretical_loss": 3.3327043917040937, "tokens_seen": 3202482176 }, { "epoch": 0.94, "learning_rate": 2.9808232367808714e-05, "loss": 0.0601, "theoretical_loss": 3.332694131446093, "tokens_seen": 3202613248 }, { "epoch": 0.94, "learning_rate": 2.9768113616304262e-05, "loss": 0.0651, "theoretical_loss": 3.332683871725573, "tokens_seen": 3202744320 }, { "epoch": 0.94, "learning_rate": 2.972799486479981e-05, "loss": 0.0643, "theoretical_loss": 3.3326736125424827, "tokens_seen": 3202875392 }, { "epoch": 0.94, "learning_rate": 2.9687876113295354e-05, "loss": 0.063, "theoretical_loss": 3.332663353896772, "tokens_seen": 3203006464 }, { "epoch": 0.94, "objective/train/advantage_avg": -0.00025193727924488485, "objective/train/docs_used": 1164087, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1740226745605469, "objective/train/original_loss": 1.1740226745605469, "objective/train/theoretical_loss": 3.3326530957883906, "objective/train/tokens_used": 1573662176, "objective/train/value_avg": -0.0081329345703125, "objective/train/value_loss": 0.0002466995210852474, "objective/train/value_max": -4.13060188293457e-05, "objective/train/value_min": -0.8232421875, "objective/train/value_reward_corr": 0.7333823742371692, "objective/train/value_std": 0.0172882080078125, "objective/train/weight_avg": 0.9998635053634644, "objective/train/weighted_lm_loss": 1.1736947298049927, "objective/train/weights_max": 2.2402234077453613, "objective/train/weights_min": 0.3761841952800751, "theoretical_loss": 3.3326530957883906, "tokens_seen": 3203137536 }, { "epoch": 0.94, "learning_rate": 2.96477573617909e-05, "loss": 0.0609, "theoretical_loss": 3.3326530957883906, "tokens_seen": 3203137536 }, { "epoch": 0.94, "learning_rate": 2.960763861028645e-05, "loss": 0.0643, "theoretical_loss": 3.332642838217289, "tokens_seen": 3203268608 }, { "epoch": 0.94, "learning_rate": 2.9567519858781995e-05, "loss": 0.0628, "theoretical_loss": 3.3326325811834163, "tokens_seen": 3203399680 }, { "epoch": 0.94, "learning_rate": 2.9527401107277543e-05, "loss": 0.0639, "theoretical_loss": 3.332622324686723, "tokens_seen": 3203530752 }, { "epoch": 0.94, "learning_rate": 2.9487282355773088e-05, "loss": 0.0631, "theoretical_loss": 3.332612068727159, "tokens_seen": 3203661824 }, { "epoch": 0.94, "learning_rate": 2.9447163604268633e-05, "loss": 0.0639, "theoretical_loss": 3.3326018133046738, "tokens_seen": 3203792896 }, { "epoch": 0.94, "learning_rate": 2.9407044852764184e-05, "loss": 0.0591, "theoretical_loss": 3.3325915584192174, "tokens_seen": 3203923968 }, { "epoch": 0.94, "learning_rate": 2.936692610125973e-05, "loss": 0.0631, "theoretical_loss": 3.33258130407074, "tokens_seen": 3204055040 }, { "epoch": 0.94, "learning_rate": 2.9326807349755277e-05, "loss": 0.0642, "theoretical_loss": 3.332571050259191, "tokens_seen": 3204186112 }, { "epoch": 0.94, "learning_rate": 2.928668859825082e-05, "loss": 0.0646, "theoretical_loss": 3.332560796984521, "tokens_seen": 3204317184 }, { "epoch": 0.94, "learning_rate": 2.9246569846746373e-05, "loss": 0.0639, "theoretical_loss": 3.3325505442466796, "tokens_seen": 3204448256 }, { "epoch": 0.94, "learning_rate": 2.9206451095241918e-05, "loss": 0.0673, "theoretical_loss": 3.3325402920456164, "tokens_seen": 3204579328 }, { "epoch": 0.94, "learning_rate": 2.9166332343737462e-05, "loss": 0.0625, "theoretical_loss": 3.3325300403812816, "tokens_seen": 3204710400 }, { "epoch": 0.94, "learning_rate": 2.912621359223301e-05, "loss": 0.0636, "theoretical_loss": 3.3325197892536256, "tokens_seen": 3204841472 }, { "epoch": 0.94, "learning_rate": 2.9086094840728555e-05, "loss": 0.0637, "theoretical_loss": 3.332509538662598, "tokens_seen": 3204972544 }, { "epoch": 0.94, "learning_rate": 2.9045976089224106e-05, "loss": 0.0652, "theoretical_loss": 3.3324992886081484, "tokens_seen": 3205103616 }, { "epoch": 0.94, "learning_rate": 2.900585733771965e-05, "loss": 0.0649, "theoretical_loss": 3.332489039090227, "tokens_seen": 3205234688 }, { "epoch": 0.94, "learning_rate": 2.8965738586215196e-05, "loss": 0.065, "theoretical_loss": 3.332478790108784, "tokens_seen": 3205365760 }, { "epoch": 0.94, "learning_rate": 2.8925619834710744e-05, "loss": 0.0628, "theoretical_loss": 3.3324685416637694, "tokens_seen": 3205496832 }, { "epoch": 0.94, "learning_rate": 2.8885501083206292e-05, "loss": 0.0613, "theoretical_loss": 3.332458293755133, "tokens_seen": 3205627904 }, { "epoch": 0.94, "learning_rate": 2.884538233170184e-05, "loss": 0.061, "theoretical_loss": 3.3324480463828245, "tokens_seen": 3205758976 }, { "epoch": 0.94, "learning_rate": 2.8805263580197385e-05, "loss": 0.0629, "theoretical_loss": 3.3324377995467946, "tokens_seen": 3205890048 }, { "epoch": 0.94, "learning_rate": 2.876514482869293e-05, "loss": 0.0633, "theoretical_loss": 3.3324275532469927, "tokens_seen": 3206021120 }, { "epoch": 0.94, "learning_rate": 2.872502607718848e-05, "loss": 0.0621, "theoretical_loss": 3.3324173074833694, "tokens_seen": 3206152192 }, { "epoch": 0.94, "learning_rate": 2.8684907325684025e-05, "loss": 0.0627, "theoretical_loss": 3.332407062255874, "tokens_seen": 3206283264 }, { "epoch": 0.94, "objective/train/advantage_avg": -8.069272007560357e-05, "objective/train/docs_used": 1165309, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1500324010849, "objective/train/original_loss": 1.1500325202941895, "objective/train/theoretical_loss": 3.332396817564457, "objective/train/tokens_used": 1576938976, "objective/train/value_avg": -0.00815582275390625, "objective/train/value_loss": 0.00017128646140918136, "objective/train/value_max": -3.218650817871094e-05, "objective/train/value_min": -0.318359375, "objective/train/value_reward_corr": 0.7529020675561426, "objective/train/value_std": 0.01531982421875, "objective/train/weight_avg": 0.9999960064888, "objective/train/weighted_lm_loss": 1.149786353111267, "objective/train/weights_max": 1.1518362760543823, "objective/train/weights_min": 0.36847248673439026, "theoretical_loss": 3.332396817564457, "tokens_seen": 3206414336 }, { "epoch": 0.94, "learning_rate": 2.8644788574179574e-05, "loss": 0.0589, "theoretical_loss": 3.332396817564457, "tokens_seen": 3206414336 }, { "epoch": 0.94, "learning_rate": 2.8604669822675118e-05, "loss": 0.06, "theoretical_loss": 3.3323865734090687, "tokens_seen": 3206545408 }, { "epoch": 0.94, "learning_rate": 2.8564551071170663e-05, "loss": 0.0641, "theoretical_loss": 3.3323763297896587, "tokens_seen": 3206676480 }, { "epoch": 0.94, "learning_rate": 2.8524432319666214e-05, "loss": 0.0673, "theoretical_loss": 3.332366086706177, "tokens_seen": 3206807552 }, { "epoch": 0.94, "learning_rate": 2.848431356816176e-05, "loss": 0.0652, "theoretical_loss": 3.332355844158574, "tokens_seen": 3206938624 }, { "epoch": 0.94, "learning_rate": 2.8444194816657307e-05, "loss": 0.0629, "theoretical_loss": 3.3323456021467996, "tokens_seen": 3207069696 }, { "epoch": 0.94, "learning_rate": 2.8404076065152852e-05, "loss": 0.0662, "theoretical_loss": 3.332335360670804, "tokens_seen": 3207200768 }, { "epoch": 0.94, "learning_rate": 2.83639573136484e-05, "loss": 0.0634, "theoretical_loss": 3.3323251197305366, "tokens_seen": 3207331840 }, { "epoch": 0.94, "learning_rate": 2.8323838562143948e-05, "loss": 0.0625, "theoretical_loss": 3.3323148793259483, "tokens_seen": 3207462912 }, { "epoch": 0.94, "learning_rate": 2.8283719810639493e-05, "loss": 0.0614, "theoretical_loss": 3.3323046394569893, "tokens_seen": 3207593984 }, { "epoch": 0.94, "learning_rate": 2.824360105913504e-05, "loss": 0.061, "theoretical_loss": 3.3322944001236086, "tokens_seen": 3207725056 }, { "epoch": 0.94, "learning_rate": 2.8203482307630585e-05, "loss": 0.0582, "theoretical_loss": 3.3322841613257577, "tokens_seen": 3207856128 }, { "epoch": 0.94, "learning_rate": 2.8163363556126137e-05, "loss": 0.0619, "theoretical_loss": 3.3322739230633855, "tokens_seen": 3207987200 }, { "epoch": 0.94, "learning_rate": 2.812324480462168e-05, "loss": 0.0612, "theoretical_loss": 3.332263685336443, "tokens_seen": 3208118272 }, { "epoch": 0.94, "learning_rate": 2.8083126053117226e-05, "loss": 0.0602, "theoretical_loss": 3.3322534481448796, "tokens_seen": 3208249344 }, { "epoch": 0.94, "learning_rate": 2.8043007301612774e-05, "loss": 0.0606, "theoretical_loss": 3.3322432114886458, "tokens_seen": 3208380416 }, { "epoch": 0.94, "learning_rate": 2.8002888550108322e-05, "loss": 0.0622, "theoretical_loss": 3.332232975367692, "tokens_seen": 3208511488 }, { "epoch": 0.94, "learning_rate": 2.796276979860387e-05, "loss": 0.06, "theoretical_loss": 3.332222739781968, "tokens_seen": 3208642560 }, { "epoch": 0.94, "learning_rate": 2.7922651047099415e-05, "loss": 0.0611, "theoretical_loss": 3.332212504731424, "tokens_seen": 3208773632 }, { "epoch": 0.94, "learning_rate": 2.788253229559496e-05, "loss": 0.0615, "theoretical_loss": 3.33220227021601, "tokens_seen": 3208904704 }, { "epoch": 0.94, "learning_rate": 2.7842413544090508e-05, "loss": 0.0643, "theoretical_loss": 3.3321920362356767, "tokens_seen": 3209035776 }, { "epoch": 0.94, "learning_rate": 2.7802294792586056e-05, "loss": 0.0611, "theoretical_loss": 3.332181802790374, "tokens_seen": 3209166848 }, { "epoch": 0.95, "learning_rate": 2.7762176041081604e-05, "loss": 0.0652, "theoretical_loss": 3.332171569880052, "tokens_seen": 3209297920 }, { "epoch": 0.95, "learning_rate": 2.772205728957715e-05, "loss": 0.0679, "theoretical_loss": 3.3321613375046604, "tokens_seen": 3209428992 }, { "epoch": 0.95, "learning_rate": 2.7681938538072693e-05, "loss": 0.0678, "theoretical_loss": 3.3321511056641504, "tokens_seen": 3209560064 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.0007333943503908813, "objective/train/docs_used": 1166212, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.380961537361145, "objective/train/original_loss": 1.3809616565704346, "objective/train/theoretical_loss": 3.3321408743584713, "objective/train/tokens_used": 1580215776, "objective/train/value_avg": -0.00650787353515625, "objective/train/value_loss": 0.0001095169282052666, "objective/train/value_max": -2.2113323211669922e-05, "objective/train/value_min": -0.372802734375, "objective/train/value_reward_corr": 0.7090534949681234, "objective/train/value_std": 0.012420654296875, "objective/train/weight_avg": 1.0007842779159546, "objective/train/weighted_lm_loss": 1.3823575973510742, "objective/train/weights_max": 1.351715087890625, "objective/train/weights_min": 0.3728155791759491, "theoretical_loss": 3.3321408743584713, "tokens_seen": 3209691136 }, { "epoch": 0.95, "learning_rate": 2.7641819786568245e-05, "loss": 0.0622, "theoretical_loss": 3.3321408743584713, "tokens_seen": 3209691136 }, { "epoch": 0.95, "learning_rate": 2.760170103506379e-05, "loss": 0.0586, "theoretical_loss": 3.3321306435875737, "tokens_seen": 3209822208 }, { "epoch": 0.95, "learning_rate": 2.7561582283559337e-05, "loss": 0.0674, "theoretical_loss": 3.332120413351408, "tokens_seen": 3209953280 }, { "epoch": 0.95, "learning_rate": 2.7521463532054882e-05, "loss": 0.0637, "theoretical_loss": 3.3321101836499243, "tokens_seen": 3210084352 }, { "epoch": 0.95, "learning_rate": 2.7481344780550427e-05, "loss": 0.0643, "theoretical_loss": 3.3320999544830725, "tokens_seen": 3210215424 }, { "epoch": 0.95, "learning_rate": 2.7441226029045978e-05, "loss": 0.0602, "theoretical_loss": 3.3320897258508033, "tokens_seen": 3210346496 }, { "epoch": 0.95, "learning_rate": 2.7401107277541523e-05, "loss": 0.0615, "theoretical_loss": 3.3320794977530666, "tokens_seen": 3210477568 }, { "epoch": 0.95, "learning_rate": 2.736098852603707e-05, "loss": 0.059, "theoretical_loss": 3.332069270189813, "tokens_seen": 3210608640 }, { "epoch": 0.95, "learning_rate": 2.7320869774532615e-05, "loss": 0.0601, "theoretical_loss": 3.332059043160992, "tokens_seen": 3210739712 }, { "epoch": 0.95, "learning_rate": 2.7280751023028167e-05, "loss": 0.0645, "theoretical_loss": 3.3320488166665547, "tokens_seen": 3210870784 }, { "epoch": 0.95, "learning_rate": 2.724063227152371e-05, "loss": 0.0622, "theoretical_loss": 3.3320385907064507, "tokens_seen": 3211001856 }, { "epoch": 0.95, "learning_rate": 2.7200513520019256e-05, "loss": 0.0622, "theoretical_loss": 3.332028365280631, "tokens_seen": 3211132928 }, { "epoch": 0.95, "learning_rate": 2.7160394768514804e-05, "loss": 0.0631, "theoretical_loss": 3.3320181403890454, "tokens_seen": 3211264000 }, { "epoch": 0.95, "learning_rate": 2.7120276017010352e-05, "loss": 0.0654, "theoretical_loss": 3.3320079160316443, "tokens_seen": 3211395072 }, { "epoch": 0.95, "learning_rate": 2.70801572655059e-05, "loss": 0.0662, "theoretical_loss": 3.3319976922083776, "tokens_seen": 3211526144 }, { "epoch": 0.95, "learning_rate": 2.7040038514001445e-05, "loss": 0.0644, "theoretical_loss": 3.331987468919196, "tokens_seen": 3211657216 }, { "epoch": 0.95, "learning_rate": 2.699991976249699e-05, "loss": 0.064, "theoretical_loss": 3.33197724616405, "tokens_seen": 3211788288 }, { "epoch": 0.95, "learning_rate": 2.6959801010992538e-05, "loss": 0.0613, "theoretical_loss": 3.33196702394289, "tokens_seen": 3211919360 }, { "epoch": 0.95, "learning_rate": 2.6919682259488086e-05, "loss": 0.0614, "theoretical_loss": 3.3319568022556654, "tokens_seen": 3212050432 }, { "epoch": 0.95, "learning_rate": 2.6879563507983634e-05, "loss": 0.0622, "theoretical_loss": 3.3319465811023274, "tokens_seen": 3212181504 }, { "epoch": 0.95, "learning_rate": 2.683944475647918e-05, "loss": 0.0667, "theoretical_loss": 3.331936360482826, "tokens_seen": 3212312576 }, { "epoch": 0.95, "learning_rate": 2.6799326004974723e-05, "loss": 0.0663, "theoretical_loss": 3.3319261403971114, "tokens_seen": 3212443648 }, { "epoch": 0.95, "learning_rate": 2.6759207253470275e-05, "loss": 0.0646, "theoretical_loss": 3.3319159208451343, "tokens_seen": 3212574720 }, { "epoch": 0.95, "learning_rate": 2.671908850196582e-05, "loss": 0.0677, "theoretical_loss": 3.331905701826845, "tokens_seen": 3212705792 }, { "epoch": 0.95, "learning_rate": 2.6678969750461367e-05, "loss": 0.067, "theoretical_loss": 3.3318954833421937, "tokens_seen": 3212836864 }, { "epoch": 0.95, "objective/train/advantage_avg": -0.0006552878767251968, "objective/train/docs_used": 1167269, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3325235843658447, "objective/train/original_loss": 1.3325235843658447, "objective/train/theoretical_loss": 3.3318852653911306, "objective/train/tokens_used": 1583492576, "objective/train/value_avg": -0.00823211669921875, "objective/train/value_loss": 0.0001474874879932031, "objective/train/value_max": -7.31348991394043e-05, "objective/train/value_min": -0.2437744140625, "objective/train/value_reward_corr": 0.8977101471270035, "objective/train/value_std": 0.0222930908203125, "objective/train/weight_avg": 0.9994164109230042, "objective/train/weighted_lm_loss": 1.3316842317581177, "objective/train/weights_max": 1.1213833093643188, "objective/train/weights_min": 0.6300957202911377, "theoretical_loss": 3.3318852653911306, "tokens_seen": 3212967936 }, { "epoch": 0.95, "learning_rate": 2.6638850998956912e-05, "loss": 0.064, "theoretical_loss": 3.3318852653911306, "tokens_seen": 3212967936 }, { "epoch": 0.95, "learning_rate": 2.6598732247452457e-05, "loss": 0.0649, "theoretical_loss": 3.3318750479736066, "tokens_seen": 3213099008 }, { "epoch": 0.95, "learning_rate": 2.6558613495948008e-05, "loss": 0.0634, "theoretical_loss": 3.331864831089571, "tokens_seen": 3213230080 }, { "epoch": 0.95, "learning_rate": 2.6518494744443553e-05, "loss": 0.0619, "theoretical_loss": 3.3318546147389756, "tokens_seen": 3213361152 }, { "epoch": 0.95, "learning_rate": 2.64783759929391e-05, "loss": 0.0672, "theoretical_loss": 3.33184439892177, "tokens_seen": 3213492224 }, { "epoch": 0.95, "learning_rate": 2.6438257241434646e-05, "loss": 0.0612, "theoretical_loss": 3.3318341836379046, "tokens_seen": 3213623296 }, { "epoch": 0.95, "learning_rate": 2.6398138489930194e-05, "loss": 0.0635, "theoretical_loss": 3.3318239688873303, "tokens_seen": 3213754368 }, { "epoch": 0.95, "learning_rate": 2.6358019738425742e-05, "loss": 0.0663, "theoretical_loss": 3.3318137546699966, "tokens_seen": 3213885440 }, { "epoch": 0.95, "learning_rate": 2.6317900986921286e-05, "loss": 0.0634, "theoretical_loss": 3.3318035409858546, "tokens_seen": 3214016512 }, { "epoch": 0.95, "learning_rate": 2.6277782235416834e-05, "loss": 0.0627, "theoretical_loss": 3.3317933278348546, "tokens_seen": 3214147584 }, { "epoch": 0.95, "learning_rate": 2.623766348391238e-05, "loss": 0.0616, "theoretical_loss": 3.3317831152169473, "tokens_seen": 3214278656 }, { "epoch": 0.95, "learning_rate": 2.619754473240793e-05, "loss": 0.067, "theoretical_loss": 3.331772903132083, "tokens_seen": 3214409728 }, { "epoch": 0.95, "learning_rate": 2.6157425980903475e-05, "loss": 0.0657, "theoretical_loss": 3.331762691580211, "tokens_seen": 3214540800 }, { "epoch": 0.95, "learning_rate": 2.611730722939902e-05, "loss": 0.0623, "theoretical_loss": 3.331752480561284, "tokens_seen": 3214671872 }, { "epoch": 0.95, "learning_rate": 2.6077188477894568e-05, "loss": 0.0629, "theoretical_loss": 3.3317422700752504, "tokens_seen": 3214802944 }, { "epoch": 0.95, "learning_rate": 2.6037069726390116e-05, "loss": 0.068, "theoretical_loss": 3.3317320601220617, "tokens_seen": 3214934016 }, { "epoch": 0.95, "learning_rate": 2.5996950974885664e-05, "loss": 0.063, "theoretical_loss": 3.331721850701668, "tokens_seen": 3215065088 }, { "epoch": 0.95, "learning_rate": 2.595683222338121e-05, "loss": 0.0658, "theoretical_loss": 3.33171164181402, "tokens_seen": 3215196160 }, { "epoch": 0.95, "learning_rate": 2.5916713471876753e-05, "loss": 0.0666, "theoretical_loss": 3.331701433459068, "tokens_seen": 3215327232 }, { "epoch": 0.95, "learning_rate": 2.5876594720372305e-05, "loss": 0.0638, "theoretical_loss": 3.331691225636763, "tokens_seen": 3215458304 }, { "epoch": 0.95, "learning_rate": 2.583647596886785e-05, "loss": 0.0643, "theoretical_loss": 3.3316810183470547, "tokens_seen": 3215589376 }, { "epoch": 0.95, "learning_rate": 2.5796357217363398e-05, "loss": 0.0684, "theoretical_loss": 3.331670811589894, "tokens_seen": 3215720448 }, { "epoch": 0.95, "learning_rate": 2.5756238465858942e-05, "loss": 0.066, "theoretical_loss": 3.331660605365231, "tokens_seen": 3215851520 }, { "epoch": 0.95, "learning_rate": 2.5716119714354487e-05, "loss": 0.0652, "theoretical_loss": 3.3316503996730176, "tokens_seen": 3215982592 }, { "epoch": 0.95, "learning_rate": 2.567600096285004e-05, "loss": 0.0645, "theoretical_loss": 3.3316401945132026, "tokens_seen": 3216113664 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.0006316755316220224, "objective/train/docs_used": 1168413, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2802395820617676, "objective/train/original_loss": 1.2802395820617676, "objective/train/theoretical_loss": 3.3316299898857373, "objective/train/tokens_used": 1586769376, "objective/train/value_avg": -0.005786895751953125, "objective/train/value_loss": 0.00019552743469830602, "objective/train/value_max": -2.6464462280273438e-05, "objective/train/value_min": -0.6474609375, "objective/train/value_reward_corr": 0.7402827369937579, "objective/train/value_std": 0.0144195556640625, "objective/train/weight_avg": 1.0007187128067017, "objective/train/weighted_lm_loss": 1.2810481786727905, "objective/train/weights_max": 1.3409960269927979, "objective/train/weights_min": 0.39153391122817993, "theoretical_loss": 3.3316299898857373, "tokens_seen": 3216244736 }, { "epoch": 0.95, "learning_rate": 2.5635882211345583e-05, "loss": 0.0602, "theoretical_loss": 3.3316299898857373, "tokens_seen": 3216244736 }, { "epoch": 0.95, "learning_rate": 2.559576345984113e-05, "loss": 0.0634, "theoretical_loss": 3.3316197857905725, "tokens_seen": 3216375808 }, { "epoch": 0.95, "learning_rate": 2.5555644708336676e-05, "loss": 0.0625, "theoretical_loss": 3.3316095822276584, "tokens_seen": 3216506880 }, { "epoch": 0.95, "learning_rate": 2.5515525956832224e-05, "loss": 0.0676, "theoretical_loss": 3.3315993791969456, "tokens_seen": 3216637952 }, { "epoch": 0.95, "learning_rate": 2.5475407205327772e-05, "loss": 0.0631, "theoretical_loss": 3.3315891766983845, "tokens_seen": 3216769024 }, { "epoch": 0.95, "learning_rate": 2.5435288453823317e-05, "loss": 0.0642, "theoretical_loss": 3.3315789747319258, "tokens_seen": 3216900096 }, { "epoch": 0.95, "learning_rate": 2.5395169702318865e-05, "loss": 0.065, "theoretical_loss": 3.3315687732975205, "tokens_seen": 3217031168 }, { "epoch": 0.95, "learning_rate": 2.535505095081441e-05, "loss": 0.0644, "theoretical_loss": 3.331558572395118, "tokens_seen": 3217162240 }, { "epoch": 0.95, "learning_rate": 2.5314932199309957e-05, "loss": 0.0666, "theoretical_loss": 3.3315483720246704, "tokens_seen": 3217293312 }, { "epoch": 0.95, "learning_rate": 2.5274813447805505e-05, "loss": 0.0614, "theoretical_loss": 3.3315381721861277, "tokens_seen": 3217424384 }, { "epoch": 0.95, "learning_rate": 2.523469469630105e-05, "loss": 0.0636, "theoretical_loss": 3.3315279728794396, "tokens_seen": 3217555456 }, { "epoch": 0.95, "learning_rate": 2.5194575944796598e-05, "loss": 0.0629, "theoretical_loss": 3.3315177741045576, "tokens_seen": 3217686528 }, { "epoch": 0.95, "learning_rate": 2.5154457193292146e-05, "loss": 0.0657, "theoretical_loss": 3.3315075758614325, "tokens_seen": 3217817600 }, { "epoch": 0.95, "learning_rate": 2.5114338441787694e-05, "loss": 0.0652, "theoretical_loss": 3.331497378150014, "tokens_seen": 3217948672 }, { "epoch": 0.95, "learning_rate": 2.507421969028324e-05, "loss": 0.0642, "theoretical_loss": 3.331487180970254, "tokens_seen": 3218079744 }, { "epoch": 0.95, "learning_rate": 2.5034100938778784e-05, "loss": 0.0646, "theoretical_loss": 3.331476984322102, "tokens_seen": 3218210816 }, { "epoch": 0.95, "learning_rate": 2.4993982187274332e-05, "loss": 0.063, "theoretical_loss": 3.331466788205509, "tokens_seen": 3218341888 }, { "epoch": 0.95, "learning_rate": 2.495386343576988e-05, "loss": 0.0674, "theoretical_loss": 3.331456592620426, "tokens_seen": 3218472960 }, { "epoch": 0.95, "learning_rate": 2.4913744684265428e-05, "loss": 0.063, "theoretical_loss": 3.331446397566803, "tokens_seen": 3218604032 }, { "epoch": 0.95, "learning_rate": 2.4873625932760973e-05, "loss": 0.0623, "theoretical_loss": 3.3314362030445914, "tokens_seen": 3218735104 }, { "epoch": 0.95, "learning_rate": 2.4833507181256517e-05, "loss": 0.0646, "theoretical_loss": 3.3314260090537413, "tokens_seen": 3218866176 }, { "epoch": 0.95, "learning_rate": 2.479338842975207e-05, "loss": 0.0635, "theoretical_loss": 3.3314158155942035, "tokens_seen": 3218997248 }, { "epoch": 0.95, "learning_rate": 2.4753269678247613e-05, "loss": 0.0595, "theoretical_loss": 3.3314056226659283, "tokens_seen": 3219128320 }, { "epoch": 0.95, "learning_rate": 2.471315092674316e-05, "loss": 0.0609, "theoretical_loss": 3.3313954302688673, "tokens_seen": 3219259392 }, { "epoch": 0.95, "learning_rate": 2.4673032175238706e-05, "loss": 0.0621, "theoretical_loss": 3.3313852384029707, "tokens_seen": 3219390464 }, { "epoch": 0.95, "objective/train/advantage_avg": -1.3388525985646993e-05, "objective/train/docs_used": 1169560, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.295120120048523, "objective/train/original_loss": 1.2951202392578125, "objective/train/theoretical_loss": 3.331375047068189, "objective/train/tokens_used": 1590046176, "objective/train/value_avg": -0.01158905029296875, "objective/train/value_loss": 0.0002896028745453805, "objective/train/value_max": -2.7120113372802734e-05, "objective/train/value_min": -0.34619140625, "objective/train/value_reward_corr": 0.8636946369004332, "objective/train/value_std": 0.026824951171875, "objective/train/weight_avg": 1.0001267194747925, "objective/train/weighted_lm_loss": 1.2943552732467651, "objective/train/weights_max": 1.162520408630371, "objective/train/weights_min": 0.6686959266662598, "theoretical_loss": 3.331375047068189, "tokens_seen": 3219521536 }, { "epoch": 0.95, "learning_rate": 2.4632913423734254e-05, "loss": 0.0644, "theoretical_loss": 3.331375047068189, "tokens_seen": 3219521536 }, { "epoch": 0.95, "learning_rate": 2.4592794672229802e-05, "loss": 0.0631, "theoretical_loss": 3.331364856264473, "tokens_seen": 3219652608 }, { "epoch": 0.95, "learning_rate": 2.4552675920725347e-05, "loss": 0.064, "theoretical_loss": 3.3313546659917734, "tokens_seen": 3219783680 }, { "epoch": 0.95, "learning_rate": 2.4512557169220895e-05, "loss": 0.0623, "theoretical_loss": 3.3313444762500413, "tokens_seen": 3219914752 }, { "epoch": 0.95, "learning_rate": 2.447243841771644e-05, "loss": 0.0622, "theoretical_loss": 3.331334287039227, "tokens_seen": 3220045824 }, { "epoch": 0.95, "learning_rate": 2.4432319666211988e-05, "loss": 0.0615, "theoretical_loss": 3.331324098359281, "tokens_seen": 3220176896 }, { "epoch": 0.95, "learning_rate": 2.4392200914707536e-05, "loss": 0.0651, "theoretical_loss": 3.331313910210155, "tokens_seen": 3220307968 }, { "epoch": 0.95, "learning_rate": 2.435208216320308e-05, "loss": 0.0612, "theoretical_loss": 3.331303722591799, "tokens_seen": 3220439040 }, { "epoch": 0.95, "learning_rate": 2.431196341169863e-05, "loss": 0.0601, "theoretical_loss": 3.3312935355041637, "tokens_seen": 3220570112 }, { "epoch": 0.95, "learning_rate": 2.4271844660194176e-05, "loss": 0.0626, "theoretical_loss": 3.3312833489472, "tokens_seen": 3220701184 }, { "epoch": 0.95, "learning_rate": 2.4231725908689725e-05, "loss": 0.0622, "theoretical_loss": 3.3312731629208585, "tokens_seen": 3220832256 }, { "epoch": 0.95, "learning_rate": 2.419160715718527e-05, "loss": 0.0636, "theoretical_loss": 3.331262977425091, "tokens_seen": 3220963328 }, { "epoch": 0.95, "learning_rate": 2.4151488405680814e-05, "loss": 0.063, "theoretical_loss": 3.3312527924598463, "tokens_seen": 3221094400 }, { "epoch": 0.95, "learning_rate": 2.4111369654176362e-05, "loss": 0.0639, "theoretical_loss": 3.331242608025077, "tokens_seen": 3221225472 }, { "epoch": 0.95, "learning_rate": 2.407125090267191e-05, "loss": 0.0623, "theoretical_loss": 3.331232424120733, "tokens_seen": 3221356544 }, { "epoch": 0.95, "learning_rate": 2.4031132151167458e-05, "loss": 0.0621, "theoretical_loss": 3.331222240746765, "tokens_seen": 3221487616 }, { "epoch": 0.95, "learning_rate": 2.3991013399663003e-05, "loss": 0.061, "theoretical_loss": 3.331212057903125, "tokens_seen": 3221618688 }, { "epoch": 0.95, "learning_rate": 2.3950894648158547e-05, "loss": 0.0626, "theoretical_loss": 3.331201875589762, "tokens_seen": 3221749760 }, { "epoch": 0.95, "learning_rate": 2.39107758966541e-05, "loss": 0.0603, "theoretical_loss": 3.331191693806628, "tokens_seen": 3221880832 }, { "epoch": 0.95, "learning_rate": 2.3870657145149643e-05, "loss": 0.0656, "theoretical_loss": 3.3311815125536737, "tokens_seen": 3222011904 }, { "epoch": 0.95, "learning_rate": 2.383053839364519e-05, "loss": 0.0651, "theoretical_loss": 3.3311713318308493, "tokens_seen": 3222142976 }, { "epoch": 0.95, "learning_rate": 2.3790419642140736e-05, "loss": 0.0643, "theoretical_loss": 3.3311611516381063, "tokens_seen": 3222274048 }, { "epoch": 0.95, "learning_rate": 2.375030089063628e-05, "loss": 0.064, "theoretical_loss": 3.331150971975396, "tokens_seen": 3222405120 }, { "epoch": 0.95, "learning_rate": 2.3710182139131832e-05, "loss": 0.061, "theoretical_loss": 3.3311407928426675, "tokens_seen": 3222536192 }, { "epoch": 0.95, "learning_rate": 2.3670063387627377e-05, "loss": 0.0601, "theoretical_loss": 3.3311306142398736, "tokens_seen": 3222667264 }, { "epoch": 0.95, "objective/train/advantage_avg": 0.0004191944608464837, "objective/train/docs_used": 1170779, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.4614615440368652, "objective/train/original_loss": 1.4614615440368652, "objective/train/theoretical_loss": 3.3311204361669637, "objective/train/tokens_used": 1593322976, "objective/train/value_avg": -0.0067138671875, "objective/train/value_loss": 0.0001311889209318906, "objective/train/value_max": -2.2113323211669922e-05, "objective/train/value_min": -0.208984375, "objective/train/value_reward_corr": 0.6902528621792718, "objective/train/value_std": 0.01169586181640625, "objective/train/weight_avg": 1.000481367111206, "objective/train/weighted_lm_loss": 1.4613326787948608, "objective/train/weights_max": 1.2133187055587769, "objective/train/weights_min": 0.4179830253124237, "theoretical_loss": 3.3311204361669637, "tokens_seen": 3222798336 }, { "epoch": 0.95, "learning_rate": 2.3629944636122925e-05, "loss": 0.067, "theoretical_loss": 3.3311204361669637, "tokens_seen": 3222798336 }, { "epoch": 0.95, "learning_rate": 2.358982588461847e-05, "loss": 0.0629, "theoretical_loss": 3.3311102586238897, "tokens_seen": 3222929408 }, { "epoch": 0.95, "learning_rate": 2.3549707133114018e-05, "loss": 0.0657, "theoretical_loss": 3.331100081610602, "tokens_seen": 3223060480 }, { "epoch": 0.95, "learning_rate": 2.3509588381609566e-05, "loss": 0.0614, "theoretical_loss": 3.331089905127051, "tokens_seen": 3223191552 }, { "epoch": 0.95, "learning_rate": 2.346946963010511e-05, "loss": 0.063, "theoretical_loss": 3.3310797291731884, "tokens_seen": 3223322624 }, { "epoch": 0.95, "learning_rate": 2.342935087860066e-05, "loss": 0.062, "theoretical_loss": 3.331069553748965, "tokens_seen": 3223453696 }, { "epoch": 0.95, "learning_rate": 2.3389232127096207e-05, "loss": 0.0642, "theoretical_loss": 3.3310593788543312, "tokens_seen": 3223584768 }, { "epoch": 0.95, "learning_rate": 2.334911337559175e-05, "loss": 0.0661, "theoretical_loss": 3.331049204489238, "tokens_seen": 3223715840 }, { "epoch": 0.95, "learning_rate": 2.33089946240873e-05, "loss": 0.06, "theoretical_loss": 3.331039030653637, "tokens_seen": 3223846912 }, { "epoch": 0.95, "learning_rate": 2.3268875872582844e-05, "loss": 0.0623, "theoretical_loss": 3.3310288573474787, "tokens_seen": 3223977984 }, { "epoch": 0.95, "learning_rate": 2.3228757121078392e-05, "loss": 0.0606, "theoretical_loss": 3.3310186845707137, "tokens_seen": 3224109056 }, { "epoch": 0.95, "learning_rate": 2.318863836957394e-05, "loss": 0.0691, "theoretical_loss": 3.331008512323293, "tokens_seen": 3224240128 }, { "epoch": 0.95, "learning_rate": 2.3148519618069488e-05, "loss": 0.0605, "theoretical_loss": 3.330998340605168, "tokens_seen": 3224371200 }, { "epoch": 0.95, "learning_rate": 2.3108400866565033e-05, "loss": 0.0646, "theoretical_loss": 3.330988169416289, "tokens_seen": 3224502272 }, { "epoch": 0.95, "learning_rate": 2.3068282115060578e-05, "loss": 0.0645, "theoretical_loss": 3.3309779987566075, "tokens_seen": 3224633344 }, { "epoch": 0.95, "learning_rate": 2.302816336355613e-05, "loss": 0.0638, "theoretical_loss": 3.3309678286260747, "tokens_seen": 3224764416 }, { "epoch": 0.95, "learning_rate": 2.2988044612051674e-05, "loss": 0.0667, "theoretical_loss": 3.330957659024641, "tokens_seen": 3224895488 }, { "epoch": 0.95, "learning_rate": 2.2947925860547222e-05, "loss": 0.0648, "theoretical_loss": 3.3309474899522566, "tokens_seen": 3225026560 }, { "epoch": 0.95, "learning_rate": 2.2907807109042766e-05, "loss": 0.0626, "theoretical_loss": 3.330937321408874, "tokens_seen": 3225157632 }, { "epoch": 0.95, "learning_rate": 2.286768835753831e-05, "loss": 0.0599, "theoretical_loss": 3.330927153394444, "tokens_seen": 3225288704 }, { "epoch": 0.95, "learning_rate": 2.2827569606033863e-05, "loss": 0.0585, "theoretical_loss": 3.3309169859089165, "tokens_seen": 3225419776 }, { "epoch": 0.95, "learning_rate": 2.2787450854529407e-05, "loss": 0.0639, "theoretical_loss": 3.3309068189522435, "tokens_seen": 3225550848 }, { "epoch": 0.95, "learning_rate": 2.2747332103024955e-05, "loss": 0.0668, "theoretical_loss": 3.3308966525243755, "tokens_seen": 3225681920 }, { "epoch": 0.96, "learning_rate": 2.27072133515205e-05, "loss": 0.0652, "theoretical_loss": 3.3308864866252637, "tokens_seen": 3225812992 }, { "epoch": 0.96, "learning_rate": 2.2667094600016048e-05, "loss": 0.0622, "theoretical_loss": 3.330876321254859, "tokens_seen": 3225944064 }, { "epoch": 0.96, "objective/train/advantage_avg": 5.419146873464342e-06, "objective/train/docs_used": 1172070, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2919609546661377, "objective/train/original_loss": 1.2919608354568481, "objective/train/theoretical_loss": 3.330866156413113, "objective/train/tokens_used": 1596599776, "objective/train/value_avg": -0.007965087890625, "objective/train/value_loss": 0.00030827888986095786, "objective/train/value_max": -3.0934810638427734e-05, "objective/train/value_min": -0.77197265625, "objective/train/value_reward_corr": 0.7185187212608153, "objective/train/value_std": 0.018707275390625, "objective/train/weight_avg": 1.0001412630081177, "objective/train/weighted_lm_loss": 1.2925221920013428, "objective/train/weights_max": 1.6894240379333496, "objective/train/weights_min": 0.37555330991744995, "theoretical_loss": 3.330866156413113, "tokens_seen": 3226075136 }, { "epoch": 0.96, "learning_rate": 2.2626975848511596e-05, "loss": 0.0594, "theoretical_loss": 3.330866156413113, "tokens_seen": 3226075136 }, { "epoch": 0.96, "learning_rate": 2.258685709700714e-05, "loss": 0.0657, "theoretical_loss": 3.3308559920999756, "tokens_seen": 3226206208 }, { "epoch": 0.96, "learning_rate": 2.254673834550269e-05, "loss": 0.0609, "theoretical_loss": 3.3308458283153985, "tokens_seen": 3226337280 }, { "epoch": 0.96, "learning_rate": 2.2506619593998233e-05, "loss": 0.0668, "theoretical_loss": 3.3308356650593334, "tokens_seen": 3226468352 }, { "epoch": 0.96, "learning_rate": 2.246650084249378e-05, "loss": 0.0653, "theoretical_loss": 3.33082550233173, "tokens_seen": 3226599424 }, { "epoch": 0.96, "learning_rate": 2.242638209098933e-05, "loss": 0.0598, "theoretical_loss": 3.33081534013254, "tokens_seen": 3226730496 }, { "epoch": 0.96, "learning_rate": 2.2386263339484874e-05, "loss": 0.0589, "theoretical_loss": 3.330805178461715, "tokens_seen": 3226861568 }, { "epoch": 0.96, "learning_rate": 2.2346144587980422e-05, "loss": 0.0644, "theoretical_loss": 3.3307950173192054, "tokens_seen": 3226992640 }, { "epoch": 0.96, "learning_rate": 2.230602583647597e-05, "loss": 0.0657, "theoretical_loss": 3.330784856704962, "tokens_seen": 3227123712 }, { "epoch": 0.96, "learning_rate": 2.226590708497152e-05, "loss": 0.0638, "theoretical_loss": 3.3307746966189367, "tokens_seen": 3227254784 }, { "epoch": 0.96, "learning_rate": 2.2225788333467063e-05, "loss": 0.0652, "theoretical_loss": 3.3307645370610803, "tokens_seen": 3227385856 }, { "epoch": 0.96, "learning_rate": 2.2185669581962608e-05, "loss": 0.0619, "theoretical_loss": 3.3307543780313433, "tokens_seen": 3227516928 }, { "epoch": 0.96, "learning_rate": 2.214555083045816e-05, "loss": 0.063, "theoretical_loss": 3.3307442195296777, "tokens_seen": 3227648000 }, { "epoch": 0.96, "learning_rate": 2.2105432078953704e-05, "loss": 0.0687, "theoretical_loss": 3.330734061556034, "tokens_seen": 3227779072 }, { "epoch": 0.96, "learning_rate": 2.2065313327449252e-05, "loss": 0.0633, "theoretical_loss": 3.330723904110364, "tokens_seen": 3227910144 }, { "epoch": 0.96, "learning_rate": 2.2025194575944797e-05, "loss": 0.0645, "theoretical_loss": 3.3307137471926174, "tokens_seen": 3228041216 }, { "epoch": 0.96, "learning_rate": 2.198507582444034e-05, "loss": 0.0645, "theoretical_loss": 3.330703590802747, "tokens_seen": 3228172288 }, { "epoch": 0.96, "learning_rate": 2.1944957072935893e-05, "loss": 0.0655, "theoretical_loss": 3.330693434940703, "tokens_seen": 3228303360 }, { "epoch": 0.96, "learning_rate": 2.1904838321431437e-05, "loss": 0.0613, "theoretical_loss": 3.3306832796064367, "tokens_seen": 3228434432 }, { "epoch": 0.96, "learning_rate": 2.1864719569926985e-05, "loss": 0.0628, "theoretical_loss": 3.3306731247998993, "tokens_seen": 3228565504 }, { "epoch": 0.96, "learning_rate": 2.182460081842253e-05, "loss": 0.0648, "theoretical_loss": 3.330662970521042, "tokens_seen": 3228696576 }, { "epoch": 0.96, "learning_rate": 2.1784482066918078e-05, "loss": 0.065, "theoretical_loss": 3.330652816769816, "tokens_seen": 3228827648 }, { "epoch": 0.96, "learning_rate": 2.1744363315413626e-05, "loss": 0.0683, "theoretical_loss": 3.330642663546172, "tokens_seen": 3228958720 }, { "epoch": 0.96, "learning_rate": 2.170424456390917e-05, "loss": 0.0629, "theoretical_loss": 3.330632510850062, "tokens_seen": 3229089792 }, { "epoch": 0.96, "learning_rate": 2.166412581240472e-05, "loss": 0.0609, "theoretical_loss": 3.330622358681436, "tokens_seen": 3229220864 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.0004282891459297389, "objective/train/docs_used": 1173331, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3055778741836548, "objective/train/original_loss": 1.3055778741836548, "objective/train/theoretical_loss": 3.3306122070402466, "objective/train/tokens_used": 1599876576, "objective/train/value_avg": -0.00661468505859375, "objective/train/value_loss": 0.0002891377662308514, "objective/train/value_max": -2.0623207092285156e-05, "objective/train/value_min": -0.9931640625, "objective/train/value_reward_corr": 0.7078800631842583, "objective/train/value_std": 0.015869140625, "objective/train/weight_avg": 1.0005474090576172, "objective/train/weighted_lm_loss": 1.3054112195968628, "objective/train/weights_max": 1.26400625705719, "objective/train/weights_min": 0.23887288570404053, "theoretical_loss": 3.3306122070402466, "tokens_seen": 3229351936 }, { "epoch": 0.96, "learning_rate": 2.1624007060900264e-05, "loss": 0.0609, "theoretical_loss": 3.3306122070402466, "tokens_seen": 3229351936 }, { "epoch": 0.96, "learning_rate": 2.1583888309395812e-05, "loss": 0.0654, "theoretical_loss": 3.3306020559264438, "tokens_seen": 3229483008 }, { "epoch": 0.96, "learning_rate": 2.154376955789136e-05, "loss": 0.0674, "theoretical_loss": 3.33059190533998, "tokens_seen": 3229614080 }, { "epoch": 0.96, "learning_rate": 2.1503650806386904e-05, "loss": 0.0663, "theoretical_loss": 3.330581755280805, "tokens_seen": 3229745152 }, { "epoch": 0.96, "learning_rate": 2.1463532054882452e-05, "loss": 0.0635, "theoretical_loss": 3.3305716057488706, "tokens_seen": 3229876224 }, { "epoch": 0.96, "learning_rate": 2.1423413303378e-05, "loss": 0.0649, "theoretical_loss": 3.3305614567441286, "tokens_seen": 3230007296 }, { "epoch": 0.96, "learning_rate": 2.1383294551873545e-05, "loss": 0.0637, "theoretical_loss": 3.33055130826653, "tokens_seen": 3230138368 }, { "epoch": 0.96, "learning_rate": 2.1343175800369093e-05, "loss": 0.0628, "theoretical_loss": 3.330541160316025, "tokens_seen": 3230269440 }, { "epoch": 0.96, "learning_rate": 2.1303057048864638e-05, "loss": 0.0645, "theoretical_loss": 3.3305310128925663, "tokens_seen": 3230400512 }, { "epoch": 0.96, "learning_rate": 2.1262938297360186e-05, "loss": 0.0642, "theoretical_loss": 3.330520865996104, "tokens_seen": 3230531584 }, { "epoch": 0.96, "learning_rate": 2.1222819545855734e-05, "loss": 0.0606, "theoretical_loss": 3.33051071962659, "tokens_seen": 3230662656 }, { "epoch": 0.96, "learning_rate": 2.1182700794351282e-05, "loss": 0.0632, "theoretical_loss": 3.3305005737839752, "tokens_seen": 3230793728 }, { "epoch": 0.96, "learning_rate": 2.1142582042846827e-05, "loss": 0.0612, "theoretical_loss": 3.3304904284682113, "tokens_seen": 3230924800 }, { "epoch": 0.96, "learning_rate": 2.110246329134237e-05, "loss": 0.0632, "theoretical_loss": 3.3304802836792495, "tokens_seen": 3231055872 }, { "epoch": 0.96, "learning_rate": 2.1062344539837923e-05, "loss": 0.0599, "theoretical_loss": 3.3304701394170406, "tokens_seen": 3231186944 }, { "epoch": 0.96, "learning_rate": 2.1022225788333468e-05, "loss": 0.0638, "theoretical_loss": 3.3304599956815357, "tokens_seen": 3231318016 }, { "epoch": 0.96, "learning_rate": 2.0982107036829016e-05, "loss": 0.0644, "theoretical_loss": 3.3304498524726873, "tokens_seen": 3231449088 }, { "epoch": 0.96, "learning_rate": 2.094198828532456e-05, "loss": 0.0682, "theoretical_loss": 3.3304397097904457, "tokens_seen": 3231580160 }, { "epoch": 0.96, "learning_rate": 2.0901869533820105e-05, "loss": 0.0679, "theoretical_loss": 3.3304295676347624, "tokens_seen": 3231711232 }, { "epoch": 0.96, "learning_rate": 2.0861750782315656e-05, "loss": 0.0642, "theoretical_loss": 3.330419426005589, "tokens_seen": 3231842304 }, { "epoch": 0.96, "learning_rate": 2.08216320308112e-05, "loss": 0.0648, "theoretical_loss": 3.3304092849028764, "tokens_seen": 3231973376 }, { "epoch": 0.96, "learning_rate": 2.078151327930675e-05, "loss": 0.066, "theoretical_loss": 3.330399144326576, "tokens_seen": 3232104448 }, { "epoch": 0.96, "learning_rate": 2.0741394527802294e-05, "loss": 0.0646, "theoretical_loss": 3.3303890042766393, "tokens_seen": 3232235520 }, { "epoch": 0.96, "learning_rate": 2.0701275776297842e-05, "loss": 0.0641, "theoretical_loss": 3.3303788647530173, "tokens_seen": 3232366592 }, { "epoch": 0.96, "learning_rate": 2.066115702479339e-05, "loss": 0.0654, "theoretical_loss": 3.3303687257556622, "tokens_seen": 3232497664 }, { "epoch": 0.96, "objective/train/advantage_avg": -0.0003204024978913367, "objective/train/docs_used": 1174536, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2978699207305908, "objective/train/original_loss": 1.2978699207305908, "objective/train/theoretical_loss": 3.3303585872845245, "objective/train/tokens_used": 1603153376, "objective/train/value_avg": -0.00629425048828125, "objective/train/value_loss": 0.000182927557034418, "objective/train/value_max": -2.5272369384765625e-05, "objective/train/value_min": -0.66796875, "objective/train/value_reward_corr": 0.7506127773096378, "objective/train/value_std": 0.01384735107421875, "objective/train/weight_avg": 0.9997624754905701, "objective/train/weighted_lm_loss": 1.2972239255905151, "objective/train/weights_max": 1.2334792613983154, "objective/train/weights_min": 0.37021923065185547, "theoretical_loss": 3.3303585872845245, "tokens_seen": 3232628736 }, { "epoch": 0.96, "learning_rate": 2.0621038273288935e-05, "loss": 0.0628, "theoretical_loss": 3.3303585872845245, "tokens_seen": 3232628736 }, { "epoch": 0.96, "learning_rate": 2.0580919521784483e-05, "loss": 0.0676, "theoretical_loss": 3.3303484493395556, "tokens_seen": 3232759808 }, { "epoch": 0.96, "learning_rate": 2.054080077028003e-05, "loss": 0.0633, "theoretical_loss": 3.3303383119207073, "tokens_seen": 3232890880 }, { "epoch": 0.96, "learning_rate": 2.0500682018775575e-05, "loss": 0.0649, "theoretical_loss": 3.3303281750279305, "tokens_seen": 3233021952 }, { "epoch": 0.96, "learning_rate": 2.0460563267271123e-05, "loss": 0.065, "theoretical_loss": 3.330318038661177, "tokens_seen": 3233153024 }, { "epoch": 0.96, "learning_rate": 2.0420444515766668e-05, "loss": 0.0623, "theoretical_loss": 3.3303079028203983, "tokens_seen": 3233284096 }, { "epoch": 0.96, "learning_rate": 2.0380325764262216e-05, "loss": 0.0626, "theoretical_loss": 3.330297767505545, "tokens_seen": 3233415168 }, { "epoch": 0.96, "learning_rate": 2.0340207012757764e-05, "loss": 0.0632, "theoretical_loss": 3.330287632716569, "tokens_seen": 3233546240 }, { "epoch": 0.96, "learning_rate": 2.0300088261253312e-05, "loss": 0.0631, "theoretical_loss": 3.3302774984534222, "tokens_seen": 3233677312 }, { "epoch": 0.96, "learning_rate": 2.0259969509748857e-05, "loss": 0.0618, "theoretical_loss": 3.330267364716055, "tokens_seen": 3233808384 }, { "epoch": 0.96, "learning_rate": 2.02198507582444e-05, "loss": 0.0648, "theoretical_loss": 3.330257231504419, "tokens_seen": 3233939456 }, { "epoch": 0.96, "learning_rate": 2.0179732006739953e-05, "loss": 0.0669, "theoretical_loss": 3.3302470988184667, "tokens_seen": 3234070528 }, { "epoch": 0.96, "learning_rate": 2.0139613255235498e-05, "loss": 0.0644, "theoretical_loss": 3.3302369666581484, "tokens_seen": 3234201600 }, { "epoch": 0.96, "learning_rate": 2.0099494503731046e-05, "loss": 0.0621, "theoretical_loss": 3.3302268350234154, "tokens_seen": 3234332672 }, { "epoch": 0.96, "learning_rate": 2.005937575222659e-05, "loss": 0.0633, "theoretical_loss": 3.3302167039142203, "tokens_seen": 3234463744 }, { "epoch": 0.96, "learning_rate": 2.0019257000722135e-05, "loss": 0.059, "theoretical_loss": 3.3302065733305133, "tokens_seen": 3234594816 }, { "epoch": 0.96, "learning_rate": 1.9979138249217687e-05, "loss": 0.0632, "theoretical_loss": 3.330196443272247, "tokens_seen": 3234725888 }, { "epoch": 0.96, "learning_rate": 1.993901949771323e-05, "loss": 0.0614, "theoretical_loss": 3.3301863137393717, "tokens_seen": 3234856960 }, { "epoch": 0.96, "learning_rate": 1.989890074620878e-05, "loss": 0.0614, "theoretical_loss": 3.3301761847318394, "tokens_seen": 3234988032 }, { "epoch": 0.96, "learning_rate": 1.9858781994704324e-05, "loss": 0.0678, "theoretical_loss": 3.330166056249602, "tokens_seen": 3235119104 }, { "epoch": 0.96, "learning_rate": 1.9818663243199872e-05, "loss": 0.0605, "theoretical_loss": 3.3301559282926103, "tokens_seen": 3235250176 }, { "epoch": 0.96, "learning_rate": 1.977854449169542e-05, "loss": 0.0649, "theoretical_loss": 3.3301458008608162, "tokens_seen": 3235381248 }, { "epoch": 0.96, "learning_rate": 1.9738425740190965e-05, "loss": 0.0632, "theoretical_loss": 3.330135673954171, "tokens_seen": 3235512320 }, { "epoch": 0.96, "learning_rate": 1.9698306988686513e-05, "loss": 0.0626, "theoretical_loss": 3.3301255475726266, "tokens_seen": 3235643392 }, { "epoch": 0.96, "learning_rate": 1.9658188237182058e-05, "loss": 0.0634, "theoretical_loss": 3.3301154217161337, "tokens_seen": 3235774464 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.00014976636157371104, "objective/train/docs_used": 1175644, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.328129529953003, "objective/train/original_loss": 1.328129529953003, "objective/train/theoretical_loss": 3.3301052963846445, "objective/train/tokens_used": 1606430176, "objective/train/value_avg": -0.00841522216796875, "objective/train/value_loss": 0.00016073629376478493, "objective/train/value_max": -1.9073486328125e-05, "objective/train/value_min": -0.336669921875, "objective/train/value_reward_corr": 0.8393677676565259, "objective/train/value_std": 0.0188751220703125, "objective/train/weight_avg": 1.000225305557251, "objective/train/weighted_lm_loss": 1.32878839969635, "objective/train/weights_max": 1.2562730312347412, "objective/train/weights_min": 0.3745575249195099, "theoretical_loss": 3.3301052963846445, "tokens_seen": 3235905536 }, { "epoch": 0.96, "learning_rate": 1.9618069485677606e-05, "loss": 0.0637, "theoretical_loss": 3.3301052963846445, "tokens_seen": 3235905536 }, { "epoch": 0.96, "learning_rate": 1.9577950734173154e-05, "loss": 0.0672, "theoretical_loss": 3.33009517157811, "tokens_seen": 3236036608 }, { "epoch": 0.96, "learning_rate": 1.95378319826687e-05, "loss": 0.0657, "theoretical_loss": 3.3300850472964822, "tokens_seen": 3236167680 }, { "epoch": 0.96, "learning_rate": 1.9497713231164246e-05, "loss": 0.0614, "theoretical_loss": 3.330074923539712, "tokens_seen": 3236298752 }, { "epoch": 0.96, "learning_rate": 1.9457594479659794e-05, "loss": 0.0631, "theoretical_loss": 3.3300648003077518, "tokens_seen": 3236429824 }, { "epoch": 0.96, "learning_rate": 1.941747572815534e-05, "loss": 0.0593, "theoretical_loss": 3.3300546776005526, "tokens_seen": 3236560896 }, { "epoch": 0.96, "learning_rate": 1.9377356976650887e-05, "loss": 0.066, "theoretical_loss": 3.330044555418066, "tokens_seen": 3236691968 }, { "epoch": 0.96, "learning_rate": 1.9337238225146432e-05, "loss": 0.0633, "theoretical_loss": 3.330034433760244, "tokens_seen": 3236823040 }, { "epoch": 0.96, "learning_rate": 1.9297119473641983e-05, "loss": 0.0598, "theoretical_loss": 3.330024312627037, "tokens_seen": 3236954112 }, { "epoch": 0.96, "learning_rate": 1.9257000722137528e-05, "loss": 0.0618, "theoretical_loss": 3.330014192018398, "tokens_seen": 3237085184 }, { "epoch": 0.96, "learning_rate": 1.9216881970633076e-05, "loss": 0.0623, "theoretical_loss": 3.330004071934278, "tokens_seen": 3237216256 }, { "epoch": 0.96, "learning_rate": 1.917676321912862e-05, "loss": 0.063, "theoretical_loss": 3.3299939523746276, "tokens_seen": 3237347328 }, { "epoch": 0.96, "learning_rate": 1.9136644467624165e-05, "loss": 0.0649, "theoretical_loss": 3.3299838333394, "tokens_seen": 3237478400 }, { "epoch": 0.96, "learning_rate": 1.9096525716119717e-05, "loss": 0.0596, "theoretical_loss": 3.329973714828546, "tokens_seen": 3237609472 }, { "epoch": 0.96, "learning_rate": 1.905640696461526e-05, "loss": 0.063, "theoretical_loss": 3.329963596842017, "tokens_seen": 3237740544 }, { "epoch": 0.96, "learning_rate": 1.901628821311081e-05, "loss": 0.062, "theoretical_loss": 3.329953479379765, "tokens_seen": 3237871616 }, { "epoch": 0.96, "learning_rate": 1.8976169461606354e-05, "loss": 0.0601, "theoretical_loss": 3.3299433624417416, "tokens_seen": 3238002688 }, { "epoch": 0.96, "learning_rate": 1.8936050710101902e-05, "loss": 0.0623, "theoretical_loss": 3.329933246027898, "tokens_seen": 3238133760 }, { "epoch": 0.96, "learning_rate": 1.889593195859745e-05, "loss": 0.0651, "theoretical_loss": 3.3299231301381864, "tokens_seen": 3238264832 }, { "epoch": 0.96, "learning_rate": 1.8855813207092995e-05, "loss": 0.0648, "theoretical_loss": 3.3299130147725577, "tokens_seen": 3238395904 }, { "epoch": 0.96, "learning_rate": 1.8815694455588543e-05, "loss": 0.0625, "theoretical_loss": 3.3299028999309646, "tokens_seen": 3238526976 }, { "epoch": 0.96, "learning_rate": 1.8775575704084088e-05, "loss": 0.0641, "theoretical_loss": 3.329892785613358, "tokens_seen": 3238658048 }, { "epoch": 0.96, "learning_rate": 1.8735456952579636e-05, "loss": 0.0603, "theoretical_loss": 3.3298826718196897, "tokens_seen": 3238789120 }, { "epoch": 0.96, "learning_rate": 1.8695338201075184e-05, "loss": 0.0601, "theoretical_loss": 3.329872558549911, "tokens_seen": 3238920192 }, { "epoch": 0.96, "learning_rate": 1.865521944957073e-05, "loss": 0.0628, "theoretical_loss": 3.3298624458039745, "tokens_seen": 3239051264 }, { "epoch": 0.96, "objective/train/advantage_avg": 0.00032087211729958653, "objective/train/docs_used": 1176615, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2903029918670654, "objective/train/original_loss": 1.2903029918670654, "objective/train/theoretical_loss": 3.3298523335818304, "objective/train/tokens_used": 1609706976, "objective/train/value_avg": -0.01198577880859375, "objective/train/value_loss": 0.00031094273435883224, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.66650390625, "objective/train/value_reward_corr": 0.8010630394868313, "objective/train/value_std": 0.0232696533203125, "objective/train/weight_avg": 1.0004661083221436, "objective/train/weighted_lm_loss": 1.29048490524292, "objective/train/weights_max": 1.3449902534484863, "objective/train/weights_min": 0.39820975065231323, "theoretical_loss": 3.3298523335818304, "tokens_seen": 3239182336 }, { "epoch": 0.96, "learning_rate": 1.8615100698066277e-05, "loss": 0.0646, "theoretical_loss": 3.3298523335818304, "tokens_seen": 3239182336 }, { "epoch": 0.96, "learning_rate": 1.8574981946561825e-05, "loss": 0.0641, "theoretical_loss": 3.3298422218834323, "tokens_seen": 3239313408 }, { "epoch": 0.96, "learning_rate": 1.853486319505737e-05, "loss": 0.0649, "theoretical_loss": 3.3298321107087303, "tokens_seen": 3239444480 }, { "epoch": 0.96, "learning_rate": 1.8494744443552917e-05, "loss": 0.0671, "theoretical_loss": 3.3298220000576766, "tokens_seen": 3239575552 }, { "epoch": 0.96, "learning_rate": 1.8454625692048462e-05, "loss": 0.0633, "theoretical_loss": 3.329811889930223, "tokens_seen": 3239706624 }, { "epoch": 0.96, "learning_rate": 1.841450694054401e-05, "loss": 0.0633, "theoretical_loss": 3.329801780326321, "tokens_seen": 3239837696 }, { "epoch": 0.96, "learning_rate": 1.8374388189039558e-05, "loss": 0.0625, "theoretical_loss": 3.3297916712459226, "tokens_seen": 3239968768 }, { "epoch": 0.96, "learning_rate": 1.8334269437535106e-05, "loss": 0.0642, "theoretical_loss": 3.3297815626889795, "tokens_seen": 3240099840 }, { "epoch": 0.96, "learning_rate": 1.829415068603065e-05, "loss": 0.0623, "theoretical_loss": 3.329771454655443, "tokens_seen": 3240230912 }, { "epoch": 0.96, "learning_rate": 1.8254031934526196e-05, "loss": 0.0673, "theoretical_loss": 3.329761347145265, "tokens_seen": 3240361984 }, { "epoch": 0.96, "learning_rate": 1.8213913183021747e-05, "loss": 0.0648, "theoretical_loss": 3.3297512401583975, "tokens_seen": 3240493056 }, { "epoch": 0.96, "learning_rate": 1.817379443151729e-05, "loss": 0.0615, "theoretical_loss": 3.3297411336947924, "tokens_seen": 3240624128 }, { "epoch": 0.96, "learning_rate": 1.813367568001284e-05, "loss": 0.063, "theoretical_loss": 3.3297310277544008, "tokens_seen": 3240755200 }, { "epoch": 0.96, "learning_rate": 1.8093556928508384e-05, "loss": 0.0669, "theoretical_loss": 3.3297209223371746, "tokens_seen": 3240886272 }, { "epoch": 0.96, "learning_rate": 1.805343817700393e-05, "loss": 0.0631, "theoretical_loss": 3.329710817443066, "tokens_seen": 3241017344 }, { "epoch": 0.96, "learning_rate": 1.801331942549948e-05, "loss": 0.0633, "theoretical_loss": 3.329700713072026, "tokens_seen": 3241148416 }, { "epoch": 0.96, "learning_rate": 1.7973200673995025e-05, "loss": 0.0615, "theoretical_loss": 3.3296906092240075, "tokens_seen": 3241279488 }, { "epoch": 0.96, "learning_rate": 1.7933081922490573e-05, "loss": 0.0634, "theoretical_loss": 3.3296805058989616, "tokens_seen": 3241410560 }, { "epoch": 0.96, "learning_rate": 1.7892963170986118e-05, "loss": 0.066, "theoretical_loss": 3.32967040309684, "tokens_seen": 3241541632 }, { "epoch": 0.96, "learning_rate": 1.7852844419481666e-05, "loss": 0.0643, "theoretical_loss": 3.3296603008175945, "tokens_seen": 3241672704 }, { "epoch": 0.96, "learning_rate": 1.7812725667977214e-05, "loss": 0.064, "theoretical_loss": 3.3296501990611773, "tokens_seen": 3241803776 }, { "epoch": 0.96, "learning_rate": 1.777260691647276e-05, "loss": 0.0657, "theoretical_loss": 3.32964009782754, "tokens_seen": 3241934848 }, { "epoch": 0.96, "learning_rate": 1.7732488164968307e-05, "loss": 0.0634, "theoretical_loss": 3.329629997116634, "tokens_seen": 3242065920 }, { "epoch": 0.96, "learning_rate": 1.7692369413463855e-05, "loss": 0.0643, "theoretical_loss": 3.3296198969284116, "tokens_seen": 3242196992 }, { "epoch": 0.97, "learning_rate": 1.76522506619594e-05, "loss": 0.0624, "theoretical_loss": 3.3296097972628242, "tokens_seen": 3242328064 }, { "epoch": 0.97, "objective/train/advantage_avg": -0.00046441989252343774, "objective/train/docs_used": 1177712, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2305247783660889, "objective/train/original_loss": 1.2305247783660889, "objective/train/theoretical_loss": 3.3295996981198246, "objective/train/tokens_used": 1612983776, "objective/train/value_avg": -0.006511688232421875, "objective/train/value_loss": 0.00024891889188438654, "objective/train/value_max": -5.4776668548583984e-05, "objective/train/value_min": -0.24560546875, "objective/train/value_reward_corr": 0.7710545966781841, "objective/train/value_std": 0.0149078369140625, "objective/train/weight_avg": 0.9996459484100342, "objective/train/weighted_lm_loss": 1.229850172996521, "objective/train/weights_max": 1.2005599737167358, "objective/train/weights_min": 0.388961523771286, "theoretical_loss": 3.3295996981198246, "tokens_seen": 3242459136 }, { "epoch": 0.97, "learning_rate": 1.7612131910454948e-05, "loss": 0.0581, "theoretical_loss": 3.3295996981198246, "tokens_seen": 3242459136 }, { "epoch": 0.97, "learning_rate": 1.7572013158950492e-05, "loss": 0.0627, "theoretical_loss": 3.3295895994993634, "tokens_seen": 3242590208 }, { "epoch": 0.97, "learning_rate": 1.753189440744604e-05, "loss": 0.0635, "theoretical_loss": 3.329579501401393, "tokens_seen": 3242721280 }, { "epoch": 0.97, "learning_rate": 1.749177565594159e-05, "loss": 0.0656, "theoretical_loss": 3.3295694038258654, "tokens_seen": 3242852352 }, { "epoch": 0.97, "learning_rate": 1.7451656904437133e-05, "loss": 0.0607, "theoretical_loss": 3.3295593067727323, "tokens_seen": 3242983424 }, { "epoch": 0.97, "learning_rate": 1.741153815293268e-05, "loss": 0.0622, "theoretical_loss": 3.3295492102419457, "tokens_seen": 3243114496 }, { "epoch": 0.97, "learning_rate": 1.7371419401428226e-05, "loss": 0.0647, "theoretical_loss": 3.329539114233457, "tokens_seen": 3243245568 }, { "epoch": 0.97, "learning_rate": 1.7331300649923777e-05, "loss": 0.0669, "theoretical_loss": 3.329529018747219, "tokens_seen": 3243376640 }, { "epoch": 0.97, "learning_rate": 1.7291181898419322e-05, "loss": 0.0634, "theoretical_loss": 3.3295189237831826, "tokens_seen": 3243507712 }, { "epoch": 0.97, "learning_rate": 1.725106314691487e-05, "loss": 0.0618, "theoretical_loss": 3.3295088293412998, "tokens_seen": 3243638784 }, { "epoch": 0.97, "learning_rate": 1.7210944395410415e-05, "loss": 0.0588, "theoretical_loss": 3.329498735421523, "tokens_seen": 3243769856 }, { "epoch": 0.97, "learning_rate": 1.717082564390596e-05, "loss": 0.0658, "theoretical_loss": 3.329488642023804, "tokens_seen": 3243900928 }, { "epoch": 0.97, "learning_rate": 1.713070689240151e-05, "loss": 0.0605, "theoretical_loss": 3.3294785491480945, "tokens_seen": 3244032000 }, { "epoch": 0.97, "learning_rate": 1.7090588140897055e-05, "loss": 0.0626, "theoretical_loss": 3.3294684567943467, "tokens_seen": 3244163072 }, { "epoch": 0.97, "learning_rate": 1.7050469389392603e-05, "loss": 0.064, "theoretical_loss": 3.3294583649625116, "tokens_seen": 3244294144 }, { "epoch": 0.97, "learning_rate": 1.7010350637888148e-05, "loss": 0.0629, "theoretical_loss": 3.3294482736525426, "tokens_seen": 3244425216 }, { "epoch": 0.97, "learning_rate": 1.6970231886383696e-05, "loss": 0.0657, "theoretical_loss": 3.3294381828643904, "tokens_seen": 3244556288 }, { "epoch": 0.97, "learning_rate": 1.6930113134879244e-05, "loss": 0.0615, "theoretical_loss": 3.3294280925980075, "tokens_seen": 3244687360 }, { "epoch": 0.97, "learning_rate": 1.688999438337479e-05, "loss": 0.0645, "theoretical_loss": 3.329418002853346, "tokens_seen": 3244818432 }, { "epoch": 0.97, "learning_rate": 1.6849875631870337e-05, "loss": 0.0634, "theoretical_loss": 3.3294079136303574, "tokens_seen": 3244949504 }, { "epoch": 0.97, "learning_rate": 1.680975688036588e-05, "loss": 0.0617, "theoretical_loss": 3.3293978249289937, "tokens_seen": 3245080576 }, { "epoch": 0.97, "learning_rate": 1.676963812886143e-05, "loss": 0.0657, "theoretical_loss": 3.3293877367492075, "tokens_seen": 3245211648 }, { "epoch": 0.97, "learning_rate": 1.6729519377356978e-05, "loss": 0.0627, "theoretical_loss": 3.32937764909095, "tokens_seen": 3245342720 }, { "epoch": 0.97, "learning_rate": 1.6689400625852522e-05, "loss": 0.0661, "theoretical_loss": 3.3293675619541734, "tokens_seen": 3245473792 }, { "epoch": 0.97, "learning_rate": 1.664928187434807e-05, "loss": 0.0645, "theoretical_loss": 3.32935747533883, "tokens_seen": 3245604864 }, { "epoch": 0.97, "objective/train/advantage_avg": -0.000281167624052614, "objective/train/docs_used": 1178984, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3685897588729858, "objective/train/original_loss": 1.3685897588729858, "objective/train/theoretical_loss": 3.3293473892448713, "objective/train/tokens_used": 1616260576, "objective/train/value_avg": -0.0092315673828125, "objective/train/value_loss": 0.00018864420417230576, "objective/train/value_max": -3.272294998168945e-05, "objective/train/value_min": -0.236572265625, "objective/train/value_reward_corr": 0.8092628092905488, "objective/train/value_std": 0.018890380859375, "objective/train/weight_avg": 0.9998118281364441, "objective/train/weighted_lm_loss": 1.3677239418029785, "objective/train/weights_max": 1.2229843139648438, "objective/train/weights_min": 0.821930468082428, "theoretical_loss": 3.3293473892448713, "tokens_seen": 3245735936 }, { "epoch": 0.97, "learning_rate": 1.660916312284362e-05, "loss": 0.0665, "theoretical_loss": 3.3293473892448713, "tokens_seen": 3245735936 }, { "epoch": 0.97, "learning_rate": 1.6569044371339163e-05, "loss": 0.0646, "theoretical_loss": 3.32933730367225, "tokens_seen": 3245867008 }, { "epoch": 0.97, "learning_rate": 1.652892561983471e-05, "loss": 0.0633, "theoretical_loss": 3.329327218620917, "tokens_seen": 3245998080 }, { "epoch": 0.97, "learning_rate": 1.6488806868330256e-05, "loss": 0.0612, "theoretical_loss": 3.329317134090825, "tokens_seen": 3246129152 }, { "epoch": 0.97, "learning_rate": 1.6448688116825807e-05, "loss": 0.0614, "theoretical_loss": 3.329307050081926, "tokens_seen": 3246260224 }, { "epoch": 0.97, "learning_rate": 1.6408569365321352e-05, "loss": 0.0624, "theoretical_loss": 3.3292969665941725, "tokens_seen": 3246391296 }, { "epoch": 0.97, "learning_rate": 1.63684506138169e-05, "loss": 0.0644, "theoretical_loss": 3.329286883627516, "tokens_seen": 3246522368 }, { "epoch": 0.97, "learning_rate": 1.6328331862312445e-05, "loss": 0.0646, "theoretical_loss": 3.3292768011819076, "tokens_seen": 3246653440 }, { "epoch": 0.97, "learning_rate": 1.628821311080799e-05, "loss": 0.0594, "theoretical_loss": 3.329266719257301, "tokens_seen": 3246784512 }, { "epoch": 0.97, "learning_rate": 1.624809435930354e-05, "loss": 0.0635, "theoretical_loss": 3.3292566378536472, "tokens_seen": 3246915584 }, { "epoch": 0.97, "learning_rate": 1.6207975607799086e-05, "loss": 0.0644, "theoretical_loss": 3.329246556970899, "tokens_seen": 3247046656 }, { "epoch": 0.97, "learning_rate": 1.6167856856294634e-05, "loss": 0.0615, "theoretical_loss": 3.329236476609008, "tokens_seen": 3247177728 }, { "epoch": 0.97, "learning_rate": 1.612773810479018e-05, "loss": 0.062, "theoretical_loss": 3.3292263967679263, "tokens_seen": 3247308800 }, { "epoch": 0.97, "learning_rate": 1.6087619353285726e-05, "loss": 0.0617, "theoretical_loss": 3.3292163174476057, "tokens_seen": 3247439872 }, { "epoch": 0.97, "learning_rate": 1.6047500601781274e-05, "loss": 0.0624, "theoretical_loss": 3.329206238647999, "tokens_seen": 3247570944 }, { "epoch": 0.97, "learning_rate": 1.600738185027682e-05, "loss": 0.0611, "theoretical_loss": 3.329196160369057, "tokens_seen": 3247702016 }, { "epoch": 0.97, "learning_rate": 1.5967263098772367e-05, "loss": 0.0628, "theoretical_loss": 3.3291860826107333, "tokens_seen": 3247833088 }, { "epoch": 0.97, "learning_rate": 1.5927144347267912e-05, "loss": 0.0633, "theoretical_loss": 3.3291760053729798, "tokens_seen": 3247964160 }, { "epoch": 0.97, "learning_rate": 1.588702559576346e-05, "loss": 0.0689, "theoretical_loss": 3.3291659286557476, "tokens_seen": 3248095232 }, { "epoch": 0.97, "learning_rate": 1.5846906844259008e-05, "loss": 0.0652, "theoretical_loss": 3.329155852458989, "tokens_seen": 3248226304 }, { "epoch": 0.97, "learning_rate": 1.5806788092754553e-05, "loss": 0.0643, "theoretical_loss": 3.329145776782657, "tokens_seen": 3248357376 }, { "epoch": 0.97, "learning_rate": 1.57666693412501e-05, "loss": 0.0631, "theoretical_loss": 3.329135701626703, "tokens_seen": 3248488448 }, { "epoch": 0.97, "learning_rate": 1.572655058974565e-05, "loss": 0.0601, "theoretical_loss": 3.329125626991079, "tokens_seen": 3248619520 }, { "epoch": 0.97, "learning_rate": 1.5686431838241193e-05, "loss": 0.0632, "theoretical_loss": 3.329115552875738, "tokens_seen": 3248750592 }, { "epoch": 0.97, "learning_rate": 1.564631308673674e-05, "loss": 0.065, "theoretical_loss": 3.3291054792806314, "tokens_seen": 3248881664 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.0008206228958442807, "objective/train/docs_used": 1180177, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.5078701972961426, "objective/train/original_loss": 1.5078701972961426, "objective/train/theoretical_loss": 3.3290954062057114, "objective/train/tokens_used": 1619537376, "objective/train/value_avg": -0.00443267822265625, "objective/train/value_loss": 0.00017720594769343734, "objective/train/value_max": -3.069639205932617e-05, "objective/train/value_min": -0.2113037109375, "objective/train/value_reward_corr": 0.3883148743704122, "objective/train/value_std": 0.007213592529296875, "objective/train/weight_avg": 1.0008951425552368, "objective/train/weighted_lm_loss": 1.5097239017486572, "objective/train/weights_max": 1.1671221256256104, "objective/train/weights_min": 0.3709288537502289, "theoretical_loss": 3.3290954062057114, "tokens_seen": 3249012736 }, { "epoch": 0.97, "learning_rate": 1.560619433523229e-05, "loss": 0.0669, "theoretical_loss": 3.3290954062057114, "tokens_seen": 3249012736 }, { "epoch": 0.97, "learning_rate": 1.5566075583727834e-05, "loss": 0.0606, "theoretical_loss": 3.3290853336509305, "tokens_seen": 3249143808 }, { "epoch": 0.97, "learning_rate": 1.5525956832223382e-05, "loss": 0.0657, "theoretical_loss": 3.3290752616162402, "tokens_seen": 3249274880 }, { "epoch": 0.97, "learning_rate": 1.5485838080718927e-05, "loss": 0.0643, "theoretical_loss": 3.329065190101594, "tokens_seen": 3249405952 }, { "epoch": 0.97, "learning_rate": 1.5445719329214475e-05, "loss": 0.0614, "theoretical_loss": 3.3290551191069424, "tokens_seen": 3249537024 }, { "epoch": 0.97, "learning_rate": 1.5405600577710023e-05, "loss": 0.0663, "theoretical_loss": 3.3290450486322385, "tokens_seen": 3249668096 }, { "epoch": 0.97, "learning_rate": 1.5365481826205568e-05, "loss": 0.0652, "theoretical_loss": 3.3290349786774347, "tokens_seen": 3249799168 }, { "epoch": 0.97, "learning_rate": 1.5325363074701116e-05, "loss": 0.0649, "theoretical_loss": 3.3290249092424826, "tokens_seen": 3249930240 }, { "epoch": 0.97, "learning_rate": 1.5285244323196664e-05, "loss": 0.0641, "theoretical_loss": 3.3290148403273347, "tokens_seen": 3250061312 }, { "epoch": 0.97, "learning_rate": 1.524512557169221e-05, "loss": 0.0641, "theoretical_loss": 3.3290047719319436, "tokens_seen": 3250192384 }, { "epoch": 0.97, "learning_rate": 1.5205006820187757e-05, "loss": 0.0642, "theoretical_loss": 3.3289947040562606, "tokens_seen": 3250323456 }, { "epoch": 0.97, "learning_rate": 1.5164888068683301e-05, "loss": 0.06, "theoretical_loss": 3.3289846367002385, "tokens_seen": 3250454528 }, { "epoch": 0.97, "learning_rate": 1.512476931717885e-05, "loss": 0.0663, "theoretical_loss": 3.3289745698638296, "tokens_seen": 3250585600 }, { "epoch": 0.97, "learning_rate": 1.5084650565674396e-05, "loss": 0.0638, "theoretical_loss": 3.3289645035469855, "tokens_seen": 3250716672 }, { "epoch": 0.97, "learning_rate": 1.5044531814169944e-05, "loss": 0.061, "theoretical_loss": 3.328954437749659, "tokens_seen": 3250847744 }, { "epoch": 0.97, "learning_rate": 1.500441306266549e-05, "loss": 0.0621, "theoretical_loss": 3.3289443724718026, "tokens_seen": 3250978816 }, { "epoch": 0.97, "learning_rate": 1.4964294311161038e-05, "loss": 0.0612, "theoretical_loss": 3.328934307713368, "tokens_seen": 3251109888 }, { "epoch": 0.97, "learning_rate": 1.4924175559656583e-05, "loss": 0.0654, "theoretical_loss": 3.328924243474307, "tokens_seen": 3251240960 }, { "epoch": 0.97, "learning_rate": 1.4884056808152131e-05, "loss": 0.0623, "theoretical_loss": 3.328914179754573, "tokens_seen": 3251372032 }, { "epoch": 0.97, "learning_rate": 1.4843938056647677e-05, "loss": 0.0665, "theoretical_loss": 3.328904116554118, "tokens_seen": 3251503104 }, { "epoch": 0.97, "learning_rate": 1.4803819305143225e-05, "loss": 0.0662, "theoretical_loss": 3.328894053872894, "tokens_seen": 3251634176 }, { "epoch": 0.97, "learning_rate": 1.4763700553638772e-05, "loss": 0.0628, "theoretical_loss": 3.3288839917108533, "tokens_seen": 3251765248 }, { "epoch": 0.97, "learning_rate": 1.4723581802134316e-05, "loss": 0.0609, "theoretical_loss": 3.3288739300679477, "tokens_seen": 3251896320 }, { "epoch": 0.97, "learning_rate": 1.4683463050629864e-05, "loss": 0.0644, "theoretical_loss": 3.3288638689441306, "tokens_seen": 3252027392 }, { "epoch": 0.97, "learning_rate": 1.464334429912541e-05, "loss": 0.063, "theoretical_loss": 3.3288538083393533, "tokens_seen": 3252158464 }, { "epoch": 0.97, "objective/train/advantage_avg": -0.000380589539417997, "objective/train/docs_used": 1181412, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2194581031799316, "objective/train/original_loss": 1.2194581031799316, "objective/train/theoretical_loss": 3.328843748253569, "objective/train/tokens_used": 1622814176, "objective/train/value_avg": -0.00691986083984375, "objective/train/value_loss": 0.00011079585237894207, "objective/train/value_max": -3.647804260253906e-05, "objective/train/value_min": -0.216064453125, "objective/train/value_reward_corr": 0.809284621853898, "objective/train/value_std": 0.01476287841796875, "objective/train/weight_avg": 0.9996742606163025, "objective/train/weighted_lm_loss": 1.2187222242355347, "objective/train/weights_max": 1.138139009475708, "objective/train/weights_min": 0.8142285943031311, "theoretical_loss": 3.328843748253569, "tokens_seen": 3252289536 }, { "epoch": 0.97, "learning_rate": 1.4603225547620959e-05, "loss": 0.0619, "theoretical_loss": 3.328843748253569, "tokens_seen": 3252289536 }, { "epoch": 0.97, "learning_rate": 1.4563106796116505e-05, "loss": 0.0649, "theoretical_loss": 3.328833688686729, "tokens_seen": 3252420608 }, { "epoch": 0.97, "learning_rate": 1.4522988044612053e-05, "loss": 0.0611, "theoretical_loss": 3.3288236296387863, "tokens_seen": 3252551680 }, { "epoch": 0.97, "learning_rate": 1.4482869293107598e-05, "loss": 0.0642, "theoretical_loss": 3.328813571109693, "tokens_seen": 3252682752 }, { "epoch": 0.97, "learning_rate": 1.4442750541603146e-05, "loss": 0.0664, "theoretical_loss": 3.328803513099402, "tokens_seen": 3252813824 }, { "epoch": 0.97, "learning_rate": 1.4402631790098692e-05, "loss": 0.0635, "theoretical_loss": 3.3287934556078644, "tokens_seen": 3252944896 }, { "epoch": 0.97, "learning_rate": 1.436251303859424e-05, "loss": 0.0621, "theoretical_loss": 3.3287833986350335, "tokens_seen": 3253075968 }, { "epoch": 0.97, "learning_rate": 1.4322394287089787e-05, "loss": 0.066, "theoretical_loss": 3.3287733421808614, "tokens_seen": 3253207040 }, { "epoch": 0.97, "learning_rate": 1.4282275535585331e-05, "loss": 0.0642, "theoretical_loss": 3.3287632862453007, "tokens_seen": 3253338112 }, { "epoch": 0.97, "learning_rate": 1.424215678408088e-05, "loss": 0.0621, "theoretical_loss": 3.328753230828303, "tokens_seen": 3253469184 }, { "epoch": 0.97, "learning_rate": 1.4202038032576426e-05, "loss": 0.061, "theoretical_loss": 3.3287431759298216, "tokens_seen": 3253600256 }, { "epoch": 0.97, "learning_rate": 1.4161919281071974e-05, "loss": 0.066, "theoretical_loss": 3.3287331215498086, "tokens_seen": 3253731328 }, { "epoch": 0.97, "learning_rate": 1.412180052956752e-05, "loss": 0.0644, "theoretical_loss": 3.328723067688216, "tokens_seen": 3253862400 }, { "epoch": 0.97, "learning_rate": 1.4081681778063068e-05, "loss": 0.0647, "theoretical_loss": 3.328713014344997, "tokens_seen": 3253993472 }, { "epoch": 0.97, "learning_rate": 1.4041563026558613e-05, "loss": 0.067, "theoretical_loss": 3.328702961520103, "tokens_seen": 3254124544 }, { "epoch": 0.97, "learning_rate": 1.4001444275054161e-05, "loss": 0.0628, "theoretical_loss": 3.328692909213487, "tokens_seen": 3254255616 }, { "epoch": 0.97, "learning_rate": 1.3961325523549707e-05, "loss": 0.0653, "theoretical_loss": 3.328682857425101, "tokens_seen": 3254386688 }, { "epoch": 0.97, "learning_rate": 1.3921206772045254e-05, "loss": 0.061, "theoretical_loss": 3.3286728061548976, "tokens_seen": 3254517760 }, { "epoch": 0.97, "learning_rate": 1.3881088020540802e-05, "loss": 0.06, "theoretical_loss": 3.3286627554028296, "tokens_seen": 3254648832 }, { "epoch": 0.97, "learning_rate": 1.3840969269036347e-05, "loss": 0.0639, "theoretical_loss": 3.328652705168849, "tokens_seen": 3254779904 }, { "epoch": 0.97, "learning_rate": 1.3800850517531895e-05, "loss": 0.06, "theoretical_loss": 3.3286426554529083, "tokens_seen": 3254910976 }, { "epoch": 0.97, "learning_rate": 1.3760731766027441e-05, "loss": 0.0627, "theoretical_loss": 3.3286326062549603, "tokens_seen": 3255042048 }, { "epoch": 0.97, "learning_rate": 1.3720613014522989e-05, "loss": 0.0649, "theoretical_loss": 3.328622557574957, "tokens_seen": 3255173120 }, { "epoch": 0.97, "learning_rate": 1.3680494263018535e-05, "loss": 0.0681, "theoretical_loss": 3.3286125094128507, "tokens_seen": 3255304192 }, { "epoch": 0.97, "learning_rate": 1.3640375511514083e-05, "loss": 0.0633, "theoretical_loss": 3.328602461768594, "tokens_seen": 3255435264 }, { "epoch": 0.97, "objective/train/advantage_avg": 0.00025052836281247437, "objective/train/docs_used": 1182603, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1211589574813843, "objective/train/original_loss": 1.1211589574813843, "objective/train/theoretical_loss": 3.32859241464214, "objective/train/tokens_used": 1626090976, "objective/train/value_avg": -0.006855010986328125, "objective/train/value_loss": 0.00018591461412142962, "objective/train/value_max": -1.7523765563964844e-05, "objective/train/value_min": -0.312744140625, "objective/train/value_reward_corr": 0.7151767376606779, "objective/train/value_std": 0.0147247314453125, "objective/train/weight_avg": 1.000331163406372, "objective/train/weighted_lm_loss": 1.121063232421875, "objective/train/weights_max": 1.2366453409194946, "objective/train/weights_min": 0.37432897090911865, "theoretical_loss": 3.32859241464214, "tokens_seen": 3255566336 }, { "epoch": 0.97, "learning_rate": 1.3600256760009628e-05, "loss": 0.0611, "theoretical_loss": 3.32859241464214, "tokens_seen": 3255566336 }, { "epoch": 0.97, "learning_rate": 1.3560138008505176e-05, "loss": 0.0619, "theoretical_loss": 3.3285823680334405, "tokens_seen": 3255697408 }, { "epoch": 0.97, "learning_rate": 1.3520019257000723e-05, "loss": 0.0657, "theoretical_loss": 3.328572321942448, "tokens_seen": 3255828480 }, { "epoch": 0.97, "learning_rate": 1.3479900505496269e-05, "loss": 0.0601, "theoretical_loss": 3.3285622763691154, "tokens_seen": 3255959552 }, { "epoch": 0.97, "learning_rate": 1.3439781753991817e-05, "loss": 0.0634, "theoretical_loss": 3.328552231313395, "tokens_seen": 3256090624 }, { "epoch": 0.97, "learning_rate": 1.3399663002487362e-05, "loss": 0.0624, "theoretical_loss": 3.328542186775239, "tokens_seen": 3256221696 }, { "epoch": 0.97, "learning_rate": 1.335954425098291e-05, "loss": 0.0627, "theoretical_loss": 3.3285321427545997, "tokens_seen": 3256352768 }, { "epoch": 0.97, "learning_rate": 1.3319425499478456e-05, "loss": 0.0647, "theoretical_loss": 3.3285220992514306, "tokens_seen": 3256483840 }, { "epoch": 0.97, "learning_rate": 1.3279306747974004e-05, "loss": 0.0663, "theoretical_loss": 3.3285120562656836, "tokens_seen": 3256614912 }, { "epoch": 0.97, "learning_rate": 1.323918799646955e-05, "loss": 0.0633, "theoretical_loss": 3.328502013797311, "tokens_seen": 3256745984 }, { "epoch": 0.97, "learning_rate": 1.3199069244965097e-05, "loss": 0.0606, "theoretical_loss": 3.3284919718462653, "tokens_seen": 3256877056 }, { "epoch": 0.97, "learning_rate": 1.3158950493460643e-05, "loss": 0.0674, "theoretical_loss": 3.3284819304125, "tokens_seen": 3257008128 }, { "epoch": 0.97, "learning_rate": 1.311883174195619e-05, "loss": 0.0637, "theoretical_loss": 3.3284718894959666, "tokens_seen": 3257139200 }, { "epoch": 0.97, "learning_rate": 1.3078712990451738e-05, "loss": 0.0659, "theoretical_loss": 3.328461849096618, "tokens_seen": 3257270272 }, { "epoch": 0.97, "learning_rate": 1.3038594238947284e-05, "loss": 0.0634, "theoretical_loss": 3.328451809214407, "tokens_seen": 3257401344 }, { "epoch": 0.97, "learning_rate": 1.2998475487442832e-05, "loss": 0.0604, "theoretical_loss": 3.3284417698492854, "tokens_seen": 3257532416 }, { "epoch": 0.97, "learning_rate": 1.2958356735938377e-05, "loss": 0.0649, "theoretical_loss": 3.328431731001207, "tokens_seen": 3257663488 }, { "epoch": 0.97, "learning_rate": 1.2918237984433925e-05, "loss": 0.0619, "theoretical_loss": 3.3284216926701227, "tokens_seen": 3257794560 }, { "epoch": 0.97, "learning_rate": 1.2878119232929471e-05, "loss": 0.0658, "theoretical_loss": 3.328411654855987, "tokens_seen": 3257925632 }, { "epoch": 0.97, "learning_rate": 1.283800048142502e-05, "loss": 0.0613, "theoretical_loss": 3.3284016175587507, "tokens_seen": 3258056704 }, { "epoch": 0.97, "learning_rate": 1.2797881729920566e-05, "loss": 0.0615, "theoretical_loss": 3.3283915807783675, "tokens_seen": 3258187776 }, { "epoch": 0.97, "learning_rate": 1.2757762978416112e-05, "loss": 0.0594, "theoretical_loss": 3.3283815445147895, "tokens_seen": 3258318848 }, { "epoch": 0.97, "learning_rate": 1.2717644226911658e-05, "loss": 0.0584, "theoretical_loss": 3.3283715087679697, "tokens_seen": 3258449920 }, { "epoch": 0.97, "learning_rate": 1.2677525475407205e-05, "loss": 0.0619, "theoretical_loss": 3.3283614735378606, "tokens_seen": 3258580992 }, { "epoch": 0.97, "learning_rate": 1.2637406723902753e-05, "loss": 0.0632, "theoretical_loss": 3.3283514388244146, "tokens_seen": 3258712064 }, { "epoch": 0.97, "objective/train/advantage_avg": -5.351888103177771e-05, "objective/train/docs_used": 1183737, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2912063598632812, "objective/train/original_loss": 1.2912062406539917, "objective/train/theoretical_loss": 3.3283414046275843, "objective/train/tokens_used": 1629367776, "objective/train/value_avg": -0.008880615234375, "objective/train/value_loss": 0.00038528675213456154, "objective/train/value_max": -3.5643577575683594e-05, "objective/train/value_min": -0.98193359375, "objective/train/value_reward_corr": 0.7479802870524459, "objective/train/value_std": 0.0225372314453125, "objective/train/weight_avg": 1.000126600265503, "objective/train/weighted_lm_loss": 1.2910633087158203, "objective/train/weights_max": 2.5176849365234375, "objective/train/weights_min": 0.3765632212162018, "theoretical_loss": 3.3283414046275843, "tokens_seen": 3258843136 }, { "epoch": 0.98, "learning_rate": 1.2597287972398299e-05, "loss": 0.0606, "theoretical_loss": 3.3283414046275843, "tokens_seen": 3258843136 }, { "epoch": 0.98, "learning_rate": 1.2557169220893847e-05, "loss": 0.0653, "theoretical_loss": 3.3283313709473226, "tokens_seen": 3258974208 }, { "epoch": 0.98, "learning_rate": 1.2517050469389392e-05, "loss": 0.0633, "theoretical_loss": 3.328321337783582, "tokens_seen": 3259105280 }, { "epoch": 0.98, "learning_rate": 1.247693171788494e-05, "loss": 0.0649, "theoretical_loss": 3.3283113051363147, "tokens_seen": 3259236352 }, { "epoch": 0.98, "learning_rate": 1.2436812966380486e-05, "loss": 0.0586, "theoretical_loss": 3.328301273005474, "tokens_seen": 3259367424 }, { "epoch": 0.98, "learning_rate": 1.2396694214876034e-05, "loss": 0.0627, "theoretical_loss": 3.3282912413910126, "tokens_seen": 3259498496 }, { "epoch": 0.98, "learning_rate": 1.235657546337158e-05, "loss": 0.0647, "theoretical_loss": 3.3282812102928827, "tokens_seen": 3259629568 }, { "epoch": 0.98, "learning_rate": 1.2316456711867127e-05, "loss": 0.0662, "theoretical_loss": 3.328271179711037, "tokens_seen": 3259760640 }, { "epoch": 0.98, "learning_rate": 1.2276337960362673e-05, "loss": 0.0602, "theoretical_loss": 3.3282611496454284, "tokens_seen": 3259891712 }, { "epoch": 0.98, "learning_rate": 1.223621920885822e-05, "loss": 0.0617, "theoretical_loss": 3.3282511200960094, "tokens_seen": 3260022784 }, { "epoch": 0.98, "learning_rate": 1.2196100457353768e-05, "loss": 0.0618, "theoretical_loss": 3.328241091062733, "tokens_seen": 3260153856 }, { "epoch": 0.98, "learning_rate": 1.2155981705849314e-05, "loss": 0.0625, "theoretical_loss": 3.3282310625455516, "tokens_seen": 3260284928 }, { "epoch": 0.98, "learning_rate": 1.2115862954344862e-05, "loss": 0.0583, "theoretical_loss": 3.3282210345444176, "tokens_seen": 3260416000 }, { "epoch": 0.98, "learning_rate": 1.2075744202840407e-05, "loss": 0.0635, "theoretical_loss": 3.3282110070592847, "tokens_seen": 3260547072 }, { "epoch": 0.98, "learning_rate": 1.2035625451335955e-05, "loss": 0.0661, "theoretical_loss": 3.3282009800901045, "tokens_seen": 3260678144 }, { "epoch": 0.98, "learning_rate": 1.1995506699831501e-05, "loss": 0.0659, "theoretical_loss": 3.32819095363683, "tokens_seen": 3260809216 }, { "epoch": 0.98, "learning_rate": 1.195538794832705e-05, "loss": 0.0647, "theoretical_loss": 3.3281809276994148, "tokens_seen": 3260940288 }, { "epoch": 0.98, "learning_rate": 1.1915269196822596e-05, "loss": 0.0615, "theoretical_loss": 3.3281709022778103, "tokens_seen": 3261071360 }, { "epoch": 0.98, "learning_rate": 1.187515044531814e-05, "loss": 0.0629, "theoretical_loss": 3.3281608773719698, "tokens_seen": 3261202432 }, { "epoch": 0.98, "learning_rate": 1.1835031693813689e-05, "loss": 0.0637, "theoretical_loss": 3.3281508529818464, "tokens_seen": 3261333504 }, { "epoch": 0.98, "learning_rate": 1.1794912942309235e-05, "loss": 0.0629, "theoretical_loss": 3.3281408291073924, "tokens_seen": 3261464576 }, { "epoch": 0.98, "learning_rate": 1.1754794190804783e-05, "loss": 0.0652, "theoretical_loss": 3.3281308057485606, "tokens_seen": 3261595648 }, { "epoch": 0.98, "learning_rate": 1.171467543930033e-05, "loss": 0.0632, "theoretical_loss": 3.3281207829053043, "tokens_seen": 3261726720 }, { "epoch": 0.98, "learning_rate": 1.1674556687795876e-05, "loss": 0.0649, "theoretical_loss": 3.3281107605775753, "tokens_seen": 3261857792 }, { "epoch": 0.98, "learning_rate": 1.1634437936291422e-05, "loss": 0.0603, "theoretical_loss": 3.328100738765327, "tokens_seen": 3261988864 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.000411924091167748, "objective/train/docs_used": 1184948, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.234977126121521, "objective/train/original_loss": 1.234976887702942, "objective/train/theoretical_loss": 3.328090717468512, "objective/train/tokens_used": 1632644576, "objective/train/value_avg": -0.00949859619140625, "objective/train/value_loss": 0.0004404170613270253, "objective/train/value_max": -3.9458274841308594e-05, "objective/train/value_min": -0.943359375, "objective/train/value_reward_corr": 0.7398071228885635, "objective/train/value_std": 0.024871826171875, "objective/train/weight_avg": 1.0006182193756104, "objective/train/weighted_lm_loss": 1.2354453802108765, "objective/train/weights_max": 2.0797736644744873, "objective/train/weights_min": 0.3946770429611206, "theoretical_loss": 3.328090717468512, "tokens_seen": 3262119936 }, { "epoch": 0.98, "learning_rate": 1.159431918478697e-05, "loss": 0.0638, "theoretical_loss": 3.328090717468512, "tokens_seen": 3262119936 }, { "epoch": 0.98, "learning_rate": 1.1554200433282516e-05, "loss": 0.0691, "theoretical_loss": 3.3280806966870835, "tokens_seen": 3262251008 }, { "epoch": 0.98, "learning_rate": 1.1514081681778065e-05, "loss": 0.0627, "theoretical_loss": 3.3280706764209933, "tokens_seen": 3262382080 }, { "epoch": 0.98, "learning_rate": 1.1473962930273611e-05, "loss": 0.0627, "theoretical_loss": 3.3280606566701953, "tokens_seen": 3262513152 }, { "epoch": 0.98, "learning_rate": 1.1433844178769156e-05, "loss": 0.0612, "theoretical_loss": 3.3280506374346417, "tokens_seen": 3262644224 }, { "epoch": 0.98, "learning_rate": 1.1393725427264704e-05, "loss": 0.0638, "theoretical_loss": 3.3280406187142857, "tokens_seen": 3262775296 }, { "epoch": 0.98, "learning_rate": 1.135360667576025e-05, "loss": 0.0623, "theoretical_loss": 3.3280306005090794, "tokens_seen": 3262906368 }, { "epoch": 0.98, "learning_rate": 1.1313487924255798e-05, "loss": 0.0556, "theoretical_loss": 3.328020582818976, "tokens_seen": 3263037440 }, { "epoch": 0.98, "learning_rate": 1.1273369172751344e-05, "loss": 0.0651, "theoretical_loss": 3.3280105656439285, "tokens_seen": 3263168512 }, { "epoch": 0.98, "learning_rate": 1.123325042124689e-05, "loss": 0.0637, "theoretical_loss": 3.3280005489838898, "tokens_seen": 3263299584 }, { "epoch": 0.98, "learning_rate": 1.1193131669742437e-05, "loss": 0.0603, "theoretical_loss": 3.3279905328388124, "tokens_seen": 3263430656 }, { "epoch": 0.98, "learning_rate": 1.1153012918237985e-05, "loss": 0.0628, "theoretical_loss": 3.3279805172086494, "tokens_seen": 3263561728 }, { "epoch": 0.98, "learning_rate": 1.1112894166733532e-05, "loss": 0.0587, "theoretical_loss": 3.327970502093353, "tokens_seen": 3263692800 }, { "epoch": 0.98, "learning_rate": 1.107277541522908e-05, "loss": 0.062, "theoretical_loss": 3.327960487492877, "tokens_seen": 3263823872 }, { "epoch": 0.98, "learning_rate": 1.1032656663724626e-05, "loss": 0.0609, "theoretical_loss": 3.3279504734071743, "tokens_seen": 3263954944 }, { "epoch": 0.98, "learning_rate": 1.099253791222017e-05, "loss": 0.065, "theoretical_loss": 3.3279404598361975, "tokens_seen": 3264086016 }, { "epoch": 0.98, "learning_rate": 1.0952419160715719e-05, "loss": 0.0601, "theoretical_loss": 3.3279304467798987, "tokens_seen": 3264217088 }, { "epoch": 0.98, "learning_rate": 1.0912300409211265e-05, "loss": 0.0645, "theoretical_loss": 3.3279204342382314, "tokens_seen": 3264348160 }, { "epoch": 0.98, "learning_rate": 1.0872181657706813e-05, "loss": 0.0627, "theoretical_loss": 3.3279104222111484, "tokens_seen": 3264479232 }, { "epoch": 0.98, "learning_rate": 1.083206290620236e-05, "loss": 0.0667, "theoretical_loss": 3.3279004106986028, "tokens_seen": 3264610304 }, { "epoch": 0.98, "learning_rate": 1.0791944154697906e-05, "loss": 0.0622, "theoretical_loss": 3.3278903997005473, "tokens_seen": 3264741376 }, { "epoch": 0.98, "learning_rate": 1.0751825403193452e-05, "loss": 0.0612, "theoretical_loss": 3.3278803892169355, "tokens_seen": 3264872448 }, { "epoch": 0.98, "learning_rate": 1.0711706651689e-05, "loss": 0.0659, "theoretical_loss": 3.327870379247719, "tokens_seen": 3265003520 }, { "epoch": 0.98, "learning_rate": 1.0671587900184547e-05, "loss": 0.0624, "theoretical_loss": 3.3278603697928517, "tokens_seen": 3265134592 }, { "epoch": 0.98, "learning_rate": 1.0631469148680093e-05, "loss": 0.0649, "theoretical_loss": 3.327850360852286, "tokens_seen": 3265265664 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.00013351708184927702, "objective/train/docs_used": 1186260, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3229857683181763, "objective/train/original_loss": 1.3229856491088867, "objective/train/theoretical_loss": 3.327840352425975, "objective/train/tokens_used": 1635921376, "objective/train/value_avg": -0.006275177001953125, "objective/train/value_loss": 0.0001440990890841931, "objective/train/value_max": -6.401538848876953e-05, "objective/train/value_min": -0.2490234375, "objective/train/value_reward_corr": 0.703439741742091, "objective/train/value_std": 0.01100921630859375, "objective/train/weight_avg": 1.0002005100250244, "objective/train/weighted_lm_loss": 1.3221129179000854, "objective/train/weights_max": 1.1178979873657227, "objective/train/weights_min": 0.38491126894950867, "theoretical_loss": 3.327840352425975, "tokens_seen": 3265396736 }, { "epoch": 0.98, "learning_rate": 1.0591350397175641e-05, "loss": 0.0639, "theoretical_loss": 3.327840352425975, "tokens_seen": 3265396736 }, { "epoch": 0.98, "learning_rate": 1.0551231645671186e-05, "loss": 0.0617, "theoretical_loss": 3.327830344513872, "tokens_seen": 3265527808 }, { "epoch": 0.98, "learning_rate": 1.0511112894166734e-05, "loss": 0.0632, "theoretical_loss": 3.32782033711593, "tokens_seen": 3265658880 }, { "epoch": 0.98, "learning_rate": 1.047099414266228e-05, "loss": 0.0632, "theoretical_loss": 3.3278103302321007, "tokens_seen": 3265789952 }, { "epoch": 0.98, "learning_rate": 1.0430875391157828e-05, "loss": 0.0654, "theoretical_loss": 3.3278003238623386, "tokens_seen": 3265921024 }, { "epoch": 0.98, "learning_rate": 1.0390756639653375e-05, "loss": 0.0642, "theoretical_loss": 3.327790318006596, "tokens_seen": 3266052096 }, { "epoch": 0.98, "learning_rate": 1.0350637888148921e-05, "loss": 0.0681, "theoretical_loss": 3.3277803126648258, "tokens_seen": 3266183168 }, { "epoch": 0.98, "learning_rate": 1.0310519136644467e-05, "loss": 0.0609, "theoretical_loss": 3.327770307836981, "tokens_seen": 3266314240 }, { "epoch": 0.98, "learning_rate": 1.0270400385140015e-05, "loss": 0.0627, "theoretical_loss": 3.3277603035230143, "tokens_seen": 3266445312 }, { "epoch": 0.98, "learning_rate": 1.0230281633635562e-05, "loss": 0.0611, "theoretical_loss": 3.3277502997228794, "tokens_seen": 3266576384 }, { "epoch": 0.98, "learning_rate": 1.0190162882131108e-05, "loss": 0.062, "theoretical_loss": 3.327740296436529, "tokens_seen": 3266707456 }, { "epoch": 0.98, "learning_rate": 1.0150044130626656e-05, "loss": 0.0634, "theoretical_loss": 3.327730293663916, "tokens_seen": 3266838528 }, { "epoch": 0.98, "learning_rate": 1.01099253791222e-05, "loss": 0.0636, "theoretical_loss": 3.3277202914049933, "tokens_seen": 3266969600 }, { "epoch": 0.98, "learning_rate": 1.0069806627617749e-05, "loss": 0.0655, "theoretical_loss": 3.327710289659714, "tokens_seen": 3267100672 }, { "epoch": 0.98, "learning_rate": 1.0029687876113295e-05, "loss": 0.0667, "theoretical_loss": 3.3277002884280313, "tokens_seen": 3267231744 }, { "epoch": 0.98, "learning_rate": 9.989569124608843e-06, "loss": 0.0608, "theoretical_loss": 3.327690287709898, "tokens_seen": 3267362816 }, { "epoch": 0.98, "learning_rate": 9.94945037310439e-06, "loss": 0.0619, "theoretical_loss": 3.327680287505267, "tokens_seen": 3267493888 }, { "epoch": 0.98, "learning_rate": 9.909331621599936e-06, "loss": 0.0662, "theoretical_loss": 3.327670287814092, "tokens_seen": 3267624960 }, { "epoch": 0.98, "learning_rate": 9.869212870095482e-06, "loss": 0.0612, "theoretical_loss": 3.3276602886363253, "tokens_seen": 3267756032 }, { "epoch": 0.98, "learning_rate": 9.829094118591029e-06, "loss": 0.0619, "theoretical_loss": 3.3276502899719205, "tokens_seen": 3267887104 }, { "epoch": 0.98, "learning_rate": 9.788975367086577e-06, "loss": 0.0641, "theoretical_loss": 3.32764029182083, "tokens_seen": 3268018176 }, { "epoch": 0.98, "learning_rate": 9.748856615582123e-06, "loss": 0.0633, "theoretical_loss": 3.3276302941830074, "tokens_seen": 3268149248 }, { "epoch": 0.98, "learning_rate": 9.70873786407767e-06, "loss": 0.0618, "theoretical_loss": 3.327620297058406, "tokens_seen": 3268280320 }, { "epoch": 0.98, "learning_rate": 9.668619112573216e-06, "loss": 0.0636, "theoretical_loss": 3.327610300446978, "tokens_seen": 3268411392 }, { "epoch": 0.98, "learning_rate": 9.628500361068764e-06, "loss": 0.066, "theoretical_loss": 3.3276003043486773, "tokens_seen": 3268542464 }, { "epoch": 0.98, "objective/train/advantage_avg": -0.000499613699503243, "objective/train/docs_used": 1187457, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.170939326286316, "objective/train/original_loss": 1.1709394454956055, "objective/train/theoretical_loss": 3.3275903087634564, "objective/train/tokens_used": 1639198176, "objective/train/value_avg": -0.00986480712890625, "objective/train/value_loss": 0.00037557882023975253, "objective/train/value_max": -3.7610530853271484e-05, "objective/train/value_min": -0.72607421875, "objective/train/value_reward_corr": 0.7420529883770068, "objective/train/value_std": 0.0198974609375, "objective/train/weight_avg": 0.9996645450592041, "objective/train/weighted_lm_loss": 1.1702909469604492, "objective/train/weights_max": 1.3982270956039429, "objective/train/weights_min": 0.37701165676116943, "theoretical_loss": 3.3275903087634564, "tokens_seen": 3268673536 }, { "epoch": 0.98, "learning_rate": 9.58838160956431e-06, "loss": 0.0618, "theoretical_loss": 3.3275903087634564, "tokens_seen": 3268673536 }, { "epoch": 0.98, "learning_rate": 9.548262858059858e-06, "loss": 0.0628, "theoretical_loss": 3.3275803136912687, "tokens_seen": 3268804608 }, { "epoch": 0.98, "learning_rate": 9.508144106555405e-06, "loss": 0.0653, "theoretical_loss": 3.327570319132067, "tokens_seen": 3268935680 }, { "epoch": 0.98, "learning_rate": 9.468025355050951e-06, "loss": 0.0638, "theoretical_loss": 3.327560325085805, "tokens_seen": 3269066752 }, { "epoch": 0.98, "learning_rate": 9.427906603546498e-06, "loss": 0.0672, "theoretical_loss": 3.3275503315524357, "tokens_seen": 3269197824 }, { "epoch": 0.98, "learning_rate": 9.387787852042044e-06, "loss": 0.0615, "theoretical_loss": 3.3275403385319113, "tokens_seen": 3269328896 }, { "epoch": 0.98, "learning_rate": 9.347669100537592e-06, "loss": 0.0646, "theoretical_loss": 3.3275303460241865, "tokens_seen": 3269459968 }, { "epoch": 0.98, "learning_rate": 9.307550349033138e-06, "loss": 0.0577, "theoretical_loss": 3.327520354029213, "tokens_seen": 3269591040 }, { "epoch": 0.98, "learning_rate": 9.267431597528685e-06, "loss": 0.0611, "theoretical_loss": 3.327510362546944, "tokens_seen": 3269722112 }, { "epoch": 0.98, "learning_rate": 9.227312846024231e-06, "loss": 0.0649, "theoretical_loss": 3.327500371577334, "tokens_seen": 3269853184 }, { "epoch": 0.98, "learning_rate": 9.187194094519779e-06, "loss": 0.0625, "theoretical_loss": 3.3274903811203345, "tokens_seen": 3269984256 }, { "epoch": 0.98, "learning_rate": 9.147075343015325e-06, "loss": 0.0639, "theoretical_loss": 3.3274803911759, "tokens_seen": 3270115328 }, { "epoch": 0.98, "learning_rate": 9.106956591510874e-06, "loss": 0.0611, "theoretical_loss": 3.327470401743983, "tokens_seen": 3270246400 }, { "epoch": 0.98, "learning_rate": 9.06683784000642e-06, "loss": 0.065, "theoretical_loss": 3.3274604128245366, "tokens_seen": 3270377472 }, { "epoch": 0.98, "learning_rate": 9.026719088501965e-06, "loss": 0.0604, "theoretical_loss": 3.327450424417514, "tokens_seen": 3270508544 }, { "epoch": 0.98, "learning_rate": 8.986600336997513e-06, "loss": 0.0647, "theoretical_loss": 3.3274404365228687, "tokens_seen": 3270639616 }, { "epoch": 0.98, "learning_rate": 8.946481585493059e-06, "loss": 0.0629, "theoretical_loss": 3.3274304491405533, "tokens_seen": 3270770688 }, { "epoch": 0.98, "learning_rate": 8.906362833988607e-06, "loss": 0.0653, "theoretical_loss": 3.3274204622705215, "tokens_seen": 3270901760 }, { "epoch": 0.98, "learning_rate": 8.866244082484153e-06, "loss": 0.0644, "theoretical_loss": 3.3274104759127265, "tokens_seen": 3271032832 }, { "epoch": 0.98, "learning_rate": 8.8261253309797e-06, "loss": 0.0622, "theoretical_loss": 3.3274004900671215, "tokens_seen": 3271163904 }, { "epoch": 0.98, "learning_rate": 8.786006579475246e-06, "loss": 0.0654, "theoretical_loss": 3.3273905047336596, "tokens_seen": 3271294976 }, { "epoch": 0.98, "learning_rate": 8.745887827970794e-06, "loss": 0.0639, "theoretical_loss": 3.3273805199122934, "tokens_seen": 3271426048 }, { "epoch": 0.98, "learning_rate": 8.70576907646634e-06, "loss": 0.0658, "theoretical_loss": 3.3273705356029772, "tokens_seen": 3271557120 }, { "epoch": 0.98, "learning_rate": 8.665650324961889e-06, "loss": 0.0606, "theoretical_loss": 3.327360551805664, "tokens_seen": 3271688192 }, { "epoch": 0.98, "learning_rate": 8.625531573457435e-06, "loss": 0.0627, "theoretical_loss": 3.327350568520306, "tokens_seen": 3271819264 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.0008538314723409712, "objective/train/docs_used": 1188646, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.27946937084198, "objective/train/original_loss": 1.279469609260559, "objective/train/theoretical_loss": 3.3273405857468576, "objective/train/tokens_used": 1642474976, "objective/train/value_avg": -0.00585174560546875, "objective/train/value_loss": 0.0001343698095297441, "objective/train/value_max": -3.3974647521972656e-05, "objective/train/value_min": -0.28515625, "objective/train/value_reward_corr": 0.7611813557417162, "objective/train/value_std": 0.01348876953125, "objective/train/weight_avg": 1.0009163618087769, "objective/train/weighted_lm_loss": 1.2803977727890015, "objective/train/weights_max": 1.2134668827056885, "objective/train/weights_min": 0.4454297423362732, "theoretical_loss": 3.3273405857468576, "tokens_seen": 3271950336 }, { "epoch": 0.98, "learning_rate": 8.58541282195298e-06, "loss": 0.0628, "theoretical_loss": 3.3273405857468576, "tokens_seen": 3271950336 }, { "epoch": 0.98, "learning_rate": 8.545294070448528e-06, "loss": 0.0639, "theoretical_loss": 3.327330603485272, "tokens_seen": 3272081408 }, { "epoch": 0.98, "learning_rate": 8.505175318944074e-06, "loss": 0.0624, "theoretical_loss": 3.3273206217355016, "tokens_seen": 3272212480 }, { "epoch": 0.98, "learning_rate": 8.465056567439622e-06, "loss": 0.0605, "theoretical_loss": 3.3273106404975, "tokens_seen": 3272343552 }, { "epoch": 0.98, "learning_rate": 8.424937815935168e-06, "loss": 0.0631, "theoretical_loss": 3.3273006597712214, "tokens_seen": 3272474624 }, { "epoch": 0.98, "learning_rate": 8.384819064430715e-06, "loss": 0.0672, "theoretical_loss": 3.3272906795566177, "tokens_seen": 3272605696 }, { "epoch": 0.98, "learning_rate": 8.344700312926261e-06, "loss": 0.0623, "theoretical_loss": 3.3272806998536426, "tokens_seen": 3272736768 }, { "epoch": 0.98, "learning_rate": 8.30458156142181e-06, "loss": 0.0643, "theoretical_loss": 3.3272707206622503, "tokens_seen": 3272867840 }, { "epoch": 0.98, "learning_rate": 8.264462809917356e-06, "loss": 0.0647, "theoretical_loss": 3.3272607419823927, "tokens_seen": 3272998912 }, { "epoch": 0.98, "learning_rate": 8.224344058412904e-06, "loss": 0.0564, "theoretical_loss": 3.327250763814024, "tokens_seen": 3273129984 }, { "epoch": 0.98, "learning_rate": 8.18422530690845e-06, "loss": 0.0631, "theoretical_loss": 3.3272407861570974, "tokens_seen": 3273261056 }, { "epoch": 0.98, "learning_rate": 8.144106555403995e-06, "loss": 0.0592, "theoretical_loss": 3.327230809011566, "tokens_seen": 3273392128 }, { "epoch": 0.98, "learning_rate": 8.103987803899543e-06, "loss": 0.0642, "theoretical_loss": 3.3272208323773826, "tokens_seen": 3273523200 }, { "epoch": 0.98, "learning_rate": 8.06386905239509e-06, "loss": 0.0644, "theoretical_loss": 3.327210856254502, "tokens_seen": 3273654272 }, { "epoch": 0.98, "learning_rate": 8.023750300890637e-06, "loss": 0.064, "theoretical_loss": 3.327200880642876, "tokens_seen": 3273785344 }, { "epoch": 0.98, "learning_rate": 7.983631549386184e-06, "loss": 0.0657, "theoretical_loss": 3.3271909055424587, "tokens_seen": 3273916416 }, { "epoch": 0.98, "learning_rate": 7.94351279788173e-06, "loss": 0.0609, "theoretical_loss": 3.327180930953203, "tokens_seen": 3274047488 }, { "epoch": 0.98, "learning_rate": 7.903394046377276e-06, "loss": 0.0617, "theoretical_loss": 3.327170956875063, "tokens_seen": 3274178560 }, { "epoch": 0.98, "learning_rate": 7.863275294872824e-06, "loss": 0.0627, "theoretical_loss": 3.327160983307991, "tokens_seen": 3274309632 }, { "epoch": 0.98, "learning_rate": 7.82315654336837e-06, "loss": 0.0593, "theoretical_loss": 3.3271510102519413, "tokens_seen": 3274440704 }, { "epoch": 0.98, "learning_rate": 7.783037791863917e-06, "loss": 0.0645, "theoretical_loss": 3.327141037706867, "tokens_seen": 3274571776 }, { "epoch": 0.98, "learning_rate": 7.742919040359463e-06, "loss": 0.063, "theoretical_loss": 3.327131065672721, "tokens_seen": 3274702848 }, { "epoch": 0.98, "learning_rate": 7.702800288855012e-06, "loss": 0.0656, "theoretical_loss": 3.3271210941494576, "tokens_seen": 3274833920 }, { "epoch": 0.98, "learning_rate": 7.662681537350558e-06, "loss": 0.0614, "theoretical_loss": 3.327111123137029, "tokens_seen": 3274964992 }, { "epoch": 0.98, "learning_rate": 7.622562785846105e-06, "loss": 0.0634, "theoretical_loss": 3.3271011526353895, "tokens_seen": 3275096064 }, { "epoch": 0.98, "objective/train/advantage_avg": 0.0006191369029693305, "objective/train/docs_used": 1189805, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1741362810134888, "objective/train/original_loss": 1.1741364002227783, "objective/train/theoretical_loss": 3.327091182644492, "objective/train/tokens_used": 1645751776, "objective/train/value_avg": -0.00403594970703125, "objective/train/value_loss": 5.6109125580405816e-05, "objective/train/value_max": -4.792213439941406e-05, "objective/train/value_min": -0.215576171875, "objective/train/value_reward_corr": 0.797675698347817, "objective/train/value_std": 0.00965118408203125, "objective/train/weight_avg": 1.0006468296051025, "objective/train/weighted_lm_loss": 1.1750082969665527, "objective/train/weights_max": 1.1459459066390991, "objective/train/weights_min": 0.8323221206665039, "theoretical_loss": 3.327091182644492, "tokens_seen": 3275227136 }, { "epoch": 0.98, "learning_rate": 7.582444034341651e-06, "loss": 0.0602, "theoretical_loss": 3.327091182644492, "tokens_seen": 3275227136 }, { "epoch": 0.99, "learning_rate": 7.542325282837198e-06, "loss": 0.0614, "theoretical_loss": 3.3270812131642904, "tokens_seen": 3275358208 }, { "epoch": 0.99, "learning_rate": 7.502206531332745e-06, "loss": 0.0628, "theoretical_loss": 3.3270712441947374, "tokens_seen": 3275489280 }, { "epoch": 0.99, "learning_rate": 7.462087779828291e-06, "loss": 0.0581, "theoretical_loss": 3.3270612757357876, "tokens_seen": 3275620352 }, { "epoch": 0.99, "learning_rate": 7.421969028323839e-06, "loss": 0.0624, "theoretical_loss": 3.327051307787393, "tokens_seen": 3275751424 }, { "epoch": 0.99, "learning_rate": 7.381850276819386e-06, "loss": 0.0628, "theoretical_loss": 3.3270413403495076, "tokens_seen": 3275882496 }, { "epoch": 0.99, "learning_rate": 7.341731525314932e-06, "loss": 0.0691, "theoretical_loss": 3.327031373422085, "tokens_seen": 3276013568 }, { "epoch": 0.99, "learning_rate": 7.301612773810479e-06, "loss": 0.0664, "theoretical_loss": 3.3270214070050788, "tokens_seen": 3276144640 }, { "epoch": 0.99, "learning_rate": 7.261494022306027e-06, "loss": 0.0656, "theoretical_loss": 3.3270114410984415, "tokens_seen": 3276275712 }, { "epoch": 0.99, "learning_rate": 7.221375270801573e-06, "loss": 0.0618, "theoretical_loss": 3.327001475702128, "tokens_seen": 3276406784 }, { "epoch": 0.99, "learning_rate": 7.18125651929712e-06, "loss": 0.0591, "theoretical_loss": 3.3269915108160903, "tokens_seen": 3276537856 }, { "epoch": 0.99, "learning_rate": 7.141137767792666e-06, "loss": 0.0662, "theoretical_loss": 3.3269815464402828, "tokens_seen": 3276668928 }, { "epoch": 0.99, "learning_rate": 7.101019016288213e-06, "loss": 0.0649, "theoretical_loss": 3.3269715825746586, "tokens_seen": 3276800000 }, { "epoch": 0.99, "learning_rate": 7.06090026478376e-06, "loss": 0.0628, "theoretical_loss": 3.3269616192191718, "tokens_seen": 3276931072 }, { "epoch": 0.99, "learning_rate": 7.0207815132793065e-06, "loss": 0.0674, "theoretical_loss": 3.3269516563737747, "tokens_seen": 3277062144 }, { "epoch": 0.99, "learning_rate": 6.980662761774854e-06, "loss": 0.0625, "theoretical_loss": 3.3269416940384215, "tokens_seen": 3277193216 }, { "epoch": 0.99, "learning_rate": 6.940544010270401e-06, "loss": 0.0598, "theoretical_loss": 3.326931732213066, "tokens_seen": 3277324288 }, { "epoch": 0.99, "learning_rate": 6.900425258765947e-06, "loss": 0.0653, "theoretical_loss": 3.326921770897661, "tokens_seen": 3277455360 }, { "epoch": 0.99, "learning_rate": 6.8603065072614945e-06, "loss": 0.0643, "theoretical_loss": 3.32691181009216, "tokens_seen": 3277586432 }, { "epoch": 0.99, "learning_rate": 6.820187755757042e-06, "loss": 0.0628, "theoretical_loss": 3.326901849796517, "tokens_seen": 3277717504 }, { "epoch": 0.99, "learning_rate": 6.780069004252588e-06, "loss": 0.0624, "theoretical_loss": 3.326891890010686, "tokens_seen": 3277848576 }, { "epoch": 0.99, "learning_rate": 6.7399502527481345e-06, "loss": 0.0665, "theoretical_loss": 3.3268819307346194, "tokens_seen": 3277979648 }, { "epoch": 0.99, "learning_rate": 6.699831501243681e-06, "loss": 0.0598, "theoretical_loss": 3.326871971968271, "tokens_seen": 3278110720 }, { "epoch": 0.99, "learning_rate": 6.659712749739228e-06, "loss": 0.0626, "theoretical_loss": 3.3268620137115947, "tokens_seen": 3278241792 }, { "epoch": 0.99, "learning_rate": 6.619593998234775e-06, "loss": 0.0662, "theoretical_loss": 3.326852055964544, "tokens_seen": 3278372864 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.0009127157391048968, "objective/train/docs_used": 1191058, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3104063272476196, "objective/train/original_loss": 1.3104063272476196, "objective/train/theoretical_loss": 3.326842098727072, "objective/train/tokens_used": 1649028576, "objective/train/value_avg": -0.004886627197265625, "objective/train/value_loss": 5.030036481912248e-05, "objective/train/value_max": -2.1636486053466797e-05, "objective/train/value_min": -0.2078857421875, "objective/train/value_reward_corr": 0.8053587776868172, "objective/train/value_std": 0.01006317138671875, "objective/train/weight_avg": 1.000937819480896, "objective/train/weighted_lm_loss": 1.3115791082382202, "objective/train/weights_max": 1.1710323095321655, "objective/train/weights_min": 0.8215762376785278, "theoretical_loss": 3.326842098727072, "tokens_seen": 3278503936 }, { "epoch": 0.99, "learning_rate": 6.579475246730322e-06, "loss": 0.0659, "theoretical_loss": 3.326842098727072, "tokens_seen": 3278503936 }, { "epoch": 0.99, "learning_rate": 6.539356495225869e-06, "loss": 0.0649, "theoretical_loss": 3.3268321419991325, "tokens_seen": 3278635008 }, { "epoch": 0.99, "learning_rate": 6.499237743721416e-06, "loss": 0.0646, "theoretical_loss": 3.3268221857806792, "tokens_seen": 3278766080 }, { "epoch": 0.99, "learning_rate": 6.459118992216962e-06, "loss": 0.0651, "theoretical_loss": 3.326812230071666, "tokens_seen": 3278897152 }, { "epoch": 0.99, "learning_rate": 6.41900024071251e-06, "loss": 0.0669, "theoretical_loss": 3.326802274872046, "tokens_seen": 3279028224 }, { "epoch": 0.99, "learning_rate": 6.378881489208056e-06, "loss": 0.0617, "theoretical_loss": 3.326792320181772, "tokens_seen": 3279159296 }, { "epoch": 0.99, "learning_rate": 6.338762737703602e-06, "loss": 0.0666, "theoretical_loss": 3.326782366000799, "tokens_seen": 3279290368 }, { "epoch": 0.99, "learning_rate": 6.2986439861991495e-06, "loss": 0.0647, "theoretical_loss": 3.3267724123290803, "tokens_seen": 3279421440 }, { "epoch": 0.99, "learning_rate": 6.258525234694696e-06, "loss": 0.0637, "theoretical_loss": 3.3267624591665688, "tokens_seen": 3279552512 }, { "epoch": 0.99, "learning_rate": 6.218406483190243e-06, "loss": 0.0614, "theoretical_loss": 3.3267525065132184, "tokens_seen": 3279683584 }, { "epoch": 0.99, "learning_rate": 6.17828773168579e-06, "loss": 0.0607, "theoretical_loss": 3.326742554368983, "tokens_seen": 3279814656 }, { "epoch": 0.99, "learning_rate": 6.138168980181337e-06, "loss": 0.0655, "theoretical_loss": 3.326732602733816, "tokens_seen": 3279945728 }, { "epoch": 0.99, "learning_rate": 6.098050228676884e-06, "loss": 0.0626, "theoretical_loss": 3.326722651607671, "tokens_seen": 3280076800 }, { "epoch": 0.99, "learning_rate": 6.057931477172431e-06, "loss": 0.0581, "theoretical_loss": 3.3267127009905018, "tokens_seen": 3280207872 }, { "epoch": 0.99, "learning_rate": 6.0178127256679775e-06, "loss": 0.0653, "theoretical_loss": 3.3267027508822613, "tokens_seen": 3280338944 }, { "epoch": 0.99, "learning_rate": 5.977693974163525e-06, "loss": 0.066, "theoretical_loss": 3.3266928012829045, "tokens_seen": 3280470016 }, { "epoch": 0.99, "learning_rate": 5.93757522265907e-06, "loss": 0.0652, "theoretical_loss": 3.3266828521923837, "tokens_seen": 3280601088 }, { "epoch": 0.99, "learning_rate": 5.8974564711546174e-06, "loss": 0.064, "theoretical_loss": 3.3266729036106533, "tokens_seen": 3280732160 }, { "epoch": 0.99, "learning_rate": 5.857337719650165e-06, "loss": 0.062, "theoretical_loss": 3.326662955537667, "tokens_seen": 3280863232 }, { "epoch": 0.99, "learning_rate": 5.817218968145711e-06, "loss": 0.0612, "theoretical_loss": 3.326653007973378, "tokens_seen": 3280994304 }, { "epoch": 0.99, "learning_rate": 5.777100216641258e-06, "loss": 0.0629, "theoretical_loss": 3.32664306091774, "tokens_seen": 3281125376 }, { "epoch": 0.99, "learning_rate": 5.7369814651368054e-06, "loss": 0.0661, "theoretical_loss": 3.3266331143707073, "tokens_seen": 3281256448 }, { "epoch": 0.99, "learning_rate": 5.696862713632352e-06, "loss": 0.0671, "theoretical_loss": 3.326623168332233, "tokens_seen": 3281387520 }, { "epoch": 0.99, "learning_rate": 5.656743962127899e-06, "loss": 0.062, "theoretical_loss": 3.326613222802271, "tokens_seen": 3281518592 }, { "epoch": 0.99, "learning_rate": 5.616625210623445e-06, "loss": 0.0645, "theoretical_loss": 3.326603277780775, "tokens_seen": 3281649664 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.0006102763582020998, "objective/train/docs_used": 1192226, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2206430435180664, "objective/train/original_loss": 1.2206430435180664, "objective/train/theoretical_loss": 3.326593333267698, "objective/train/tokens_used": 1652305376, "objective/train/value_avg": -0.0096893310546875, "objective/train/value_loss": 0.0003871889493893832, "objective/train/value_max": -3.0219554901123047e-05, "objective/train/value_min": -0.49169921875, "objective/train/value_reward_corr": 0.7183331473464151, "objective/train/value_std": 0.0209503173828125, "objective/train/weight_avg": 1.0007809400558472, "objective/train/weighted_lm_loss": 1.220513105392456, "objective/train/weights_max": 1.3832874298095703, "objective/train/weights_min": 0.37178733944892883, "theoretical_loss": 3.326593333267698, "tokens_seen": 3281780736 }, { "epoch": 0.99, "learning_rate": 5.576506459118993e-06, "loss": 0.0683, "theoretical_loss": 3.326593333267698, "tokens_seen": 3281780736 }, { "epoch": 0.99, "learning_rate": 5.53638770761454e-06, "loss": 0.0639, "theoretical_loss": 3.3265833892629946, "tokens_seen": 3281911808 }, { "epoch": 0.99, "learning_rate": 5.496268956110085e-06, "loss": 0.0622, "theoretical_loss": 3.3265734457666185, "tokens_seen": 3282042880 }, { "epoch": 0.99, "learning_rate": 5.4561502046056325e-06, "loss": 0.0638, "theoretical_loss": 3.326563502778523, "tokens_seen": 3282173952 }, { "epoch": 0.99, "learning_rate": 5.41603145310118e-06, "loss": 0.0598, "theoretical_loss": 3.3265535602986622, "tokens_seen": 3282305024 }, { "epoch": 0.99, "learning_rate": 5.375912701596726e-06, "loss": 0.06, "theoretical_loss": 3.326543618326989, "tokens_seen": 3282436096 }, { "epoch": 0.99, "learning_rate": 5.335793950092273e-06, "loss": 0.0601, "theoretical_loss": 3.3265336768634586, "tokens_seen": 3282567168 }, { "epoch": 0.99, "learning_rate": 5.2956751985878205e-06, "loss": 0.0635, "theoretical_loss": 3.3265237359080233, "tokens_seen": 3282698240 }, { "epoch": 0.99, "learning_rate": 5.255556447083367e-06, "loss": 0.0585, "theoretical_loss": 3.3265137954606376, "tokens_seen": 3282829312 }, { "epoch": 0.99, "learning_rate": 5.215437695578914e-06, "loss": 0.0644, "theoretical_loss": 3.326503855521255, "tokens_seen": 3282960384 }, { "epoch": 0.99, "learning_rate": 5.1753189440744605e-06, "loss": 0.0638, "theoretical_loss": 3.3264939160898295, "tokens_seen": 3283091456 }, { "epoch": 0.99, "learning_rate": 5.135200192570008e-06, "loss": 0.0686, "theoretical_loss": 3.3264839771663146, "tokens_seen": 3283222528 }, { "epoch": 0.99, "learning_rate": 5.095081441065554e-06, "loss": 0.0618, "theoretical_loss": 3.326474038750664, "tokens_seen": 3283353600 }, { "epoch": 0.99, "learning_rate": 5.0549626895611e-06, "loss": 0.0622, "theoretical_loss": 3.326464100842832, "tokens_seen": 3283484672 }, { "epoch": 0.99, "learning_rate": 5.014843938056648e-06, "loss": 0.0631, "theoretical_loss": 3.3264541634427713, "tokens_seen": 3283615744 }, { "epoch": 0.99, "learning_rate": 4.974725186552195e-06, "loss": 0.0642, "theoretical_loss": 3.3264442265504375, "tokens_seen": 3283746816 }, { "epoch": 0.99, "learning_rate": 4.934606435047741e-06, "loss": 0.061, "theoretical_loss": 3.3264342901657824, "tokens_seen": 3283877888 }, { "epoch": 0.99, "learning_rate": 4.894487683543288e-06, "loss": 0.0628, "theoretical_loss": 3.3264243542887613, "tokens_seen": 3284008960 }, { "epoch": 0.99, "learning_rate": 4.854368932038835e-06, "loss": 0.0639, "theoretical_loss": 3.326414418919327, "tokens_seen": 3284140032 }, { "epoch": 0.99, "learning_rate": 4.814250180534382e-06, "loss": 0.063, "theoretical_loss": 3.326404484057434, "tokens_seen": 3284271104 }, { "epoch": 0.99, "learning_rate": 4.774131429029929e-06, "loss": 0.0637, "theoretical_loss": 3.3263945497030356, "tokens_seen": 3284402176 }, { "epoch": 0.99, "learning_rate": 4.7340126775254756e-06, "loss": 0.0645, "theoretical_loss": 3.326384615856086, "tokens_seen": 3284533248 }, { "epoch": 0.99, "learning_rate": 4.693893926021022e-06, "loss": 0.0676, "theoretical_loss": 3.326374682516539, "tokens_seen": 3284664320 }, { "epoch": 0.99, "learning_rate": 4.653775174516569e-06, "loss": 0.0643, "theoretical_loss": 3.3263647496843483, "tokens_seen": 3284795392 }, { "epoch": 0.99, "learning_rate": 4.6136564230121155e-06, "loss": 0.0669, "theoretical_loss": 3.3263548173594675, "tokens_seen": 3284926464 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.0005359156057238579, "objective/train/docs_used": 1193279, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2919590473175049, "objective/train/original_loss": 1.2919588088989258, "objective/train/theoretical_loss": 3.326344885541851, "objective/train/tokens_used": 1655582176, "objective/train/value_avg": -0.0065460205078125, "objective/train/value_loss": 0.00020337516616564244, "objective/train/value_max": -2.467632293701172e-05, "objective/train/value_min": -0.310791015625, "objective/train/value_reward_corr": 0.768606695950726, "objective/train/value_std": 0.01708984375, "objective/train/weight_avg": 1.0006251335144043, "objective/train/weighted_lm_loss": 1.2926040887832642, "objective/train/weights_max": 1.130799412727356, "objective/train/weights_min": 0.36851292848587036, "theoretical_loss": 3.326344885541851, "tokens_seen": 3285057536 }, { "epoch": 0.99, "learning_rate": 4.573537671507663e-06, "loss": 0.0632, "theoretical_loss": 3.326344885541851, "tokens_seen": 3285057536 }, { "epoch": 0.99, "learning_rate": 4.53341892000321e-06, "loss": 0.059, "theoretical_loss": 3.3263349542314526, "tokens_seen": 3285188608 }, { "epoch": 0.99, "learning_rate": 4.493300168498756e-06, "loss": 0.0627, "theoretical_loss": 3.3263250234282253, "tokens_seen": 3285319680 }, { "epoch": 0.99, "learning_rate": 4.4531814169943035e-06, "loss": 0.0631, "theoretical_loss": 3.3263150931321244, "tokens_seen": 3285450752 }, { "epoch": 0.99, "learning_rate": 4.41306266548985e-06, "loss": 0.0632, "theoretical_loss": 3.3263051633431027, "tokens_seen": 3285581824 }, { "epoch": 0.99, "learning_rate": 4.372943913985397e-06, "loss": 0.0633, "theoretical_loss": 3.326295234061114, "tokens_seen": 3285712896 }, { "epoch": 0.99, "learning_rate": 4.332825162480944e-06, "loss": 0.0635, "theoretical_loss": 3.326285305286113, "tokens_seen": 3285843968 }, { "epoch": 0.99, "learning_rate": 4.29270641097649e-06, "loss": 0.0633, "theoretical_loss": 3.326275377018053, "tokens_seen": 3285975040 }, { "epoch": 0.99, "learning_rate": 4.252587659472037e-06, "loss": 0.0617, "theoretical_loss": 3.326265449256888, "tokens_seen": 3286106112 }, { "epoch": 0.99, "learning_rate": 4.212468907967584e-06, "loss": 0.0634, "theoretical_loss": 3.326255522002572, "tokens_seen": 3286237184 }, { "epoch": 0.99, "learning_rate": 4.172350156463131e-06, "loss": 0.0622, "theoretical_loss": 3.3262455952550587, "tokens_seen": 3286368256 }, { "epoch": 0.99, "learning_rate": 4.132231404958678e-06, "loss": 0.0597, "theoretical_loss": 3.3262356690143022, "tokens_seen": 3286499328 }, { "epoch": 0.99, "learning_rate": 4.092112653454225e-06, "loss": 0.0622, "theoretical_loss": 3.326225743280257, "tokens_seen": 3286630400 }, { "epoch": 0.99, "learning_rate": 4.051993901949771e-06, "loss": 0.0611, "theoretical_loss": 3.3262158180528756, "tokens_seen": 3286761472 }, { "epoch": 0.99, "learning_rate": 4.011875150445319e-06, "loss": 0.0655, "theoretical_loss": 3.326205893332113, "tokens_seen": 3286892544 }, { "epoch": 0.99, "learning_rate": 3.971756398940865e-06, "loss": 0.0616, "theoretical_loss": 3.326195969117923, "tokens_seen": 3287023616 }, { "epoch": 0.99, "learning_rate": 3.931637647436412e-06, "loss": 0.0608, "theoretical_loss": 3.3261860454102594, "tokens_seen": 3287154688 }, { "epoch": 0.99, "learning_rate": 3.8915188959319586e-06, "loss": 0.0628, "theoretical_loss": 3.326176122209076, "tokens_seen": 3287285760 }, { "epoch": 0.99, "learning_rate": 3.851400144427506e-06, "loss": 0.0658, "theoretical_loss": 3.3261661995143275, "tokens_seen": 3287416832 }, { "epoch": 0.99, "learning_rate": 3.8112813929230526e-06, "loss": 0.0631, "theoretical_loss": 3.3261562773259667, "tokens_seen": 3287547904 }, { "epoch": 0.99, "learning_rate": 3.771162641418599e-06, "loss": 0.0594, "theoretical_loss": 3.3261463556439486, "tokens_seen": 3287678976 }, { "epoch": 0.99, "learning_rate": 3.7310438899141457e-06, "loss": 0.0606, "theoretical_loss": 3.3261364344682267, "tokens_seen": 3287810048 }, { "epoch": 0.99, "learning_rate": 3.690925138409693e-06, "loss": 0.0592, "theoretical_loss": 3.3261265137987546, "tokens_seen": 3287941120 }, { "epoch": 0.99, "learning_rate": 3.6508063869052397e-06, "loss": 0.0626, "theoretical_loss": 3.3261165936354873, "tokens_seen": 3288072192 }, { "epoch": 0.99, "learning_rate": 3.6106876354007865e-06, "loss": 0.0635, "theoretical_loss": 3.326106673978378, "tokens_seen": 3288203264 }, { "debugging/Compilability": 1.0, "debugging/distinct-1-grams": 0.7749536012041982, "debugging/entropy-1-grams": 4.976317199024697, "debugging/length": 425.42857142857144, "debugging/num_segments": 7, "debugging/raw_token_scores_avg": 0.009957980364561081, "debugging/raw_token_scores_std": 0.02842884324491024, "debugging/score": 0.008477571128683236, "debugging/score_std": 0.011697317683467299, "epoch": 0.99, "objective/train/advantage_avg": -0.0003846609324682504, "objective/train/docs_used": 1194491, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.3025975227355957, "objective/train/original_loss": 1.3025972843170166, "objective/train/theoretical_loss": 3.3260967548273808, "objective/train/tokens_used": 1658858976, "objective/train/value_avg": -0.0095672607421875, "objective/train/value_loss": 0.0002692395937629044, "objective/train/value_max": -2.4318695068359375e-05, "objective/train/value_min": -0.9033203125, "objective/train/value_reward_corr": 0.8163688121660296, "objective/train/value_std": 0.0225372314453125, "objective/train/weight_avg": 0.9997378587722778, "objective/train/weighted_lm_loss": 1.3015943765640259, "objective/train/weights_max": 1.2044645547866821, "objective/train/weights_min": 0.38554611802101135, "theoretical_loss": 3.3260967548273808, "tokens_seen": 3288334336 }, { "epoch": 0.99, "learning_rate": 3.570568883896333e-06, "loss": 0.0605, "theoretical_loss": 3.3260967548273808, "tokens_seen": 3288334336 }, { "epoch": 0.99, "learning_rate": 3.53045013239188e-06, "loss": 0.0661, "theoretical_loss": 3.3260868361824496, "tokens_seen": 3288465408 }, { "epoch": 0.99, "learning_rate": 3.490331380887427e-06, "loss": 0.0613, "theoretical_loss": 3.326076918043539, "tokens_seen": 3288596480 }, { "epoch": 0.99, "learning_rate": 3.4502126293829736e-06, "loss": 0.0614, "theoretical_loss": 3.3260670004106028, "tokens_seen": 3288727552 }, { "epoch": 0.99, "learning_rate": 3.410093877878521e-06, "loss": 0.0619, "theoretical_loss": 3.3260570832835943, "tokens_seen": 3288858624 }, { "epoch": 0.99, "learning_rate": 3.3699751263740672e-06, "loss": 0.0642, "theoretical_loss": 3.3260471666624687, "tokens_seen": 3288989696 }, { "epoch": 0.99, "learning_rate": 3.329856374869614e-06, "loss": 0.0613, "theoretical_loss": 3.326037250547179, "tokens_seen": 3289120768 }, { "epoch": 0.99, "learning_rate": 3.289737623365161e-06, "loss": 0.062, "theoretical_loss": 3.32602733493768, "tokens_seen": 3289251840 }, { "epoch": 0.99, "learning_rate": 3.249618871860708e-06, "loss": 0.0648, "theoretical_loss": 3.326017419833925, "tokens_seen": 3289382912 }, { "epoch": 0.99, "learning_rate": 3.209500120356255e-06, "loss": 0.0617, "theoretical_loss": 3.326007505235869, "tokens_seen": 3289513984 }, { "epoch": 0.99, "learning_rate": 3.169381368851801e-06, "loss": 0.0615, "theoretical_loss": 3.3259975911434654, "tokens_seen": 3289645056 }, { "epoch": 0.99, "learning_rate": 3.129262617347348e-06, "loss": 0.0608, "theoretical_loss": 3.325987677556668, "tokens_seen": 3289776128 }, { "epoch": 0.99, "learning_rate": 3.089143865842895e-06, "loss": 0.0612, "theoretical_loss": 3.3259777644754323, "tokens_seen": 3289907200 }, { "epoch": 0.99, "learning_rate": 3.049025114338442e-06, "loss": 0.0627, "theoretical_loss": 3.3259678518997102, "tokens_seen": 3290038272 }, { "epoch": 0.99, "learning_rate": 3.0089063628339887e-06, "loss": 0.0654, "theoretical_loss": 3.3259579398294576, "tokens_seen": 3290169344 }, { "epoch": 0.99, "learning_rate": 2.968787611329535e-06, "loss": 0.0634, "theoretical_loss": 3.325948028264628, "tokens_seen": 3290300416 }, { "epoch": 0.99, "learning_rate": 2.9286688598250823e-06, "loss": 0.0641, "theoretical_loss": 3.325938117205175, "tokens_seen": 3290431488 }, { "epoch": 0.99, "learning_rate": 2.888550108320629e-06, "loss": 0.0615, "theoretical_loss": 3.3259282066510534, "tokens_seen": 3290562560 }, { "epoch": 0.99, "learning_rate": 2.848431356816176e-06, "loss": 0.0651, "theoretical_loss": 3.3259182966022167, "tokens_seen": 3290693632 }, { "epoch": 0.99, "learning_rate": 2.8083126053117227e-06, "loss": 0.0621, "theoretical_loss": 3.32590838705862, "tokens_seen": 3290824704 }, { "epoch": 0.99, "learning_rate": 2.76819385380727e-06, "loss": 0.0647, "theoretical_loss": 3.3258984780202163, "tokens_seen": 3290955776 }, { "epoch": 0.99, "learning_rate": 2.7280751023028163e-06, "loss": 0.061, "theoretical_loss": 3.32588856948696, "tokens_seen": 3291086848 }, { "epoch": 0.99, "learning_rate": 2.687956350798363e-06, "loss": 0.0633, "theoretical_loss": 3.325878661458806, "tokens_seen": 3291217920 }, { "epoch": 0.99, "learning_rate": 2.6478375992939103e-06, "loss": 0.0638, "theoretical_loss": 3.3258687539357075, "tokens_seen": 3291348992 }, { "epoch": 0.99, "learning_rate": 2.607718847789457e-06, "loss": 0.063, "theoretical_loss": 3.325858846917619, "tokens_seen": 3291480064 }, { "epoch": 0.99, "objective/train/advantage_avg": 0.0006125391228124499, "objective/train/docs_used": 1195662, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1160727739334106, "objective/train/original_loss": 1.116072654724121, "objective/train/theoretical_loss": 3.325848940404495, "objective/train/tokens_used": 1662135776, "objective/train/value_avg": -0.00574493408203125, "objective/train/value_loss": 0.00015230326971504837, "objective/train/value_max": -2.956390380859375e-05, "objective/train/value_min": -0.9951171875, "objective/train/value_reward_corr": 0.698867631201437, "objective/train/value_std": 0.014129638671875, "objective/train/weight_avg": 1.0006864070892334, "objective/train/weighted_lm_loss": 1.1161766052246094, "objective/train/weights_max": 2.102025032043457, "objective/train/weights_min": 0.3759489059448242, "theoretical_loss": 3.325848940404495, "tokens_seen": 3291611136 }, { "epoch": 0.99, "learning_rate": 2.567600096285004e-06, "loss": 0.0622, "theoretical_loss": 3.325848940404495, "tokens_seen": 3291611136 }, { "epoch": 0.99, "learning_rate": 2.52748134478055e-06, "loss": 0.0644, "theoretical_loss": 3.3258390343962887, "tokens_seen": 3291742208 }, { "epoch": 1.0, "learning_rate": 2.4873625932760974e-06, "loss": 0.0654, "theoretical_loss": 3.3258291288929556, "tokens_seen": 3291873280 }, { "epoch": 1.0, "learning_rate": 2.447243841771644e-06, "loss": 0.0613, "theoretical_loss": 3.3258192238944484, "tokens_seen": 3292004352 }, { "epoch": 1.0, "learning_rate": 2.407125090267191e-06, "loss": 0.0656, "theoretical_loss": 3.3258093194007223, "tokens_seen": 3292135424 }, { "epoch": 1.0, "learning_rate": 2.3670063387627378e-06, "loss": 0.0649, "theoretical_loss": 3.3257994154117316, "tokens_seen": 3292266496 }, { "epoch": 1.0, "learning_rate": 2.3268875872582846e-06, "loss": 0.0619, "theoretical_loss": 3.3257895119274297, "tokens_seen": 3292397568 }, { "epoch": 1.0, "learning_rate": 2.2867688357538314e-06, "loss": 0.0635, "theoretical_loss": 3.325779608947771, "tokens_seen": 3292528640 }, { "epoch": 1.0, "learning_rate": 2.246650084249378e-06, "loss": 0.0629, "theoretical_loss": 3.3257697064727103, "tokens_seen": 3292659712 }, { "epoch": 1.0, "learning_rate": 2.206531332744925e-06, "loss": 0.0619, "theoretical_loss": 3.3257598045022014, "tokens_seen": 3292790784 }, { "epoch": 1.0, "learning_rate": 2.166412581240472e-06, "loss": 0.0599, "theoretical_loss": 3.3257499030361983, "tokens_seen": 3292921856 }, { "epoch": 1.0, "learning_rate": 2.1262938297360185e-06, "loss": 0.0623, "theoretical_loss": 3.3257400020746553, "tokens_seen": 3293052928 }, { "epoch": 1.0, "learning_rate": 2.0861750782315653e-06, "loss": 0.0656, "theoretical_loss": 3.325730101617527, "tokens_seen": 3293184000 }, { "epoch": 1.0, "learning_rate": 2.0460563267271125e-06, "loss": 0.0619, "theoretical_loss": 3.325720201664767, "tokens_seen": 3293315072 }, { "epoch": 1.0, "learning_rate": 2.0059375752226593e-06, "loss": 0.0611, "theoretical_loss": 3.32571030221633, "tokens_seen": 3293446144 }, { "epoch": 1.0, "learning_rate": 1.965818823718206e-06, "loss": 0.0636, "theoretical_loss": 3.32570040327217, "tokens_seen": 3293577216 }, { "epoch": 1.0, "learning_rate": 1.925700072213753e-06, "loss": 0.0606, "theoretical_loss": 3.3256905048322416, "tokens_seen": 3293708288 }, { "epoch": 1.0, "learning_rate": 1.8855813207092995e-06, "loss": 0.0666, "theoretical_loss": 3.325680606896499, "tokens_seen": 3293839360 }, { "epoch": 1.0, "learning_rate": 1.8454625692048465e-06, "loss": 0.0633, "theoretical_loss": 3.325670709464896, "tokens_seen": 3293970432 }, { "epoch": 1.0, "learning_rate": 1.8053438177003932e-06, "loss": 0.059, "theoretical_loss": 3.325660812537387, "tokens_seen": 3294101504 }, { "epoch": 1.0, "learning_rate": 1.76522506619594e-06, "loss": 0.0631, "theoretical_loss": 3.3256509161139265, "tokens_seen": 3294232576 }, { "epoch": 1.0, "learning_rate": 1.7251063146914868e-06, "loss": 0.0657, "theoretical_loss": 3.3256410201944693, "tokens_seen": 3294363648 }, { "epoch": 1.0, "learning_rate": 1.6849875631870336e-06, "loss": 0.0623, "theoretical_loss": 3.325631124778968, "tokens_seen": 3294494720 }, { "epoch": 1.0, "learning_rate": 1.6448688116825804e-06, "loss": 0.0655, "theoretical_loss": 3.325621229867379, "tokens_seen": 3294625792 }, { "epoch": 1.0, "learning_rate": 1.6047500601781274e-06, "loss": 0.0613, "theoretical_loss": 3.3256113354596546, "tokens_seen": 3294756864 }, { "epoch": 1.0, "objective/train/advantage_avg": 0.0005046845762990415, "objective/train/docs_used": 1196765, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.1708943843841553, "objective/train/original_loss": 1.1708942651748657, "objective/train/theoretical_loss": 3.3256014415557504, "objective/train/tokens_used": 1665412576, "objective/train/value_avg": -0.006610870361328125, "objective/train/value_loss": 0.0002913080679718405, "objective/train/value_max": -4.297494888305664e-05, "objective/train/value_min": -0.66552734375, "objective/train/value_reward_corr": 0.676859747643346, "objective/train/value_std": 0.0159759521484375, "objective/train/weight_avg": 1.0006282329559326, "objective/train/weighted_lm_loss": 1.1712182760238647, "objective/train/weights_max": 1.9172251224517822, "objective/train/weights_min": 0.2500159740447998, "theoretical_loss": 3.3256014415557504, "tokens_seen": 3294887936 }, { "epoch": 1.0, "learning_rate": 1.564631308673674e-06, "loss": 0.0625, "theoretical_loss": 3.3256014415557504, "tokens_seen": 3294887936 }, { "epoch": 1.0, "learning_rate": 1.524512557169221e-06, "loss": 0.0652, "theoretical_loss": 3.3255915481556206, "tokens_seen": 3295019008 }, { "epoch": 1.0, "learning_rate": 1.4843938056647676e-06, "loss": 0.0609, "theoretical_loss": 3.325581655259219, "tokens_seen": 3295150080 }, { "epoch": 1.0, "learning_rate": 1.4442750541603146e-06, "loss": 0.0604, "theoretical_loss": 3.3255717628665002, "tokens_seen": 3295281152 }, { "epoch": 1.0, "learning_rate": 1.4041563026558613e-06, "loss": 0.0576, "theoretical_loss": 3.3255618709774186, "tokens_seen": 3295412224 }, { "epoch": 1.0, "learning_rate": 1.3640375511514081e-06, "loss": 0.0625, "theoretical_loss": 3.3255519795919284, "tokens_seen": 3295543296 }, { "epoch": 1.0, "learning_rate": 1.3239187996469551e-06, "loss": 0.0669, "theoretical_loss": 3.3255420887099842, "tokens_seen": 3295674368 }, { "epoch": 1.0, "learning_rate": 1.283800048142502e-06, "loss": 0.0642, "theoretical_loss": 3.3255321983315396, "tokens_seen": 3295805440 }, { "epoch": 1.0, "learning_rate": 1.2436812966380487e-06, "loss": 0.0608, "theoretical_loss": 3.32552230845655, "tokens_seen": 3295936512 }, { "epoch": 1.0, "learning_rate": 1.2035625451335955e-06, "loss": 0.0655, "theoretical_loss": 3.3255124190849688, "tokens_seen": 3296067584 }, { "epoch": 1.0, "learning_rate": 1.1634437936291423e-06, "loss": 0.0618, "theoretical_loss": 3.3255025302167507, "tokens_seen": 3296198656 }, { "epoch": 1.0, "learning_rate": 1.123325042124689e-06, "loss": 0.0639, "theoretical_loss": 3.3254926418518505, "tokens_seen": 3296329728 }, { "epoch": 1.0, "learning_rate": 1.083206290620236e-06, "loss": 0.0619, "theoretical_loss": 3.3254827539902223, "tokens_seen": 3296460800 }, { "epoch": 1.0, "learning_rate": 1.0430875391157827e-06, "loss": 0.0628, "theoretical_loss": 3.32547286663182, "tokens_seen": 3296591872 }, { "epoch": 1.0, "learning_rate": 1.0029687876113297e-06, "loss": 0.0659, "theoretical_loss": 3.3254629797765984, "tokens_seen": 3296722944 }, { "epoch": 1.0, "learning_rate": 9.628500361068764e-07, "loss": 0.0635, "theoretical_loss": 3.3254530934245117, "tokens_seen": 3296854016 }, { "epoch": 1.0, "learning_rate": 9.227312846024232e-07, "loss": 0.0628, "theoretical_loss": 3.325443207575515, "tokens_seen": 3296985088 }, { "epoch": 1.0, "learning_rate": 8.8261253309797e-07, "loss": 0.0595, "theoretical_loss": 3.3254333222295616, "tokens_seen": 3297116160 }, { "epoch": 1.0, "learning_rate": 8.424937815935168e-07, "loss": 0.063, "theoretical_loss": 3.3254234373866067, "tokens_seen": 3297247232 }, { "epoch": 1.0, "learning_rate": 8.023750300890637e-07, "loss": 0.0643, "theoretical_loss": 3.3254135530466042, "tokens_seen": 3297378304 }, { "epoch": 1.0, "learning_rate": 7.622562785846105e-07, "loss": 0.0643, "theoretical_loss": 3.325403669209509, "tokens_seen": 3297509376 }, { "epoch": 1.0, "learning_rate": 7.221375270801573e-07, "loss": 0.0628, "theoretical_loss": 3.325393785875275, "tokens_seen": 3297640448 }, { "epoch": 1.0, "learning_rate": 6.820187755757041e-07, "loss": 0.0631, "theoretical_loss": 3.3253839030438574, "tokens_seen": 3297771520 }, { "epoch": 1.0, "learning_rate": 6.41900024071251e-07, "loss": 0.0692, "theoretical_loss": 3.32537402071521, "tokens_seen": 3297902592 }, { "epoch": 1.0, "learning_rate": 6.017812725667977e-07, "loss": 0.0684, "theoretical_loss": 3.325364138889287, "tokens_seen": 3298033664 }, { "epoch": 1.0, "objective/train/advantage_avg": -0.00031999059137888253, "objective/train/docs_used": 1197928, "objective/train/instantaneous_batch_size": 32, "objective/train/instantaneous_microbatch_size": 32768, "objective/train/lm_loss": 1.2385108470916748, "objective/train/original_loss": 1.2385106086730957, "objective/train/theoretical_loss": 3.325354257566044, "objective/train/tokens_used": 1668689376, "objective/train/value_avg": -0.00969696044921875, "objective/train/value_loss": 0.00017712591215968132, "objective/train/value_max": -6.866455078125e-05, "objective/train/value_min": -0.402099609375, "objective/train/value_reward_corr": 0.803785432217599, "objective/train/value_std": 0.01806640625, "objective/train/weight_avg": 0.9997603893280029, "objective/train/weighted_lm_loss": 1.2381342649459839, "objective/train/weights_max": 1.1496511697769165, "objective/train/weights_min": 0.3705187439918518, "theoretical_loss": 3.325354257566044, "tokens_seen": 3298164736 }, { "epoch": 1.0, "learning_rate": 5.616625210623445e-07, "loss": 0.0662, "theoretical_loss": 3.325354257566044, "tokens_seen": 3298164736 }, { "epoch": 1.0, "learning_rate": 5.215437695578913e-07, "loss": 0.0629, "theoretical_loss": 3.3253443767454343, "tokens_seen": 3298295808 }, { "epoch": 1.0, "learning_rate": 4.814250180534382e-07, "loss": 0.0629, "theoretical_loss": 3.3253344964274127, "tokens_seen": 3298426880 }, { "epoch": 1.0, "learning_rate": 4.41306266548985e-07, "loss": 0.0637, "theoretical_loss": 3.325324616611934, "tokens_seen": 3298557952 }, { "epoch": 1.0, "learning_rate": 4.0118751504453185e-07, "loss": 0.0648, "theoretical_loss": 3.325314737298952, "tokens_seen": 3298689024 }, { "epoch": 1.0, "learning_rate": 3.6106876354007864e-07, "loss": 0.0596, "theoretical_loss": 3.325304858488422, "tokens_seen": 3298820096 }, { "epoch": 1.0, "learning_rate": 3.209500120356255e-07, "loss": 0.0626, "theoretical_loss": 3.3252949801802982, "tokens_seen": 3298951168 }, { "epoch": 1.0, "learning_rate": 2.8083126053117227e-07, "loss": 0.0619, "theoretical_loss": 3.3252851023745347, "tokens_seen": 3299082240 }, { "epoch": 1.0, "learning_rate": 2.407125090267191e-07, "loss": 0.0634, "theoretical_loss": 3.325275225071086, "tokens_seen": 3299213312 }, { "epoch": 1.0, "learning_rate": 2.0059375752226593e-07, "loss": 0.0647, "theoretical_loss": 3.325265348269907, "tokens_seen": 3299344384 }, { "epoch": 1.0, "learning_rate": 1.6047500601781274e-07, "loss": 0.0625, "theoretical_loss": 3.3252554719709524, "tokens_seen": 3299475456 }, { "epoch": 1.0, "learning_rate": 1.2035625451335956e-07, "loss": 0.0654, "theoretical_loss": 3.325245596174176, "tokens_seen": 3299606528 }, { "epoch": 1.0, "learning_rate": 8.023750300890637e-08, "loss": 0.0662, "theoretical_loss": 3.3252357208795327, "tokens_seen": 3299737600 }, { "epoch": 1.0, "learning_rate": 4.0118751504453185e-08, "loss": 0.0596, "theoretical_loss": 3.325225846086977, "tokens_seen": 3299868672 } ], "max_steps": 12589, "num_train_epochs": 9223372036854775807, "total_flos": 8.420284921943163e+17, "trial_name": null, "trial_params": null }