{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 9375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.33049040511727e-10, "logits/generated": 2.4733197689056396, "logits/real": 2.993959903717041, "logps/generated": -744.6532592773438, "logps/real": -445.47125244140625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.3304904051172705e-09, "logits/generated": 2.914949655532837, "logits/real": 2.1627933979034424, "logps/generated": -786.878662109375, "logps/real": -367.7186279296875, "loss": 0.6972, "rewards/accuracies": 0.4166666567325592, "rewards/generated": -0.0017182661686092615, "rewards/margins": 0.0003558894095476717, "rewards/real": -0.0013623759150505066, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.0660980810234541e-08, "logits/generated": 3.6670937538146973, "logits/real": 2.493891477584839, "logps/generated": -754.4434814453125, "logps/real": -391.56927490234375, "loss": 0.6253, "rewards/accuracies": 0.7875000238418579, "rewards/generated": -0.2193388193845749, "rewards/margins": 0.19206231832504272, "rewards/real": -0.027276504784822464, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.5991471215351812e-08, "logits/generated": 3.352174758911133, "logits/real": 2.6071481704711914, "logps/generated": -768.8364868164062, "logps/real": -397.295166015625, "loss": 0.4816, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -0.741288959980011, "rewards/margins": 0.6227419972419739, "rewards/real": -0.11854700744152069, "step": 30 }, { "epoch": 0.01, "learning_rate": 2.1321961620469082e-08, "logits/generated": 3.278261184692383, "logits/real": 1.9227397441864014, "logps/generated": -747.123291015625, "logps/real": -358.121337890625, "loss": 0.3565, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.4226820468902588, "rewards/margins": 1.2152725458145142, "rewards/real": -0.2074095755815506, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.6652452025586352e-08, "logits/generated": 3.479980945587158, "logits/real": 2.228393316268921, "logps/generated": -776.4798583984375, "logps/real": -430.91351318359375, "loss": 0.2816, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.098968744277954, "rewards/margins": 1.7570228576660156, "rewards/real": -0.3419457674026489, "step": 50 }, { "epoch": 0.02, "learning_rate": 3.1982942430703625e-08, "logits/generated": 3.5384933948516846, "logits/real": 2.410613775253296, "logps/generated": -767.2821044921875, "logps/real": -386.021240234375, "loss": 0.2216, "rewards/accuracies": 1.0, "rewards/generated": -2.6462717056274414, "rewards/margins": 2.2934679985046387, "rewards/real": -0.3528037369251251, "step": 60 }, { "epoch": 0.02, "learning_rate": 3.731343283582089e-08, "logits/generated": 3.4870619773864746, "logits/real": 2.468822956085205, "logps/generated": -744.57373046875, "logps/real": -386.4364318847656, "loss": 0.193, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.9016287326812744, "rewards/margins": 2.517054557800293, "rewards/real": -0.3845742642879486, "step": 70 }, { "epoch": 0.03, "learning_rate": 4.2643923240938164e-08, "logits/generated": 3.9800186157226562, "logits/real": 2.0814194679260254, "logps/generated": -790.8582153320312, "logps/real": -453.763916015625, "loss": 0.1313, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.697575330734253, "rewards/margins": 3.1558220386505127, "rewards/real": -0.5417531728744507, "step": 80 }, { "epoch": 0.03, "learning_rate": 4.7974413646055434e-08, "logits/generated": 3.3658294677734375, "logits/real": 2.511828899383545, "logps/generated": -795.5068969726562, "logps/real": -413.8627014160156, "loss": 0.1019, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.45084810256958, "rewards/margins": 3.9712982177734375, "rewards/real": -0.4795497953891754, "step": 90 }, { "epoch": 0.03, "learning_rate": 5.3304904051172704e-08, "logits/generated": 3.3936619758605957, "logits/real": 2.2098138332366943, "logps/generated": -770.1859130859375, "logps/real": -419.8675842285156, "loss": 0.1028, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.769425868988037, "rewards/margins": 4.332492351531982, "rewards/real": -0.4369334578514099, "step": 100 }, { "epoch": 0.04, "learning_rate": 5.8635394456289973e-08, "logits/generated": 3.610246181488037, "logits/real": 2.5977251529693604, "logps/generated": -854.7119140625, "logps/real": -420.6315002441406, "loss": 0.0747, "rewards/accuracies": 1.0, "rewards/generated": -6.063816547393799, "rewards/margins": 5.467397689819336, "rewards/real": -0.5964194536209106, "step": 110 }, { "epoch": 0.04, "learning_rate": 6.396588486140725e-08, "logits/generated": 4.131409645080566, "logits/real": 2.2879414558410645, "logps/generated": -803.0030517578125, "logps/real": -444.0707092285156, "loss": 0.064, "rewards/accuracies": 1.0, "rewards/generated": -5.934601783752441, "rewards/margins": 5.36789608001709, "rewards/real": -0.5667055249214172, "step": 120 }, { "epoch": 0.04, "learning_rate": 6.929637526652451e-08, "logits/generated": 3.729031801223755, "logits/real": 2.4918339252471924, "logps/generated": -749.9617919921875, "logps/real": -405.51239013671875, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/generated": -5.877609729766846, "rewards/margins": 5.392262935638428, "rewards/real": -0.485347181558609, "step": 130 }, { "epoch": 0.04, "learning_rate": 7.462686567164178e-08, "logits/generated": 3.4843528270721436, "logits/real": 2.5223817825317383, "logps/generated": -827.0458984375, "logps/real": -403.2733154296875, "loss": 0.0419, "rewards/accuracies": 1.0, "rewards/generated": -7.424189567565918, "rewards/margins": 6.950144290924072, "rewards/real": -0.47404488921165466, "step": 140 }, { "epoch": 0.05, "learning_rate": 7.995735607675907e-08, "logits/generated": 3.8641517162323, "logits/real": 2.4353888034820557, "logps/generated": -832.6451416015625, "logps/real": -397.03326416015625, "loss": 0.0387, "rewards/accuracies": 1.0, "rewards/generated": -8.154227256774902, "rewards/margins": 7.542234897613525, "rewards/real": -0.6119927167892456, "step": 150 }, { "epoch": 0.05, "learning_rate": 8.528784648187633e-08, "logits/generated": 3.9688518047332764, "logits/real": 2.541832685470581, "logps/generated": -867.1422119140625, "logps/real": -443.7312927246094, "loss": 0.0344, "rewards/accuracies": 1.0, "rewards/generated": -8.62398624420166, "rewards/margins": 7.869688987731934, "rewards/real": -0.7542966604232788, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.061833688699359e-08, "logits/generated": 3.883540630340576, "logits/real": 2.688405752182007, "logps/generated": -836.8126220703125, "logps/real": -403.43658447265625, "loss": 0.0302, "rewards/accuracies": 1.0, "rewards/generated": -8.594613075256348, "rewards/margins": 7.828234672546387, "rewards/real": -0.7663780450820923, "step": 170 }, { "epoch": 0.06, "learning_rate": 9.594882729211087e-08, "logits/generated": 4.8235650062561035, "logits/real": 2.9211935997009277, "logps/generated": -886.8326416015625, "logps/real": -413.6666564941406, "loss": 0.0265, "rewards/accuracies": 1.0, "rewards/generated": -9.72240924835205, "rewards/margins": 8.859731674194336, "rewards/real": -0.8626779317855835, "step": 180 }, { "epoch": 0.06, "learning_rate": 1.0127931769722814e-07, "logits/generated": 4.334928035736084, "logits/real": 3.248250961303711, "logps/generated": -856.8572998046875, "logps/real": -372.23382568359375, "loss": 0.0319, "rewards/accuracies": 1.0, "rewards/generated": -10.214251518249512, "rewards/margins": 9.531461715698242, "rewards/real": -0.6827906370162964, "step": 190 }, { "epoch": 0.06, "learning_rate": 1.0660980810234541e-07, "logits/generated": 4.498294830322266, "logits/real": 2.933957576751709, "logps/generated": -944.5314331054688, "logps/real": -416.85662841796875, "loss": 0.0144, "rewards/accuracies": 1.0, "rewards/generated": -10.166244506835938, "rewards/margins": 9.261064529418945, "rewards/real": -0.9051799774169922, "step": 200 }, { "epoch": 0.07, "learning_rate": 1.1194029850746268e-07, "logits/generated": 4.267141342163086, "logits/real": 2.7207605838775635, "logps/generated": -854.9772338867188, "logps/real": -429.77801513671875, "loss": 0.0176, "rewards/accuracies": 1.0, "rewards/generated": -9.995696067810059, "rewards/margins": 8.752180099487305, "rewards/real": -1.2435152530670166, "step": 210 }, { "epoch": 0.07, "learning_rate": 1.1727078891257995e-07, "logits/generated": 4.258756637573242, "logits/real": 3.0526013374328613, "logps/generated": -863.81396484375, "logps/real": -436.41766357421875, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/generated": -11.426506996154785, "rewards/margins": 10.267839431762695, "rewards/real": -1.1586660146713257, "step": 220 }, { "epoch": 0.07, "learning_rate": 1.226012793176972e-07, "logits/generated": 4.650750160217285, "logits/real": 2.9421982765197754, "logps/generated": -928.5386962890625, "logps/real": -501.5003967285156, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/generated": -12.003137588500977, "rewards/margins": 11.038549423217773, "rewards/real": -0.9645878672599792, "step": 230 }, { "epoch": 0.08, "learning_rate": 1.279317697228145e-07, "logits/generated": 4.473868370056152, "logits/real": 3.1168899536132812, "logps/generated": -846.9971923828125, "logps/real": -390.3717041015625, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/generated": -11.940008163452148, "rewards/margins": 11.190433502197266, "rewards/real": -0.7495745420455933, "step": 240 }, { "epoch": 0.08, "learning_rate": 1.3326226012793176e-07, "logits/generated": 4.030431270599365, "logits/real": 2.7810325622558594, "logps/generated": -858.9724731445312, "logps/real": -481.1717834472656, "loss": 0.0255, "rewards/accuracies": 1.0, "rewards/generated": -11.293512344360352, "rewards/margins": 9.975330352783203, "rewards/real": -1.318180799484253, "step": 250 }, { "epoch": 0.08, "learning_rate": 1.3859275053304903e-07, "logits/generated": 4.768280982971191, "logits/real": 2.7952544689178467, "logps/generated": -882.0071411132812, "logps/real": -440.50201416015625, "loss": 0.0121, "rewards/accuracies": 1.0, "rewards/generated": -12.502702713012695, "rewards/margins": 11.474106788635254, "rewards/real": -1.0285974740982056, "step": 260 }, { "epoch": 0.09, "learning_rate": 1.439232409381663e-07, "logits/generated": 4.882699966430664, "logits/real": 3.024937152862549, "logps/generated": -888.2691650390625, "logps/real": -400.3338928222656, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/generated": -13.276326179504395, "rewards/margins": 12.185705184936523, "rewards/real": -1.0906213521957397, "step": 270 }, { "epoch": 0.09, "learning_rate": 1.4925373134328355e-07, "logits/generated": 4.333956718444824, "logits/real": 3.462357759475708, "logps/generated": -824.2071533203125, "logps/real": -414.9501037597656, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/generated": -13.203546524047852, "rewards/margins": 12.313494682312012, "rewards/real": -0.8900521993637085, "step": 280 }, { "epoch": 0.09, "learning_rate": 1.5458422174840087e-07, "logits/generated": 4.403844356536865, "logits/real": 3.5299458503723145, "logps/generated": -872.6373291015625, "logps/real": -456.9317321777344, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/generated": -12.527276992797852, "rewards/margins": 11.394366264343262, "rewards/real": -1.132910966873169, "step": 290 }, { "epoch": 0.1, "learning_rate": 1.5991471215351813e-07, "logits/generated": 4.374533653259277, "logits/real": 3.450122356414795, "logps/generated": -825.4142456054688, "logps/real": -390.957763671875, "loss": 0.0136, "rewards/accuracies": 1.0, "rewards/generated": -15.316762924194336, "rewards/margins": 14.383761405944824, "rewards/real": -0.9330012202262878, "step": 300 }, { "epoch": 0.1, "learning_rate": 1.652452025586354e-07, "logits/generated": 4.574627876281738, "logits/real": 3.2677390575408936, "logps/generated": -913.6970825195312, "logps/real": -468.8421936035156, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/generated": -15.125872611999512, "rewards/margins": 13.73762035369873, "rewards/real": -1.3882533311843872, "step": 310 }, { "epoch": 0.1, "learning_rate": 1.7057569296375266e-07, "logits/generated": 4.2853684425354, "logits/real": 3.7285492420196533, "logps/generated": -990.7057495117188, "logps/real": -408.87481689453125, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/generated": -16.85171127319336, "rewards/margins": 15.923856735229492, "rewards/real": -0.9278553128242493, "step": 320 }, { "epoch": 0.11, "learning_rate": 1.7590618336886992e-07, "logits/generated": 4.532025337219238, "logits/real": 3.439203977584839, "logps/generated": -912.990234375, "logps/real": -419.6997985839844, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -15.591650009155273, "rewards/margins": 14.240079879760742, "rewards/real": -1.351569414138794, "step": 330 }, { "epoch": 0.11, "learning_rate": 1.8123667377398718e-07, "logits/generated": 3.9934635162353516, "logits/real": 3.253384828567505, "logps/generated": -945.3134765625, "logps/real": -379.3480224609375, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -16.622173309326172, "rewards/margins": 15.268315315246582, "rewards/real": -1.3538602590560913, "step": 340 }, { "epoch": 0.11, "learning_rate": 1.8656716417910447e-07, "logits/generated": 4.302222728729248, "logits/real": 3.7138893604278564, "logps/generated": -903.6271362304688, "logps/real": -406.9642333984375, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/generated": -16.617626190185547, "rewards/margins": 15.309722900390625, "rewards/real": -1.307904839515686, "step": 350 }, { "epoch": 0.12, "learning_rate": 1.9189765458422174e-07, "logits/generated": 4.664093494415283, "logits/real": 3.9194228649139404, "logps/generated": -909.6126708984375, "logps/real": -432.28997802734375, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -16.447002410888672, "rewards/margins": 14.763036727905273, "rewards/real": -1.6839679479599, "step": 360 }, { "epoch": 0.12, "learning_rate": 1.9722814498933903e-07, "logits/generated": 4.805423736572266, "logits/real": 3.6372475624084473, "logps/generated": -928.4811401367188, "logps/real": -411.20404052734375, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -17.282123565673828, "rewards/margins": 15.73072338104248, "rewards/real": -1.5513983964920044, "step": 370 }, { "epoch": 0.12, "learning_rate": 2.025586353944563e-07, "logits/generated": 4.7325119972229, "logits/real": 3.725853443145752, "logps/generated": -899.93212890625, "logps/real": -451.94647216796875, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -18.013702392578125, "rewards/margins": 16.122299194335938, "rewards/real": -1.8914031982421875, "step": 380 }, { "epoch": 0.12, "learning_rate": 2.0788912579957355e-07, "logits/generated": 5.061275005340576, "logits/real": 3.7229161262512207, "logps/generated": -993.5968627929688, "logps/real": -409.90380859375, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -21.721073150634766, "rewards/margins": 20.405847549438477, "rewards/real": -1.3152250051498413, "step": 390 }, { "epoch": 0.13, "learning_rate": 2.1321961620469082e-07, "logits/generated": 5.341602802276611, "logits/real": 3.318124771118164, "logps/generated": -908.2542724609375, "logps/real": -455.63726806640625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -17.183940887451172, "rewards/margins": 15.601503372192383, "rewards/real": -1.582434058189392, "step": 400 }, { "epoch": 0.13, "learning_rate": 2.185501066098081e-07, "logits/generated": 5.38491678237915, "logits/real": 4.025693416595459, "logps/generated": -953.0545654296875, "logps/real": -440.70208740234375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -21.363040924072266, "rewards/margins": 20.044790267944336, "rewards/real": -1.3182494640350342, "step": 410 }, { "epoch": 0.13, "learning_rate": 2.2388059701492537e-07, "logits/generated": 5.220414638519287, "logits/real": 3.3056139945983887, "logps/generated": -935.1546630859375, "logps/real": -430.81951904296875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -18.312911987304688, "rewards/margins": 16.902334213256836, "rewards/real": -1.4105777740478516, "step": 420 }, { "epoch": 0.14, "learning_rate": 2.2921108742004263e-07, "logits/generated": 5.521912574768066, "logits/real": 3.5793323516845703, "logps/generated": -938.9608154296875, "logps/real": -426.83343505859375, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/generated": -19.861270904541016, "rewards/margins": 18.283893585205078, "rewards/real": -1.577378511428833, "step": 430 }, { "epoch": 0.14, "learning_rate": 2.345415778251599e-07, "logits/generated": 4.9842939376831055, "logits/real": 3.8313961029052734, "logps/generated": -945.9016723632812, "logps/real": -437.95892333984375, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/generated": -19.22454261779785, "rewards/margins": 17.613040924072266, "rewards/real": -1.6115000247955322, "step": 440 }, { "epoch": 0.14, "learning_rate": 2.3987206823027716e-07, "logits/generated": 4.956530570983887, "logits/real": 3.860657215118408, "logps/generated": -991.1004028320312, "logps/real": -467.4757385253906, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -23.72904396057129, "rewards/margins": 21.930561065673828, "rewards/real": -1.798484206199646, "step": 450 }, { "epoch": 0.15, "learning_rate": 2.452025586353944e-07, "logits/generated": 5.490859031677246, "logits/real": 4.132542610168457, "logps/generated": -985.9427490234375, "logps/real": -446.23992919921875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/generated": -23.700206756591797, "rewards/margins": 22.092304229736328, "rewards/real": -1.6079027652740479, "step": 460 }, { "epoch": 0.15, "learning_rate": 2.505330490405117e-07, "logits/generated": 5.674051284790039, "logits/real": 4.49200963973999, "logps/generated": -1017.6057739257812, "logps/real": -388.3606262207031, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -26.551244735717773, "rewards/margins": 25.398548126220703, "rewards/real": -1.152698040008545, "step": 470 }, { "epoch": 0.15, "learning_rate": 2.55863539445629e-07, "logits/generated": 5.34218692779541, "logits/real": 4.256376266479492, "logps/generated": -1017.3714599609375, "logps/real": -415.5160217285156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -25.728673934936523, "rewards/margins": 23.847108840942383, "rewards/real": -1.881567358970642, "step": 480 }, { "epoch": 0.16, "learning_rate": 2.611940298507462e-07, "logits/generated": 5.433243751525879, "logits/real": 4.6507978439331055, "logps/generated": -1037.853271484375, "logps/real": -371.70513916015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -25.7724609375, "rewards/margins": 24.16178321838379, "rewards/real": -1.6106784343719482, "step": 490 }, { "epoch": 0.16, "learning_rate": 2.665245202558635e-07, "logits/generated": 5.157405376434326, "logits/real": 4.670738220214844, "logps/generated": -1063.46630859375, "logps/real": -446.3619079589844, "loss": 0.0097, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.132465362548828, "rewards/margins": 25.88495445251465, "rewards/real": -2.247509241104126, "step": 500 }, { "epoch": 0.16, "learning_rate": 2.7185501066098084e-07, "logits/generated": 5.115183353424072, "logits/real": 4.353975296020508, "logps/generated": -1002.1138916015625, "logps/real": -446.313232421875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -26.830562591552734, "rewards/margins": 24.81540870666504, "rewards/real": -2.015152931213379, "step": 510 }, { "epoch": 0.17, "learning_rate": 2.7718550106609805e-07, "logits/generated": 5.822426795959473, "logits/real": 4.092729568481445, "logps/generated": -1142.877197265625, "logps/real": -423.13427734375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -35.513492584228516, "rewards/margins": 32.95065689086914, "rewards/real": -2.562835216522217, "step": 520 }, { "epoch": 0.17, "learning_rate": 2.8251599147121537e-07, "logits/generated": 6.064093112945557, "logits/real": 4.645484447479248, "logps/generated": -985.5988159179688, "logps/real": -454.4009704589844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -27.420032501220703, "rewards/margins": 24.551271438598633, "rewards/real": -2.8687572479248047, "step": 530 }, { "epoch": 0.17, "learning_rate": 2.878464818763326e-07, "logits/generated": 5.6674604415893555, "logits/real": 4.8912434577941895, "logps/generated": -1181.345703125, "logps/real": -363.5838928222656, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -39.11265182495117, "rewards/margins": 37.521728515625, "rewards/real": -1.5909217596054077, "step": 540 }, { "epoch": 0.18, "learning_rate": 2.931769722814499e-07, "logits/generated": 5.423783779144287, "logits/real": 4.705772399902344, "logps/generated": -1178.148193359375, "logps/real": -454.99462890625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -37.417789459228516, "rewards/margins": 34.702919006347656, "rewards/real": -2.7148735523223877, "step": 550 }, { "epoch": 0.18, "learning_rate": 2.985074626865671e-07, "logits/generated": 5.513905048370361, "logits/real": 5.298013210296631, "logps/generated": -1090.534423828125, "logps/real": -423.19281005859375, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -33.68505096435547, "rewards/margins": 31.353504180908203, "rewards/real": -2.331547975540161, "step": 560 }, { "epoch": 0.18, "learning_rate": 3.038379530916844e-07, "logits/generated": 5.618046283721924, "logits/real": 4.737415313720703, "logps/generated": -1092.471435546875, "logps/real": -455.97625732421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -31.836315155029297, "rewards/margins": 29.207265853881836, "rewards/real": -2.629049062728882, "step": 570 }, { "epoch": 0.19, "learning_rate": 3.0916844349680174e-07, "logits/generated": 5.837884902954102, "logits/real": 4.931227684020996, "logps/generated": -998.5939331054688, "logps/real": -460.0169982910156, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -30.707849502563477, "rewards/margins": 27.957523345947266, "rewards/real": -2.750328302383423, "step": 580 }, { "epoch": 0.19, "learning_rate": 3.1449893390191895e-07, "logits/generated": 6.2692766189575195, "logits/real": 4.8175554275512695, "logps/generated": -1141.907470703125, "logps/real": -435.086181640625, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -38.28876876831055, "rewards/margins": 36.220149993896484, "rewards/real": -2.0686206817626953, "step": 590 }, { "epoch": 0.19, "learning_rate": 3.1982942430703626e-07, "logits/generated": 6.040721893310547, "logits/real": 5.1572418212890625, "logps/generated": -1195.434326171875, "logps/real": -386.48065185546875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -43.73341751098633, "rewards/margins": 41.48738098144531, "rewards/real": -2.2460429668426514, "step": 600 }, { "epoch": 0.2, "learning_rate": 3.2515991471215347e-07, "logits/generated": 6.458198547363281, "logits/real": 5.017695903778076, "logps/generated": -1221.5955810546875, "logps/real": -450.3197326660156, "loss": 0.0343, "rewards/accuracies": 0.987500011920929, "rewards/generated": -49.95851516723633, "rewards/margins": 46.46921920776367, "rewards/real": -3.48930025100708, "step": 610 }, { "epoch": 0.2, "learning_rate": 3.304904051172708e-07, "logits/generated": 5.222648620605469, "logits/real": 5.956960201263428, "logps/generated": -1281.3074951171875, "logps/real": -429.00128173828125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -46.34756088256836, "rewards/margins": 42.98705291748047, "rewards/real": -3.3605053424835205, "step": 620 }, { "epoch": 0.2, "learning_rate": 3.3582089552238805e-07, "logits/generated": 6.912026882171631, "logits/real": 5.952193260192871, "logps/generated": -1393.0687255859375, "logps/real": -446.5885314941406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -64.23793029785156, "rewards/margins": 60.48468780517578, "rewards/real": -3.7532401084899902, "step": 630 }, { "epoch": 0.2, "learning_rate": 3.411513859275053e-07, "logits/generated": 7.402287483215332, "logits/real": 5.988776206970215, "logps/generated": -1291.233154296875, "logps/real": -448.9556579589844, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -53.81024932861328, "rewards/margins": 49.61492919921875, "rewards/real": -4.195318222045898, "step": 640 }, { "epoch": 0.21, "learning_rate": 3.464818763326226e-07, "logits/generated": 6.1990885734558105, "logits/real": 6.460501670837402, "logps/generated": -1401.2054443359375, "logps/real": -427.198486328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -60.654327392578125, "rewards/margins": 56.32172775268555, "rewards/real": -4.332607746124268, "step": 650 }, { "epoch": 0.21, "learning_rate": 3.5181236673773984e-07, "logits/generated": 6.238941192626953, "logits/real": 6.262744903564453, "logps/generated": -1464.2178955078125, "logps/real": -442.047607421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -66.75634002685547, "rewards/margins": 62.018089294433594, "rewards/real": -4.738252639770508, "step": 660 }, { "epoch": 0.21, "learning_rate": 3.5714285714285716e-07, "logits/generated": 6.5927734375, "logits/real": 6.336920738220215, "logps/generated": -1426.91064453125, "logps/real": -479.88665771484375, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -66.54981994628906, "rewards/margins": 61.196250915527344, "rewards/real": -5.3535614013671875, "step": 670 }, { "epoch": 0.22, "learning_rate": 3.6247334754797437e-07, "logits/generated": 6.889130592346191, "logits/real": 6.0183515548706055, "logps/generated": -1280.54052734375, "logps/real": -443.0462341308594, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -53.02849578857422, "rewards/margins": 48.8522834777832, "rewards/real": -4.17621374130249, "step": 680 }, { "epoch": 0.22, "learning_rate": 3.678038379530917e-07, "logits/generated": 6.886399269104004, "logits/real": 5.8616228103637695, "logps/generated": -1328.938232421875, "logps/real": -467.85260009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -60.66303634643555, "rewards/margins": 57.2181510925293, "rewards/real": -3.4448928833007812, "step": 690 }, { "epoch": 0.22, "learning_rate": 3.7313432835820895e-07, "logits/generated": 6.184852123260498, "logits/real": 6.833343505859375, "logps/generated": -1430.8035888671875, "logps/real": -440.9766540527344, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -64.0339126586914, "rewards/margins": 60.376373291015625, "rewards/real": -3.6575417518615723, "step": 700 }, { "epoch": 0.23, "learning_rate": 3.784648187633262e-07, "logits/generated": 6.439333915710449, "logits/real": 6.414175510406494, "logps/generated": -1413.538818359375, "logps/real": -506.282958984375, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -65.80122375488281, "rewards/margins": 60.92878341674805, "rewards/real": -4.87244176864624, "step": 710 }, { "epoch": 0.23, "learning_rate": 3.8379530916844347e-07, "logits/generated": 6.854152679443359, "logits/real": 6.468166351318359, "logps/generated": -1360.7110595703125, "logps/real": -422.0220642089844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -63.5214958190918, "rewards/margins": 61.0288200378418, "rewards/real": -2.4926705360412598, "step": 720 }, { "epoch": 0.23, "learning_rate": 3.8912579957356074e-07, "logits/generated": 6.815456390380859, "logits/real": 6.330857753753662, "logps/generated": -1460.0595703125, "logps/real": -477.5538635253906, "loss": 0.0118, "rewards/accuracies": 0.987500011920929, "rewards/generated": -72.48738098144531, "rewards/margins": 67.97703552246094, "rewards/real": -4.510356426239014, "step": 730 }, { "epoch": 0.24, "learning_rate": 3.9445628997867805e-07, "logits/generated": 7.79461145401001, "logits/real": 6.379164695739746, "logps/generated": -1320.86572265625, "logps/real": -415.1160583496094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -56.91374969482422, "rewards/margins": 55.47412872314453, "rewards/real": -1.4396289587020874, "step": 740 }, { "epoch": 0.24, "learning_rate": 3.9978678038379526e-07, "logits/generated": 7.447874546051025, "logits/real": 7.000211238861084, "logps/generated": -1366.9166259765625, "logps/real": -402.1700134277344, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/generated": -56.7216682434082, "rewards/margins": 55.308265686035156, "rewards/real": -1.4133905172348022, "step": 750 }, { "epoch": 0.24, "learning_rate": 4.051172707889126e-07, "logits/generated": 7.207322120666504, "logits/real": 6.658274173736572, "logps/generated": -1613.7154541015625, "logps/real": -432.3534240722656, "loss": 0.0101, "rewards/accuracies": 1.0, "rewards/generated": -81.71043395996094, "rewards/margins": 78.81815338134766, "rewards/real": -2.8922817707061768, "step": 760 }, { "epoch": 0.25, "learning_rate": 4.1044776119402984e-07, "logits/generated": 6.566279411315918, "logits/real": 7.33911657333374, "logps/generated": -1126.0272216796875, "logps/real": -409.57940673828125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -37.07782745361328, "rewards/margins": 38.781490325927734, "rewards/real": 1.7036640644073486, "step": 770 }, { "epoch": 0.25, "learning_rate": 4.157782515991471e-07, "logits/generated": 7.485255241394043, "logits/real": 7.565821647644043, "logps/generated": -1186.6368408203125, "logps/real": -383.3221435546875, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -43.06455993652344, "rewards/margins": 44.8535270690918, "rewards/real": 1.788967490196228, "step": 780 }, { "epoch": 0.25, "learning_rate": 4.2110874200426437e-07, "logits/generated": 7.132505893707275, "logits/real": 7.167974948883057, "logps/generated": -1089.1512451171875, "logps/real": -415.9585876464844, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/generated": -37.17241287231445, "rewards/margins": 39.06093215942383, "rewards/real": 1.8885204792022705, "step": 790 }, { "epoch": 0.26, "learning_rate": 4.2643923240938163e-07, "logits/generated": 6.375537395477295, "logits/real": 7.784512996673584, "logps/generated": -1080.314697265625, "logps/real": -360.1445007324219, "loss": 0.011, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.7370548248291, "rewards/margins": 32.6918830871582, "rewards/real": 1.9548307657241821, "step": 800 }, { "epoch": 0.26, "learning_rate": 4.317697228144989e-07, "logits/generated": 7.082189083099365, "logits/real": 7.79370641708374, "logps/generated": -1347.234619140625, "logps/real": -379.0875549316406, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -67.62068176269531, "rewards/margins": 64.62043762207031, "rewards/real": -3.000237226486206, "step": 810 }, { "epoch": 0.26, "learning_rate": 4.371002132196162e-07, "logits/generated": 8.219111442565918, "logits/real": 7.7538161277771, "logps/generated": -1355.7952880859375, "logps/real": -480.03729248046875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -62.55017852783203, "rewards/margins": 57.40230178833008, "rewards/real": -5.14788293838501, "step": 820 }, { "epoch": 0.27, "learning_rate": 4.4243070362473347e-07, "logits/generated": 6.6060075759887695, "logits/real": 4.620954990386963, "logps/generated": -1465.8955078125, "logps/real": -455.8670959472656, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/generated": -68.65296173095703, "rewards/margins": 63.1667594909668, "rewards/real": -5.486207485198975, "step": 830 }, { "epoch": 0.27, "learning_rate": 4.4776119402985074e-07, "logits/generated": 6.373946189880371, "logits/real": 4.057518482208252, "logps/generated": -1346.488037109375, "logps/real": -480.8456115722656, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -60.35419464111328, "rewards/margins": 54.289276123046875, "rewards/real": -6.064919948577881, "step": 840 }, { "epoch": 0.27, "learning_rate": 4.53091684434968e-07, "logits/generated": 7.535623073577881, "logits/real": 3.96494722366333, "logps/generated": -1660.769775390625, "logps/real": -445.7167053222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -85.6654052734375, "rewards/margins": 81.42391967773438, "rewards/real": -4.241487503051758, "step": 850 }, { "epoch": 0.28, "learning_rate": 4.5842217484008526e-07, "logits/generated": 6.470145225524902, "logits/real": 3.8205597400665283, "logps/generated": -1416.6033935546875, "logps/real": -484.15252685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -72.32617950439453, "rewards/margins": 67.55338287353516, "rewards/real": -4.772789478302002, "step": 860 }, { "epoch": 0.28, "learning_rate": 4.637526652452025e-07, "logits/generated": 7.0599045753479, "logits/real": 3.4782166481018066, "logps/generated": -1430.54345703125, "logps/real": -435.47991943359375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -70.65374755859375, "rewards/margins": 66.18858337402344, "rewards/real": -4.4651689529418945, "step": 870 }, { "epoch": 0.28, "learning_rate": 4.690831556503198e-07, "logits/generated": 6.818316459655762, "logits/real": 6.137295722961426, "logps/generated": -1887.0960693359375, "logps/real": -522.5638427734375, "loss": 0.0288, "rewards/accuracies": 1.0, "rewards/generated": -113.33845520019531, "rewards/margins": 101.10160827636719, "rewards/real": -12.236835479736328, "step": 880 }, { "epoch": 0.28, "learning_rate": 4.744136460554371e-07, "logits/generated": 6.243293762207031, "logits/real": 5.558531761169434, "logps/generated": -1486.5711669921875, "logps/real": -497.50555419921875, "loss": 0.0078, "rewards/accuracies": 0.987500011920929, "rewards/generated": -69.30940246582031, "rewards/margins": 62.63942337036133, "rewards/real": -6.669985771179199, "step": 890 }, { "epoch": 0.29, "learning_rate": 4.797441364605543e-07, "logits/generated": 5.630170822143555, "logits/real": 3.8515732288360596, "logps/generated": -1765.241455078125, "logps/real": -412.55145263671875, "loss": 0.2937, "rewards/accuracies": 0.987500011920929, "rewards/generated": -105.3599624633789, "rewards/margins": 103.2026596069336, "rewards/real": -2.157282590866089, "step": 900 }, { "epoch": 0.29, "learning_rate": 4.850746268656717e-07, "logits/generated": 5.002291202545166, "logits/real": 3.640393018722534, "logps/generated": -1557.229248046875, "logps/real": -456.93890380859375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -81.83430480957031, "rewards/margins": 79.40706634521484, "rewards/real": -2.427229404449463, "step": 910 }, { "epoch": 0.29, "learning_rate": 4.904051172707888e-07, "logits/generated": 5.952645778656006, "logits/real": 4.249642848968506, "logps/generated": -1700.862548828125, "logps/real": -428.80523681640625, "loss": 0.0184, "rewards/accuracies": 0.987500011920929, "rewards/generated": -91.6487045288086, "rewards/margins": 88.17597961425781, "rewards/real": -3.4727301597595215, "step": 920 }, { "epoch": 0.3, "learning_rate": 4.957356076759062e-07, "logits/generated": 5.7331037521362305, "logits/real": 4.043749809265137, "logps/generated": -1740.08984375, "logps/real": -433.04803466796875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/generated": -89.92688751220703, "rewards/margins": 86.73829650878906, "rewards/real": -3.188586711883545, "step": 930 }, { "epoch": 0.3, "learning_rate": 4.998814744577456e-07, "logits/generated": 5.830160140991211, "logits/real": 5.589596748352051, "logps/generated": -1539.909423828125, "logps/real": -411.137451171875, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -79.81306457519531, "rewards/margins": 77.7004623413086, "rewards/real": -2.1125919818878174, "step": 940 }, { "epoch": 0.3, "learning_rate": 4.992888467464738e-07, "logits/generated": 5.686253547668457, "logits/real": 5.019293785095215, "logps/generated": -1552.7132568359375, "logps/real": -438.41925048828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -82.39158630371094, "rewards/margins": 80.49710845947266, "rewards/real": -1.8944790363311768, "step": 950 }, { "epoch": 0.31, "learning_rate": 4.986962190352021e-07, "logits/generated": 7.768735408782959, "logits/real": 6.325190544128418, "logps/generated": -1645.41796875, "logps/real": -451.18035888671875, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/generated": -85.29425048828125, "rewards/margins": 80.68913269042969, "rewards/real": -4.605116844177246, "step": 960 }, { "epoch": 0.31, "learning_rate": 4.981035913239302e-07, "logits/generated": 7.660915374755859, "logits/real": 6.830654144287109, "logps/generated": -1547.409423828125, "logps/real": -451.9595642089844, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -84.11827850341797, "rewards/margins": 79.18711853027344, "rewards/real": -4.931160926818848, "step": 970 }, { "epoch": 0.31, "learning_rate": 4.975109636126585e-07, "logits/generated": 7.7794294357299805, "logits/real": 8.336104393005371, "logps/generated": -1673.5439453125, "logps/real": -415.1363830566406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -92.60560607910156, "rewards/margins": 88.01842498779297, "rewards/real": -4.5871758460998535, "step": 980 }, { "epoch": 0.32, "learning_rate": 4.969183359013867e-07, "logits/generated": 7.937457084655762, "logits/real": 7.033485412597656, "logps/generated": -1622.06591796875, "logps/real": -480.75537109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -89.27228546142578, "rewards/margins": 85.18975830078125, "rewards/real": -4.082520961761475, "step": 990 }, { "epoch": 0.32, "learning_rate": 4.96325708190115e-07, "logits/generated": 6.595461368560791, "logits/real": 8.076391220092773, "logps/generated": -1706.6907958984375, "logps/real": -447.84423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -96.0951919555664, "rewards/margins": 92.5496597290039, "rewards/real": -3.545525074005127, "step": 1000 }, { "epoch": 0.32, "learning_rate": 4.957330804788432e-07, "logits/generated": 6.543757438659668, "logits/real": 7.346884727478027, "logps/generated": -1789.3675537109375, "logps/real": -419.0418395996094, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/generated": -97.88018798828125, "rewards/margins": 94.17094421386719, "rewards/real": -3.709242582321167, "step": 1010 }, { "epoch": 0.33, "learning_rate": 4.951404527675714e-07, "logits/generated": 6.897200107574463, "logits/real": 7.889591217041016, "logps/generated": -1685.7294921875, "logps/real": -446.888427734375, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/generated": -87.568359375, "rewards/margins": 85.30208587646484, "rewards/real": -2.2662765979766846, "step": 1020 }, { "epoch": 0.33, "learning_rate": 4.945478250562996e-07, "logits/generated": 7.153252601623535, "logits/real": 8.06161117553711, "logps/generated": -1738.279296875, "logps/real": -437.81536865234375, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -95.5464859008789, "rewards/margins": 92.51522064208984, "rewards/real": -3.0312657356262207, "step": 1030 }, { "epoch": 0.33, "learning_rate": 4.939551973450278e-07, "logits/generated": 5.871662616729736, "logits/real": 7.041135311126709, "logps/generated": -1695.6005859375, "logps/real": -478.1349182128906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -95.05136108398438, "rewards/margins": 86.76447296142578, "rewards/real": -8.28688907623291, "step": 1040 }, { "epoch": 0.34, "learning_rate": 4.933625696337561e-07, "logits/generated": 6.500063896179199, "logits/real": 8.22092342376709, "logps/generated": -2075.172119140625, "logps/real": -488.11785888671875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/generated": -122.46177673339844, "rewards/margins": 112.75472259521484, "rewards/real": -9.707052230834961, "step": 1050 }, { "epoch": 0.34, "learning_rate": 4.927699419224843e-07, "logits/generated": 9.17035961151123, "logits/real": 7.364115238189697, "logps/generated": -2150.434326171875, "logps/real": -461.96075439453125, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -146.5005645751953, "rewards/margins": 140.67984008789062, "rewards/real": -5.820720672607422, "step": 1060 }, { "epoch": 0.34, "learning_rate": 4.921773142112125e-07, "logits/generated": 6.842865943908691, "logits/real": 9.300943374633789, "logps/generated": -2127.19482421875, "logps/real": -510.9596252441406, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -135.00425720214844, "rewards/margins": 126.00932312011719, "rewards/real": -8.994917869567871, "step": 1070 }, { "epoch": 0.35, "learning_rate": 4.915846864999407e-07, "logits/generated": 8.094195365905762, "logits/real": 9.420785903930664, "logps/generated": -2197.156494140625, "logps/real": -505.39581298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -140.7816925048828, "rewards/margins": 131.50852966308594, "rewards/real": -9.27318286895752, "step": 1080 }, { "epoch": 0.35, "learning_rate": 4.909920587886689e-07, "logits/generated": 6.9371466636657715, "logits/real": 7.835204124450684, "logps/generated": -2110.518798828125, "logps/real": -475.09228515625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -139.9143829345703, "rewards/margins": 132.29214477539062, "rewards/real": -7.622225284576416, "step": 1090 }, { "epoch": 0.35, "learning_rate": 4.903994310773972e-07, "logits/generated": 7.8442182540893555, "logits/real": 8.97354507446289, "logps/generated": -2230.311279296875, "logps/real": -497.91546630859375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -147.75302124023438, "rewards/margins": 137.24615478515625, "rewards/real": -10.506855010986328, "step": 1100 }, { "epoch": 0.36, "learning_rate": 4.898068033661254e-07, "logits/generated": 7.6216864585876465, "logits/real": 7.699243068695068, "logps/generated": -2523.005126953125, "logps/real": -545.977783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -179.80250549316406, "rewards/margins": 169.1790008544922, "rewards/real": -10.623506546020508, "step": 1110 }, { "epoch": 0.36, "learning_rate": 4.892141756548536e-07, "logits/generated": 8.07519245147705, "logits/real": 8.746581077575684, "logps/generated": -1840.968994140625, "logps/real": -434.7252502441406, "loss": 0.0483, "rewards/accuracies": 0.987500011920929, "rewards/generated": -110.52458190917969, "rewards/margins": 106.19657135009766, "rewards/real": -4.328009605407715, "step": 1120 }, { "epoch": 0.36, "learning_rate": 4.886215479435819e-07, "logits/generated": 6.407035827636719, "logits/real": 8.111191749572754, "logps/generated": -2167.253662109375, "logps/real": -584.6397094726562, "loss": 0.1792, "rewards/accuracies": 0.949999988079071, "rewards/generated": -143.14059448242188, "rewards/margins": 120.68763732910156, "rewards/real": -22.452938079833984, "step": 1130 }, { "epoch": 0.36, "learning_rate": 4.8802892023231e-07, "logits/generated": 6.236830711364746, "logits/real": 5.228099822998047, "logps/generated": -2550.7197265625, "logps/real": -706.9818725585938, "loss": 0.1339, "rewards/accuracies": 1.0, "rewards/generated": -182.18605041503906, "rewards/margins": 150.96920776367188, "rewards/real": -31.21683692932129, "step": 1140 }, { "epoch": 0.37, "learning_rate": 4.874362925210383e-07, "logits/generated": 5.741302013397217, "logits/real": 4.911172389984131, "logps/generated": -2231.459716796875, "logps/real": -512.3751220703125, "loss": 0.0439, "rewards/accuracies": 0.987500011920929, "rewards/generated": -142.37979125976562, "rewards/margins": 127.73725891113281, "rewards/real": -14.642518997192383, "step": 1150 }, { "epoch": 0.37, "learning_rate": 4.868436648097665e-07, "logits/generated": 5.199759483337402, "logits/real": 6.4419708251953125, "logps/generated": -2287.717041015625, "logps/real": -551.3569946289062, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/generated": -148.15785217285156, "rewards/margins": 134.6517791748047, "rewards/real": -13.50605583190918, "step": 1160 }, { "epoch": 0.37, "learning_rate": 4.862510370984946e-07, "logits/generated": 6.572251796722412, "logits/real": 5.398228645324707, "logps/generated": -1907.2447509765625, "logps/real": -482.88275146484375, "loss": 0.0868, "rewards/accuracies": 1.0, "rewards/generated": -116.68013763427734, "rewards/margins": 108.76756286621094, "rewards/real": -7.912576198577881, "step": 1170 }, { "epoch": 0.38, "learning_rate": 4.856584093872229e-07, "logits/generated": 5.3536553382873535, "logits/real": 4.817139625549316, "logps/generated": -2303.44287109375, "logps/real": -469.7828063964844, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -149.1165771484375, "rewards/margins": 141.90130615234375, "rewards/real": -7.215256690979004, "step": 1180 }, { "epoch": 0.38, "learning_rate": 4.850657816759511e-07, "logits/generated": 5.181337833404541, "logits/real": 5.077664852142334, "logps/generated": -2017.979248046875, "logps/real": -494.0741271972656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -127.82281494140625, "rewards/margins": 119.51688385009766, "rewards/real": -8.305940628051758, "step": 1190 }, { "epoch": 0.38, "learning_rate": 4.844731539646794e-07, "logits/generated": 4.419014930725098, "logits/real": 5.044297218322754, "logps/generated": -2165.772705078125, "logps/real": -443.818115234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -140.24099731445312, "rewards/margins": 131.45724487304688, "rewards/real": -8.783735275268555, "step": 1200 }, { "epoch": 0.39, "learning_rate": 4.838805262534076e-07, "logits/generated": 6.1997504234313965, "logits/real": 5.568020820617676, "logps/generated": -2035.696044921875, "logps/real": -475.54669189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -130.20091247558594, "rewards/margins": 121.5607681274414, "rewards/real": -8.640148162841797, "step": 1210 }, { "epoch": 0.39, "learning_rate": 4.832878985421358e-07, "logits/generated": 6.777440547943115, "logits/real": 5.8462443351745605, "logps/generated": -2143.581787109375, "logps/real": -469.10235595703125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -141.8614501953125, "rewards/margins": 132.47967529296875, "rewards/real": -9.381769180297852, "step": 1220 }, { "epoch": 0.39, "learning_rate": 4.82695270830864e-07, "logits/generated": 7.070141792297363, "logits/real": 7.65410852432251, "logps/generated": -2212.40478515625, "logps/real": -515.7273559570312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -146.12078857421875, "rewards/margins": 133.88491821289062, "rewards/real": -12.235891342163086, "step": 1230 }, { "epoch": 0.4, "learning_rate": 4.821026431195922e-07, "logits/generated": 6.384008884429932, "logits/real": 8.929574966430664, "logps/generated": -2295.981201171875, "logps/real": -497.1014099121094, "loss": 0.0394, "rewards/accuracies": 1.0, "rewards/generated": -145.4716339111328, "rewards/margins": 136.9638671875, "rewards/real": -8.507768630981445, "step": 1240 }, { "epoch": 0.4, "learning_rate": 4.815100154083205e-07, "logits/generated": 6.723616600036621, "logits/real": 9.097654342651367, "logps/generated": -1612.1195068359375, "logps/real": -368.24957275390625, "loss": 0.0415, "rewards/accuracies": 0.987500011920929, "rewards/generated": -84.96308135986328, "rewards/margins": 87.89927673339844, "rewards/real": 2.9361941814422607, "step": 1250 }, { "epoch": 0.4, "learning_rate": 4.809173876970487e-07, "logits/generated": 7.305488586425781, "logits/real": 8.486207962036133, "logps/generated": -1559.564208984375, "logps/real": -356.2183532714844, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -77.98143005371094, "rewards/margins": 79.35613250732422, "rewards/real": 1.374709129333496, "step": 1260 }, { "epoch": 0.41, "learning_rate": 4.80324759985777e-07, "logits/generated": 7.77454137802124, "logits/real": 6.736929893493652, "logps/generated": -1733.6539306640625, "logps/real": -384.4520263671875, "loss": 0.036, "rewards/accuracies": 1.0, "rewards/generated": -98.25541687011719, "rewards/margins": 100.38082885742188, "rewards/real": 2.1254167556762695, "step": 1270 }, { "epoch": 0.41, "learning_rate": 4.797321322745052e-07, "logits/generated": 8.091390609741211, "logits/real": 7.0590057373046875, "logps/generated": -1703.6513671875, "logps/real": -350.0121765136719, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -100.15773010253906, "rewards/margins": 104.73829650878906, "rewards/real": 4.580559253692627, "step": 1280 }, { "epoch": 0.41, "learning_rate": 4.791395045632333e-07, "logits/generated": 6.159231662750244, "logits/real": 7.0309906005859375, "logps/generated": -1432.9888916015625, "logps/real": -390.4908447265625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -71.69100189208984, "rewards/margins": 74.86009216308594, "rewards/real": 3.1690943241119385, "step": 1290 }, { "epoch": 0.42, "learning_rate": 4.785468768519616e-07, "logits/generated": 6.193885326385498, "logits/real": 7.325434684753418, "logps/generated": -1543.7459716796875, "logps/real": -430.8418884277344, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -85.52909851074219, "rewards/margins": 86.00538635253906, "rewards/real": 0.47629013657569885, "step": 1300 }, { "epoch": 0.42, "learning_rate": 4.779542491406898e-07, "logits/generated": 7.849607944488525, "logits/real": 8.128145217895508, "logps/generated": -1636.0498046875, "logps/real": -383.40130615234375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -86.2497329711914, "rewards/margins": 87.80460357666016, "rewards/real": 1.5548782348632812, "step": 1310 }, { "epoch": 0.42, "learning_rate": 4.77361621429418e-07, "logits/generated": 7.275670051574707, "logits/real": 7.423336982727051, "logps/generated": -1526.717529296875, "logps/real": -405.1219177246094, "loss": 0.0242, "rewards/accuracies": 1.0, "rewards/generated": -74.88166809082031, "rewards/margins": 76.43697357177734, "rewards/real": 1.5553076267242432, "step": 1320 }, { "epoch": 0.43, "learning_rate": 4.7676899371814624e-07, "logits/generated": 7.710183620452881, "logits/real": 7.779460906982422, "logps/generated": -1766.2353515625, "logps/real": -431.310791015625, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/generated": -100.26522064208984, "rewards/margins": 99.07828521728516, "rewards/real": -1.1869385242462158, "step": 1330 }, { "epoch": 0.43, "learning_rate": 4.7617636600687443e-07, "logits/generated": 9.235677719116211, "logits/real": 10.369451522827148, "logps/generated": -2474.69482421875, "logps/real": -497.82635498046875, "loss": 0.0452, "rewards/accuracies": 0.987500011920929, "rewards/generated": -171.40396118164062, "rewards/margins": 159.05528259277344, "rewards/real": -12.348679542541504, "step": 1340 }, { "epoch": 0.43, "learning_rate": 4.755837382956027e-07, "logits/generated": 8.521450996398926, "logits/real": 8.76947021484375, "logps/generated": -2592.934814453125, "logps/real": -437.44146728515625, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -188.50926208496094, "rewards/margins": 180.18515014648438, "rewards/real": -8.32408618927002, "step": 1350 }, { "epoch": 0.44, "learning_rate": 4.7499111058433086e-07, "logits/generated": 8.796676635742188, "logits/real": 8.608227729797363, "logps/generated": -2836.316650390625, "logps/real": -514.9736328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -209.5281982421875, "rewards/margins": 201.2119140625, "rewards/real": -8.316272735595703, "step": 1360 }, { "epoch": 0.44, "learning_rate": 4.743984828730591e-07, "logits/generated": 8.266585350036621, "logits/real": 8.26628303527832, "logps/generated": -2721.52587890625, "logps/real": -469.38677978515625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -193.70838928222656, "rewards/margins": 188.19512939453125, "rewards/real": -5.51325798034668, "step": 1370 }, { "epoch": 0.44, "learning_rate": 4.7380585516178735e-07, "logits/generated": 7.231026649475098, "logits/real": 8.198832511901855, "logps/generated": -3111.35986328125, "logps/real": -439.9920349121094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -228.37203979492188, "rewards/margins": 222.70742797851562, "rewards/real": -5.664615631103516, "step": 1380 }, { "epoch": 0.44, "learning_rate": 4.7321322745051554e-07, "logits/generated": 8.564990043640137, "logits/real": 8.50342082977295, "logps/generated": -2735.14990234375, "logps/real": -474.56964111328125, "loss": 0.1275, "rewards/accuracies": 0.987500011920929, "rewards/generated": -201.07533264160156, "rewards/margins": 194.74795532226562, "rewards/real": -6.327368259429932, "step": 1390 }, { "epoch": 0.45, "learning_rate": 4.726205997392438e-07, "logits/generated": 7.585750579833984, "logits/real": 5.573246955871582, "logps/generated": -1425.8900146484375, "logps/real": -358.2833557128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -69.26428985595703, "rewards/margins": 73.16354370117188, "rewards/real": 3.8992507457733154, "step": 1400 }, { "epoch": 0.45, "learning_rate": 4.72027972027972e-07, "logits/generated": 6.801863670349121, "logits/real": 8.10703182220459, "logps/generated": -1682.92578125, "logps/real": -404.78778076171875, "loss": 0.0185, "rewards/accuracies": 0.987500011920929, "rewards/generated": -95.6246566772461, "rewards/margins": 94.83091735839844, "rewards/real": -0.7937443852424622, "step": 1410 }, { "epoch": 0.45, "learning_rate": 4.714353443167002e-07, "logits/generated": 7.464466094970703, "logits/real": 7.753902435302734, "logps/generated": -2037.3714599609375, "logps/real": -487.7086486816406, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/generated": -128.26170349121094, "rewards/margins": 121.76007080078125, "rewards/real": -6.501637935638428, "step": 1420 }, { "epoch": 0.46, "learning_rate": 4.7084271660542845e-07, "logits/generated": 6.661843299865723, "logits/real": 7.411501407623291, "logps/generated": -2050.517822265625, "logps/real": -422.6263732910156, "loss": 0.0637, "rewards/accuracies": 0.987500011920929, "rewards/generated": -126.44803619384766, "rewards/margins": 124.22566986083984, "rewards/real": -2.222370147705078, "step": 1430 }, { "epoch": 0.46, "learning_rate": 4.702500888941567e-07, "logits/generated": 7.033213138580322, "logits/real": 5.575243949890137, "logps/generated": -1966.0765380859375, "logps/real": -424.07025146484375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -118.12532806396484, "rewards/margins": 119.83488464355469, "rewards/real": 1.7095727920532227, "step": 1440 }, { "epoch": 0.46, "learning_rate": 4.696574611828849e-07, "logits/generated": 6.584895133972168, "logits/real": 4.746676921844482, "logps/generated": -1669.005126953125, "logps/real": -413.2723693847656, "loss": 0.133, "rewards/accuracies": 1.0, "rewards/generated": -86.57002258300781, "rewards/margins": 87.1453628540039, "rewards/real": 0.5753453969955444, "step": 1450 }, { "epoch": 0.47, "learning_rate": 4.690648334716131e-07, "logits/generated": 6.015892028808594, "logits/real": 3.704094409942627, "logps/generated": -1711.6673583984375, "logps/real": -454.239501953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -94.16633605957031, "rewards/margins": 93.29696655273438, "rewards/real": -0.8693593144416809, "step": 1460 }, { "epoch": 0.47, "learning_rate": 4.6847220576034137e-07, "logits/generated": 6.046257495880127, "logits/real": 4.501250267028809, "logps/generated": -1970.72265625, "logps/real": -381.8661193847656, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/generated": -117.3022689819336, "rewards/margins": 116.90910339355469, "rewards/real": -0.3931578993797302, "step": 1470 }, { "epoch": 0.47, "learning_rate": 4.6787957804906955e-07, "logits/generated": 6.744679927825928, "logits/real": 4.350931167602539, "logps/generated": -1828.2568359375, "logps/real": -401.0418395996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -111.74235534667969, "rewards/margins": 110.8074722290039, "rewards/real": -0.9348929524421692, "step": 1480 }, { "epoch": 0.48, "learning_rate": 4.6728695033779774e-07, "logits/generated": 6.826358795166016, "logits/real": 4.718294143676758, "logps/generated": -1665.8203125, "logps/real": -419.61138916015625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -93.27421569824219, "rewards/margins": 92.86730194091797, "rewards/real": -0.40692320466041565, "step": 1490 }, { "epoch": 0.48, "learning_rate": 4.66694322626526e-07, "logits/generated": 5.406618595123291, "logits/real": 3.868990421295166, "logps/generated": -1514.18359375, "logps/real": -442.0494079589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -80.8555908203125, "rewards/margins": 78.36438751220703, "rewards/real": -2.491199016571045, "step": 1500 }, { "epoch": 0.48, "learning_rate": 4.661016949152542e-07, "logits/generated": 5.743460655212402, "logits/real": 4.388308525085449, "logps/generated": -1806.068603515625, "logps/real": -405.2660217285156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -98.4692153930664, "rewards/margins": 97.9910888671875, "rewards/real": -0.47812119126319885, "step": 1510 }, { "epoch": 0.49, "learning_rate": 4.655090672039824e-07, "logits/generated": 6.437834739685059, "logits/real": 5.133035182952881, "logps/generated": -1925.789794921875, "logps/real": -425.3108825683594, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/generated": -116.57472229003906, "rewards/margins": 114.18778991699219, "rewards/real": -2.3869223594665527, "step": 1520 }, { "epoch": 0.49, "learning_rate": 4.6491643949271066e-07, "logits/generated": 7.827972412109375, "logits/real": 6.223818778991699, "logps/generated": -2432.001708984375, "logps/real": -444.8968200683594, "loss": 0.0165, "rewards/accuracies": 1.0, "rewards/generated": -167.03622436523438, "rewards/margins": 161.264404296875, "rewards/real": -5.7718505859375, "step": 1530 }, { "epoch": 0.49, "learning_rate": 4.6432381178143885e-07, "logits/generated": 6.748221397399902, "logits/real": 6.071878433227539, "logps/generated": -2538.463134765625, "logps/real": -502.3291015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -182.07183837890625, "rewards/margins": 170.7902374267578, "rewards/real": -11.281615257263184, "step": 1540 }, { "epoch": 0.5, "learning_rate": 4.637311840701671e-07, "logits/generated": 6.978383541107178, "logits/real": 6.301913261413574, "logps/generated": -2651.951416015625, "logps/real": -510.86529541015625, "loss": 0.0656, "rewards/accuracies": 1.0, "rewards/generated": -190.27798461914062, "rewards/margins": 177.31613159179688, "rewards/real": -12.96185302734375, "step": 1550 }, { "epoch": 0.5, "learning_rate": 4.6313855635889533e-07, "logits/generated": 7.651279449462891, "logits/real": 6.141505718231201, "logps/generated": -2475.251708984375, "logps/real": -469.6460876464844, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -168.2834014892578, "rewards/margins": 163.4281005859375, "rewards/real": -4.8552985191345215, "step": 1560 }, { "epoch": 0.5, "learning_rate": 4.625459286476235e-07, "logits/generated": 7.951920986175537, "logits/real": 6.738547325134277, "logps/generated": -2341.68017578125, "logps/real": -401.46484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -159.2347869873047, "rewards/margins": 159.83419799804688, "rewards/real": 0.5993956327438354, "step": 1570 }, { "epoch": 0.51, "learning_rate": 4.6195330093635176e-07, "logits/generated": 6.18333625793457, "logits/real": 7.671307563781738, "logps/generated": -2618.4453125, "logps/real": -458.42669677734375, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/generated": -182.8665771484375, "rewards/margins": 175.51409912109375, "rewards/real": -7.352487087249756, "step": 1580 }, { "epoch": 0.51, "learning_rate": 4.6136067322508e-07, "logits/generated": 8.084421157836914, "logits/real": 6.96505069732666, "logps/generated": -2414.919189453125, "logps/real": -478.39129638671875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -169.3032989501953, "rewards/margins": 161.32534790039062, "rewards/real": -7.977950096130371, "step": 1590 }, { "epoch": 0.51, "learning_rate": 4.607680455138082e-07, "logits/generated": 7.2923078536987305, "logits/real": 7.166507720947266, "logps/generated": -1981.747314453125, "logps/real": -534.5364990234375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -126.0785140991211, "rewards/margins": 117.16666412353516, "rewards/real": -8.911840438842773, "step": 1600 }, { "epoch": 0.52, "learning_rate": 4.6017541780253643e-07, "logits/generated": 7.935206413269043, "logits/real": 8.971375465393066, "logps/generated": -1730.422607421875, "logps/real": -416.55645751953125, "loss": 0.0732, "rewards/accuracies": 0.987500011920929, "rewards/generated": -95.6416244506836, "rewards/margins": 92.68531799316406, "rewards/real": -2.956296682357788, "step": 1610 }, { "epoch": 0.52, "learning_rate": 4.595827900912647e-07, "logits/generated": 7.048170566558838, "logits/real": 7.731119632720947, "logps/generated": -1589.19189453125, "logps/real": -402.20501708984375, "loss": 0.0532, "rewards/accuracies": 1.0, "rewards/generated": -82.79731750488281, "rewards/margins": 81.95187377929688, "rewards/real": -0.8454355001449585, "step": 1620 }, { "epoch": 0.52, "learning_rate": 4.5899016237999286e-07, "logits/generated": 6.504340171813965, "logits/real": 6.2024006843566895, "logps/generated": -1731.291015625, "logps/real": -380.2846984863281, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -103.094482421875, "rewards/margins": 102.10359191894531, "rewards/real": -0.9908885955810547, "step": 1630 }, { "epoch": 0.52, "learning_rate": 4.583975346687211e-07, "logits/generated": 6.782822608947754, "logits/real": 5.985361576080322, "logps/generated": -1759.400390625, "logps/real": -445.1105041503906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -98.38119506835938, "rewards/margins": 96.50190734863281, "rewards/real": -1.8792959451675415, "step": 1640 }, { "epoch": 0.53, "learning_rate": 4.5780490695744935e-07, "logits/generated": 7.261011600494385, "logits/real": 6.093020439147949, "logps/generated": -1582.3985595703125, "logps/real": -407.5309753417969, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -83.1889877319336, "rewards/margins": 84.24644470214844, "rewards/real": 1.0574579238891602, "step": 1650 }, { "epoch": 0.53, "learning_rate": 4.5721227924617754e-07, "logits/generated": 7.264209747314453, "logits/real": 6.821252346038818, "logps/generated": -1635.547119140625, "logps/real": -360.8280334472656, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -90.70443725585938, "rewards/margins": 91.89486694335938, "rewards/real": 1.1904274225234985, "step": 1660 }, { "epoch": 0.53, "learning_rate": 4.566196515349057e-07, "logits/generated": 6.224570274353027, "logits/real": 8.132176399230957, "logps/generated": -1805.053466796875, "logps/real": -388.6532287597656, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/generated": -98.39375305175781, "rewards/margins": 98.033935546875, "rewards/real": -0.35981038212776184, "step": 1670 }, { "epoch": 0.54, "learning_rate": 4.5602702382363397e-07, "logits/generated": 7.861311912536621, "logits/real": 7.087871551513672, "logps/generated": -1724.3603515625, "logps/real": -429.913330078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -91.12671661376953, "rewards/margins": 91.84036254882812, "rewards/real": 0.7136380672454834, "step": 1680 }, { "epoch": 0.54, "learning_rate": 4.5543439611236216e-07, "logits/generated": 6.408571720123291, "logits/real": 8.383275032043457, "logps/generated": -1583.8828125, "logps/real": -397.1928405761719, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -75.3954086303711, "rewards/margins": 74.784423828125, "rewards/real": -0.6109753847122192, "step": 1690 }, { "epoch": 0.54, "learning_rate": 4.548417684010904e-07, "logits/generated": 6.631665229797363, "logits/real": 7.4394426345825195, "logps/generated": -1396.921142578125, "logps/real": -434.23828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -61.41584396362305, "rewards/margins": 60.0539436340332, "rewards/real": -1.3619048595428467, "step": 1700 }, { "epoch": 0.55, "learning_rate": 4.5424914068981864e-07, "logits/generated": 6.465456485748291, "logits/real": 7.164095878601074, "logps/generated": -1473.395751953125, "logps/real": -429.63092041015625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -74.75288391113281, "rewards/margins": 73.91975402832031, "rewards/real": -0.8331371545791626, "step": 1710 }, { "epoch": 0.55, "learning_rate": 4.5365651297854683e-07, "logits/generated": 6.394671440124512, "logits/real": 5.954339027404785, "logps/generated": -1523.9407958984375, "logps/real": -430.29376220703125, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/generated": -73.30815124511719, "rewards/margins": 71.48025512695312, "rewards/real": -1.8279060125350952, "step": 1720 }, { "epoch": 0.55, "learning_rate": 4.5306388526727507e-07, "logits/generated": 7.5385236740112305, "logits/real": 5.001319885253906, "logps/generated": -2405.43994140625, "logps/real": -439.265869140625, "loss": 0.116, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -162.85476684570312, "rewards/margins": 158.1502685546875, "rewards/real": -4.704501152038574, "step": 1730 }, { "epoch": 0.56, "learning_rate": 4.524712575560033e-07, "logits/generated": 7.151799201965332, "logits/real": 5.976263523101807, "logps/generated": -2726.41943359375, "logps/real": -470.226806640625, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/generated": -196.91787719726562, "rewards/margins": 187.83782958984375, "rewards/real": -9.080057144165039, "step": 1740 }, { "epoch": 0.56, "learning_rate": 4.518786298447315e-07, "logits/generated": 6.134486198425293, "logits/real": 2.1602425575256348, "logps/generated": -1484.0953369140625, "logps/real": -376.6103515625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -74.49567413330078, "rewards/margins": 79.66047668457031, "rewards/real": 5.16480827331543, "step": 1750 }, { "epoch": 0.56, "learning_rate": 4.5128600213345974e-07, "logits/generated": 6.2718024253845215, "logits/real": 3.8840439319610596, "logps/generated": -1753.3333740234375, "logps/real": -368.0438537597656, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/generated": -101.2076187133789, "rewards/margins": 103.9970474243164, "rewards/real": 2.7894322872161865, "step": 1760 }, { "epoch": 0.57, "learning_rate": 4.50693374422188e-07, "logits/generated": 7.41119384765625, "logits/real": 4.549746513366699, "logps/generated": -2022.5950927734375, "logps/real": -489.665771484375, "loss": 0.0482, "rewards/accuracies": 0.987500011920929, "rewards/generated": -123.73014831542969, "rewards/margins": 117.5861587524414, "rewards/real": -6.143997669219971, "step": 1770 }, { "epoch": 0.57, "learning_rate": 4.501007467109162e-07, "logits/generated": 7.79290771484375, "logits/real": 4.867085933685303, "logps/generated": -2400.0107421875, "logps/real": -486.2530212402344, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -160.72952270507812, "rewards/margins": 152.98086547851562, "rewards/real": -7.748651027679443, "step": 1780 }, { "epoch": 0.57, "learning_rate": 4.495081189996444e-07, "logits/generated": 7.24993371963501, "logits/real": 4.015860080718994, "logps/generated": -2023.630615234375, "logps/real": -473.1576232910156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -133.7988739013672, "rewards/margins": 127.13224792480469, "rewards/real": -6.666635036468506, "step": 1790 }, { "epoch": 0.58, "learning_rate": 4.4891549128837266e-07, "logits/generated": 7.231659889221191, "logits/real": 4.220739364624023, "logps/generated": -2135.278564453125, "logps/real": -429.91522216796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -141.5944366455078, "rewards/margins": 136.72096252441406, "rewards/real": -4.873473167419434, "step": 1800 }, { "epoch": 0.58, "learning_rate": 4.4832286357710085e-07, "logits/generated": 6.928205966949463, "logits/real": 5.240578651428223, "logps/generated": -2044.7525634765625, "logps/real": -477.6776428222656, "loss": 0.0249, "rewards/accuracies": 0.987500011920929, "rewards/generated": -135.68801879882812, "rewards/margins": 129.0793914794922, "rewards/real": -6.608643531799316, "step": 1810 }, { "epoch": 0.58, "learning_rate": 4.477302358658291e-07, "logits/generated": 7.026902675628662, "logits/real": 5.504720687866211, "logps/generated": -1736.563232421875, "logps/real": -468.8472595214844, "loss": 0.1755, "rewards/accuracies": 1.0, "rewards/generated": -103.11873626708984, "rewards/margins": 98.29622650146484, "rewards/real": -4.822514533996582, "step": 1820 }, { "epoch": 0.59, "learning_rate": 4.4713760815455733e-07, "logits/generated": 5.341125965118408, "logits/real": 4.524205207824707, "logps/generated": -1286.26513671875, "logps/real": -372.8796691894531, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/generated": -51.488983154296875, "rewards/margins": 52.87274932861328, "rewards/real": 1.3837699890136719, "step": 1830 }, { "epoch": 0.59, "learning_rate": 4.465449804432855e-07, "logits/generated": 7.118363857269287, "logits/real": 5.447571754455566, "logps/generated": -1575.646728515625, "logps/real": -391.85565185546875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -78.01790618896484, "rewards/margins": 79.09579467773438, "rewards/real": 1.077885627746582, "step": 1840 }, { "epoch": 0.59, "learning_rate": 4.459523527320137e-07, "logits/generated": 6.846796989440918, "logits/real": 4.341604709625244, "logps/generated": -1327.3349609375, "logps/real": -456.3392028808594, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -64.98074340820312, "rewards/margins": 65.35833740234375, "rewards/real": 0.3775813579559326, "step": 1850 }, { "epoch": 0.6, "learning_rate": 4.4535972502074195e-07, "logits/generated": 6.6305694580078125, "logits/real": 4.20559024810791, "logps/generated": -1358.21923828125, "logps/real": -402.3564453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -62.9098014831543, "rewards/margins": 63.326072692871094, "rewards/real": 0.4162742495536804, "step": 1860 }, { "epoch": 0.6, "learning_rate": 4.4476709730947014e-07, "logits/generated": 6.641879081726074, "logits/real": 4.711638927459717, "logps/generated": -1396.1529541015625, "logps/real": -344.8717346191406, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/generated": -64.91830444335938, "rewards/margins": 68.1943588256836, "rewards/real": 3.2760519981384277, "step": 1870 }, { "epoch": 0.6, "learning_rate": 4.441744695981984e-07, "logits/generated": 5.805679798126221, "logits/real": 3.2094268798828125, "logps/generated": -1318.959228515625, "logps/real": -404.006591796875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -60.11090087890625, "rewards/margins": 63.470970153808594, "rewards/real": 3.360062837600708, "step": 1880 }, { "epoch": 0.6, "learning_rate": 4.435818418869266e-07, "logits/generated": 5.928086280822754, "logits/real": 3.5910098552703857, "logps/generated": -1417.7279052734375, "logps/real": -324.94818115234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -63.648597717285156, "rewards/margins": 67.14154052734375, "rewards/real": 3.4929356575012207, "step": 1890 }, { "epoch": 0.61, "learning_rate": 4.429892141756548e-07, "logits/generated": 6.485024452209473, "logits/real": 4.106749534606934, "logps/generated": -1445.7119140625, "logps/real": -328.7547912597656, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -71.72577667236328, "rewards/margins": 76.15248107910156, "rewards/real": 4.426706314086914, "step": 1900 }, { "epoch": 0.61, "learning_rate": 4.4239658646438306e-07, "logits/generated": 6.306286334991455, "logits/real": 3.622171401977539, "logps/generated": -1508.0594482421875, "logps/real": -362.8956298828125, "loss": 0.0104, "rewards/accuracies": 0.987500011920929, "rewards/generated": -72.13937377929688, "rewards/margins": 74.80626678466797, "rewards/real": 2.666901111602783, "step": 1910 }, { "epoch": 0.61, "learning_rate": 4.418039587531113e-07, "logits/generated": 6.146115303039551, "logits/real": 3.365912675857544, "logps/generated": -1893.528076171875, "logps/real": -409.42205810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -110.10377502441406, "rewards/margins": 109.90974426269531, "rewards/real": -0.1940377652645111, "step": 1920 }, { "epoch": 0.62, "learning_rate": 4.412113310418395e-07, "logits/generated": 7.063846588134766, "logits/real": 3.7589688301086426, "logps/generated": -1758.856689453125, "logps/real": -416.207275390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -100.57762145996094, "rewards/margins": 99.22016143798828, "rewards/real": -1.3574641942977905, "step": 1930 }, { "epoch": 0.62, "learning_rate": 4.4061870333056773e-07, "logits/generated": 6.8724260330200195, "logits/real": 3.150197744369507, "logps/generated": -1649.0625, "logps/real": -399.8758850097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -87.595458984375, "rewards/margins": 87.64045715332031, "rewards/real": 0.04500775411725044, "step": 1940 }, { "epoch": 0.62, "learning_rate": 4.4002607561929597e-07, "logits/generated": 7.001265048980713, "logits/real": 3.438425064086914, "logps/generated": -1636.22119140625, "logps/real": -425.7616271972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -94.06513977050781, "rewards/margins": 96.71174621582031, "rewards/real": 2.6466081142425537, "step": 1950 }, { "epoch": 0.63, "learning_rate": 4.3943344790802416e-07, "logits/generated": 6.962561130523682, "logits/real": 3.6104698181152344, "logps/generated": -1629.7708740234375, "logps/real": -356.288818359375, "loss": 0.0835, "rewards/accuracies": 0.987500011920929, "rewards/generated": -81.76075744628906, "rewards/margins": 85.72692108154297, "rewards/real": 3.966153621673584, "step": 1960 }, { "epoch": 0.63, "learning_rate": 4.388408201967524e-07, "logits/generated": 6.078118801116943, "logits/real": 3.798320770263672, "logps/generated": -1572.626220703125, "logps/real": -354.7767333984375, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/generated": -81.60746765136719, "rewards/margins": 86.85923767089844, "rewards/real": 5.25177001953125, "step": 1970 }, { "epoch": 0.63, "learning_rate": 4.3824819248548064e-07, "logits/generated": 7.424140930175781, "logits/real": 4.554934024810791, "logps/generated": -1562.614990234375, "logps/real": -398.14495849609375, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -80.14607238769531, "rewards/margins": 82.84638214111328, "rewards/real": 2.700307607650757, "step": 1980 }, { "epoch": 0.64, "learning_rate": 4.3765556477420883e-07, "logits/generated": 7.725550651550293, "logits/real": 4.81028938293457, "logps/generated": -1647.4967041015625, "logps/real": -370.8022155761719, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -88.14925384521484, "rewards/margins": 89.777099609375, "rewards/real": 1.627847671508789, "step": 1990 }, { "epoch": 0.64, "learning_rate": 4.3706293706293707e-07, "logits/generated": 6.470284938812256, "logits/real": 3.723733901977539, "logps/generated": -1804.39453125, "logps/real": -376.9502258300781, "loss": 0.0542, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -99.6127700805664, "rewards/margins": 102.87458801269531, "rewards/real": 3.2618141174316406, "step": 2000 }, { "epoch": 0.64, "learning_rate": 4.364703093516653e-07, "logits/generated": 6.4981369972229, "logits/real": 4.045376777648926, "logps/generated": -1662.384033203125, "logps/real": -343.97772216796875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -89.25303649902344, "rewards/margins": 94.84370422363281, "rewards/real": 5.590662956237793, "step": 2010 }, { "epoch": 0.65, "learning_rate": 4.3587768164039345e-07, "logits/generated": 6.971539497375488, "logits/real": 3.9020907878875732, "logps/generated": -1603.117431640625, "logps/real": -406.10455322265625, "loss": 0.0539, "rewards/accuracies": 0.987500011920929, "rewards/generated": -86.11524200439453, "rewards/margins": 89.37966918945312, "rewards/real": 3.264432430267334, "step": 2020 }, { "epoch": 0.65, "learning_rate": 4.352850539291217e-07, "logits/generated": 7.180318355560303, "logits/real": 4.499087333679199, "logps/generated": -1575.615478515625, "logps/real": -362.96441650390625, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/generated": -82.51438903808594, "rewards/margins": 85.6214599609375, "rewards/real": 3.1070730686187744, "step": 2030 }, { "epoch": 0.65, "learning_rate": 4.346924262178499e-07, "logits/generated": 6.130655288696289, "logits/real": 3.522177219390869, "logps/generated": -1633.759521484375, "logps/real": -373.85687255859375, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/generated": -86.10863494873047, "rewards/margins": 88.05864715576172, "rewards/real": 1.9500116109848022, "step": 2040 }, { "epoch": 0.66, "learning_rate": 4.340997985065781e-07, "logits/generated": 7.123037815093994, "logits/real": 2.991910457611084, "logps/generated": -1619.687744140625, "logps/real": -402.8570861816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -83.2574234008789, "rewards/margins": 85.04276275634766, "rewards/real": 1.7853460311889648, "step": 2050 }, { "epoch": 0.66, "learning_rate": 4.3350717079530637e-07, "logits/generated": 6.764345645904541, "logits/real": 3.0377116203308105, "logps/generated": -1475.3280029296875, "logps/real": -416.43402099609375, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/generated": -72.91661071777344, "rewards/margins": 77.6825942993164, "rewards/real": 4.765993118286133, "step": 2060 }, { "epoch": 0.66, "learning_rate": 4.3291454308403455e-07, "logits/generated": 5.997592926025391, "logits/real": 2.493969440460205, "logps/generated": -1558.3643798828125, "logps/real": -322.02911376953125, "loss": 0.0313, "rewards/accuracies": 0.987500011920929, "rewards/generated": -73.19859313964844, "rewards/margins": 79.51622009277344, "rewards/real": 6.317626953125, "step": 2070 }, { "epoch": 0.67, "learning_rate": 4.323219153727628e-07, "logits/generated": 5.551691055297852, "logits/real": 2.2926106452941895, "logps/generated": -1455.988037109375, "logps/real": -357.25421142578125, "loss": 0.0253, "rewards/accuracies": 1.0, "rewards/generated": -74.82169342041016, "rewards/margins": 80.64076232910156, "rewards/real": 5.8190813064575195, "step": 2080 }, { "epoch": 0.67, "learning_rate": 4.3172928766149104e-07, "logits/generated": 6.637547492980957, "logits/real": 2.304105758666992, "logps/generated": -1605.7271728515625, "logps/real": -392.0152893066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -86.64134216308594, "rewards/margins": 92.24010467529297, "rewards/real": 5.598758220672607, "step": 2090 }, { "epoch": 0.67, "learning_rate": 4.3113665995021923e-07, "logits/generated": 6.272039413452148, "logits/real": 2.820873260498047, "logps/generated": -1447.038818359375, "logps/real": -379.21258544921875, "loss": 0.0198, "rewards/accuracies": 0.987500011920929, "rewards/generated": -73.52703857421875, "rewards/margins": 78.1285629272461, "rewards/real": 4.601529598236084, "step": 2100 }, { "epoch": 0.68, "learning_rate": 4.3054403223894747e-07, "logits/generated": 6.534933567047119, "logits/real": 3.3231303691864014, "logps/generated": -1555.7955322265625, "logps/real": -369.7391052246094, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -82.95806884765625, "rewards/margins": 88.36368560791016, "rewards/real": 5.40561580657959, "step": 2110 }, { "epoch": 0.68, "learning_rate": 4.299514045276757e-07, "logits/generated": 5.455320358276367, "logits/real": 2.8272032737731934, "logps/generated": -1667.7991943359375, "logps/real": -326.4131774902344, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -90.74116516113281, "rewards/margins": 94.6767807006836, "rewards/real": 3.9356207847595215, "step": 2120 }, { "epoch": 0.68, "learning_rate": 4.293587768164039e-07, "logits/generated": 7.863619327545166, "logits/real": 4.736595630645752, "logps/generated": -1923.447509765625, "logps/real": -419.0160217285156, "loss": 0.0275, "rewards/accuracies": 0.987500011920929, "rewards/generated": -118.23887634277344, "rewards/margins": 115.51255798339844, "rewards/real": -2.7263171672821045, "step": 2130 }, { "epoch": 0.68, "learning_rate": 4.2876614910513214e-07, "logits/generated": 7.507322788238525, "logits/real": 6.206465721130371, "logps/generated": -2218.25, "logps/real": -522.2913818359375, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -154.6689453125, "rewards/margins": 138.54177856445312, "rewards/real": -16.127178192138672, "step": 2140 }, { "epoch": 0.69, "learning_rate": 4.281735213938604e-07, "logits/generated": 7.527505397796631, "logits/real": 6.246472358703613, "logps/generated": -2437.65673828125, "logps/real": -529.3317260742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -162.62030029296875, "rewards/margins": 149.6444549560547, "rewards/real": -12.975835800170898, "step": 2150 }, { "epoch": 0.69, "learning_rate": 4.2758089368258857e-07, "logits/generated": 8.30752182006836, "logits/real": 6.466645240783691, "logps/generated": -2316.99951171875, "logps/real": -559.2007446289062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.75094604492188, "rewards/margins": 144.11534118652344, "rewards/real": -13.635614395141602, "step": 2160 }, { "epoch": 0.69, "learning_rate": 4.269882659713168e-07, "logits/generated": 8.329998016357422, "logits/real": 6.1490092277526855, "logps/generated": -2423.94482421875, "logps/real": -607.797119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -165.50381469726562, "rewards/margins": 152.3792724609375, "rewards/real": -13.124551773071289, "step": 2170 }, { "epoch": 0.7, "learning_rate": 4.2639563826004506e-07, "logits/generated": 8.2555513381958, "logits/real": 6.447617530822754, "logps/generated": -2261.85888671875, "logps/real": -539.4599609375, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -155.73788452148438, "rewards/margins": 140.4088897705078, "rewards/real": -15.328999519348145, "step": 2180 }, { "epoch": 0.7, "learning_rate": 4.2580301054877325e-07, "logits/generated": 7.219870090484619, "logits/real": 6.3502116203308105, "logps/generated": -2281.720458984375, "logps/real": -575.0437622070312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -155.9979248046875, "rewards/margins": 140.00108337402344, "rewards/real": -15.996847152709961, "step": 2190 }, { "epoch": 0.7, "learning_rate": 4.2521038283750143e-07, "logits/generated": 8.33784008026123, "logits/real": 6.53687047958374, "logps/generated": -2249.629638671875, "logps/real": -520.8570556640625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -150.51608276367188, "rewards/margins": 139.0610809326172, "rewards/real": -11.454991340637207, "step": 2200 }, { "epoch": 0.71, "learning_rate": 4.246177551262297e-07, "logits/generated": 8.54228401184082, "logits/real": 6.26255464553833, "logps/generated": -2219.932861328125, "logps/real": -571.289794921875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -151.71279907226562, "rewards/margins": 139.66696166992188, "rewards/real": -12.045828819274902, "step": 2210 }, { "epoch": 0.71, "learning_rate": 4.2402512741495787e-07, "logits/generated": 8.038786888122559, "logits/real": 6.717850685119629, "logps/generated": -2329.355712890625, "logps/real": -511.0064392089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -160.65087890625, "rewards/margins": 147.8884735107422, "rewards/real": -12.762407302856445, "step": 2220 }, { "epoch": 0.71, "learning_rate": 4.234324997036861e-07, "logits/generated": 8.91793155670166, "logits/real": 5.9069318771362305, "logps/generated": -2339.47607421875, "logps/real": -484.7416076660156, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/generated": -155.7845916748047, "rewards/margins": 147.94015502929688, "rewards/real": -7.8444342613220215, "step": 2230 }, { "epoch": 0.72, "learning_rate": 4.2283987199241435e-07, "logits/generated": 8.031610488891602, "logits/real": 6.5913519859313965, "logps/generated": -1735.3453369140625, "logps/real": -397.38330078125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -90.72663879394531, "rewards/margins": 92.34837341308594, "rewards/real": 1.6217377185821533, "step": 2240 }, { "epoch": 0.72, "learning_rate": 4.2224724428114254e-07, "logits/generated": 7.496441841125488, "logits/real": 6.460533142089844, "logps/generated": -1728.5865478515625, "logps/real": -374.142333984375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -102.20568084716797, "rewards/margins": 103.11724853515625, "rewards/real": 0.9115570783615112, "step": 2250 }, { "epoch": 0.72, "learning_rate": 4.216546165698708e-07, "logits/generated": 8.06144905090332, "logits/real": 6.671972751617432, "logps/generated": -1940.5015869140625, "logps/real": -432.64794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -122.03468322753906, "rewards/margins": 121.94331359863281, "rewards/real": -0.09136152267456055, "step": 2260 }, { "epoch": 0.73, "learning_rate": 4.21061988858599e-07, "logits/generated": 7.962967872619629, "logits/real": 6.657961845397949, "logps/generated": -2204.759033203125, "logps/real": -387.04034423828125, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -148.03817749023438, "rewards/margins": 147.7255401611328, "rewards/real": -0.31262603402137756, "step": 2270 }, { "epoch": 0.73, "learning_rate": 4.204693611473272e-07, "logits/generated": 8.462556838989258, "logits/real": 6.342984199523926, "logps/generated": -2474.34228515625, "logps/real": -480.97259521484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -177.25599670410156, "rewards/margins": 167.22470092773438, "rewards/real": -10.031294822692871, "step": 2280 }, { "epoch": 0.73, "learning_rate": 4.1987673343605545e-07, "logits/generated": 8.898576736450195, "logits/real": 6.45833683013916, "logps/generated": -2734.031005859375, "logps/real": -523.716064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -195.55360412597656, "rewards/margins": 183.6600799560547, "rewards/real": -11.893523216247559, "step": 2290 }, { "epoch": 0.74, "learning_rate": 4.192841057247837e-07, "logits/generated": 7.585541725158691, "logits/real": 6.641376495361328, "logps/generated": -2549.35009765625, "logps/real": -507.84857177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -180.36965942382812, "rewards/margins": 168.60366821289062, "rewards/real": -11.765992164611816, "step": 2300 }, { "epoch": 0.74, "learning_rate": 4.186914780135119e-07, "logits/generated": 8.432327270507812, "logits/real": 6.6050310134887695, "logps/generated": -2526.220703125, "logps/real": -584.2586669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -176.08438110351562, "rewards/margins": 162.0506134033203, "rewards/real": -14.033808708190918, "step": 2310 }, { "epoch": 0.74, "learning_rate": 4.180988503022401e-07, "logits/generated": 8.032669067382812, "logits/real": 6.313161373138428, "logps/generated": -2716.517822265625, "logps/real": -493.9371643066406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -197.44577026367188, "rewards/margins": 185.5958251953125, "rewards/real": -11.849949836730957, "step": 2320 }, { "epoch": 0.75, "learning_rate": 4.1750622259096837e-07, "logits/generated": 9.379237174987793, "logits/real": 6.628178596496582, "logps/generated": -2601.35546875, "logps/real": -549.10888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -187.670166015625, "rewards/margins": 174.0565643310547, "rewards/real": -13.613619804382324, "step": 2330 }, { "epoch": 0.75, "learning_rate": 4.1691359487969656e-07, "logits/generated": 8.686020851135254, "logits/real": 7.33512020111084, "logps/generated": -2386.8828125, "logps/real": -491.61883544921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -165.7841033935547, "rewards/margins": 156.48590087890625, "rewards/real": -9.298215866088867, "step": 2340 }, { "epoch": 0.75, "learning_rate": 4.163209671684248e-07, "logits/generated": 8.934557914733887, "logits/real": 6.8540215492248535, "logps/generated": -2766.52587890625, "logps/real": -488.03228759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.7220916748047, "rewards/margins": 192.42478942871094, "rewards/real": -10.297308921813965, "step": 2350 }, { "epoch": 0.76, "learning_rate": 4.1572833945715304e-07, "logits/generated": 8.042403221130371, "logits/real": 5.955174446105957, "logps/generated": -2409.816162109375, "logps/real": -622.64501953125, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -167.20181274414062, "rewards/margins": 149.732177734375, "rewards/real": -17.46963119506836, "step": 2360 }, { "epoch": 0.76, "learning_rate": 4.1513571174588123e-07, "logits/generated": 7.378883361816406, "logits/real": 4.417344570159912, "logps/generated": -2050.104248046875, "logps/real": -444.76116943359375, "loss": 0.1787, "rewards/accuracies": 0.987500011920929, "rewards/generated": -127.433837890625, "rewards/margins": 123.74485778808594, "rewards/real": -3.6890053749084473, "step": 2370 }, { "epoch": 0.76, "learning_rate": 4.145430840346094e-07, "logits/generated": 6.24300479888916, "logits/real": 4.903298854827881, "logps/generated": -1765.8125, "logps/real": -331.2713928222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -105.06922912597656, "rewards/margins": 109.94560241699219, "rewards/real": 4.876380920410156, "step": 2380 }, { "epoch": 0.76, "learning_rate": 4.1395045632333766e-07, "logits/generated": 6.882040500640869, "logits/real": 5.327009677886963, "logps/generated": -2033.096435546875, "logps/real": -356.7655334472656, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -128.7213592529297, "rewards/margins": 132.7244110107422, "rewards/real": 4.003042697906494, "step": 2390 }, { "epoch": 0.77, "learning_rate": 4.1335782861206585e-07, "logits/generated": 7.003479957580566, "logits/real": 4.670861721038818, "logps/generated": -1902.3763427734375, "logps/real": -400.540771484375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -117.39128112792969, "rewards/margins": 118.16578674316406, "rewards/real": 0.7745150327682495, "step": 2400 }, { "epoch": 0.77, "learning_rate": 4.127652009007941e-07, "logits/generated": 7.714796543121338, "logits/real": 4.497018337249756, "logps/generated": -2319.64501953125, "logps/real": -414.4706115722656, "loss": 0.0368, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -154.17709350585938, "rewards/margins": 155.20709228515625, "rewards/real": 1.0300065279006958, "step": 2410 }, { "epoch": 0.77, "learning_rate": 4.1217257318952233e-07, "logits/generated": 6.9251203536987305, "logits/real": 3.8276195526123047, "logps/generated": -1543.0440673828125, "logps/real": -448.8999938964844, "loss": 0.014, "rewards/accuracies": 0.987500011920929, "rewards/generated": -84.08116149902344, "rewards/margins": 85.13139343261719, "rewards/real": 1.0502389669418335, "step": 2420 }, { "epoch": 0.78, "learning_rate": 4.115799454782505e-07, "logits/generated": 5.472304344177246, "logits/real": 3.6805262565612793, "logps/generated": -1557.1878662109375, "logps/real": -356.34503173828125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -78.15277862548828, "rewards/margins": 81.7288589477539, "rewards/real": 3.576084852218628, "step": 2430 }, { "epoch": 0.78, "learning_rate": 4.1098731776697876e-07, "logits/generated": 7.142279624938965, "logits/real": 4.758624076843262, "logps/generated": -1534.67724609375, "logps/real": -401.4478759765625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/generated": -75.74860382080078, "rewards/margins": 78.55433654785156, "rewards/real": 2.8057284355163574, "step": 2440 }, { "epoch": 0.78, "learning_rate": 4.10394690055707e-07, "logits/generated": 7.1053338050842285, "logits/real": 5.054806709289551, "logps/generated": -1531.07763671875, "logps/real": -389.58673095703125, "loss": 0.0151, "rewards/accuracies": 1.0, "rewards/generated": -79.07850646972656, "rewards/margins": 83.4716567993164, "rewards/real": 4.39314603805542, "step": 2450 }, { "epoch": 0.79, "learning_rate": 4.098020623444352e-07, "logits/generated": 7.784437656402588, "logits/real": 5.0880937576293945, "logps/generated": -1685.110595703125, "logps/real": -325.0840148925781, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -90.38352966308594, "rewards/margins": 95.30231475830078, "rewards/real": 4.91878604888916, "step": 2460 }, { "epoch": 0.79, "learning_rate": 4.0920943463316344e-07, "logits/generated": 7.17263126373291, "logits/real": 5.019092559814453, "logps/generated": -1525.548583984375, "logps/real": -482.87823486328125, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/generated": -77.80412292480469, "rewards/margins": 76.81108093261719, "rewards/real": -0.9930347204208374, "step": 2470 }, { "epoch": 0.79, "learning_rate": 4.086168069218917e-07, "logits/generated": 6.884474754333496, "logits/real": 4.759303092956543, "logps/generated": -1590.6064453125, "logps/real": -388.37322998046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -85.10469055175781, "rewards/margins": 86.87979888916016, "rewards/real": 1.7751047611236572, "step": 2480 }, { "epoch": 0.8, "learning_rate": 4.0802417921061987e-07, "logits/generated": 7.089468955993652, "logits/real": 4.742917537689209, "logps/generated": -1664.8173828125, "logps/real": -340.1806335449219, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -94.84940338134766, "rewards/margins": 97.1686019897461, "rewards/real": 2.3192145824432373, "step": 2490 }, { "epoch": 0.8, "learning_rate": 4.074315514993481e-07, "logits/generated": 6.968369483947754, "logits/real": 4.612686634063721, "logps/generated": -1566.339111328125, "logps/real": -398.97418212890625, "loss": 0.01, "rewards/accuracies": 1.0, "rewards/generated": -83.38458251953125, "rewards/margins": 84.9498062133789, "rewards/real": 1.5652233362197876, "step": 2500 }, { "epoch": 0.8, "learning_rate": 4.0683892378807635e-07, "logits/generated": 7.161648750305176, "logits/real": 6.543487548828125, "logps/generated": -1733.8109130859375, "logps/real": -392.29083251953125, "loss": 0.0232, "rewards/accuracies": 0.987500011920929, "rewards/generated": -93.97492980957031, "rewards/margins": 94.35612487792969, "rewards/real": 0.38121718168258667, "step": 2510 }, { "epoch": 0.81, "learning_rate": 4.0624629607680454e-07, "logits/generated": 7.949463844299316, "logits/real": 6.042777061462402, "logps/generated": -1825.9849853515625, "logps/real": -409.8646545410156, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -110.38475036621094, "rewards/margins": 112.93035888671875, "rewards/real": 2.5455965995788574, "step": 2520 }, { "epoch": 0.81, "learning_rate": 4.056536683655328e-07, "logits/generated": 6.9065842628479, "logits/real": 6.891690254211426, "logps/generated": -1696.6517333984375, "logps/real": -475.15228271484375, "loss": 0.0195, "rewards/accuracies": 1.0, "rewards/generated": -97.05827331542969, "rewards/margins": 96.46576690673828, "rewards/real": -0.5925186276435852, "step": 2530 }, { "epoch": 0.81, "learning_rate": 4.05061040654261e-07, "logits/generated": 8.263236999511719, "logits/real": 8.052950859069824, "logps/generated": -1747.196533203125, "logps/real": -445.8377380371094, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/generated": -99.40019226074219, "rewards/margins": 96.42891693115234, "rewards/real": -2.97127366065979, "step": 2540 }, { "epoch": 0.82, "learning_rate": 4.044684129429892e-07, "logits/generated": 8.327539443969727, "logits/real": 8.985136032104492, "logps/generated": -2089.04248046875, "logps/real": -423.9291076660156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -133.99082946777344, "rewards/margins": 133.5138397216797, "rewards/real": -0.4769778251647949, "step": 2550 }, { "epoch": 0.82, "learning_rate": 4.038757852317174e-07, "logits/generated": 7.514575958251953, "logits/real": 8.398503303527832, "logps/generated": -1924.1943359375, "logps/real": -432.2705993652344, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -112.48478698730469, "rewards/margins": 111.21502685546875, "rewards/real": -1.2697560787200928, "step": 2560 }, { "epoch": 0.82, "learning_rate": 4.032831575204456e-07, "logits/generated": 8.133955001831055, "logits/real": 8.772761344909668, "logps/generated": -1754.6097412109375, "logps/real": -435.04638671875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -94.4659423828125, "rewards/margins": 93.41787719726562, "rewards/real": -1.0480563640594482, "step": 2570 }, { "epoch": 0.83, "learning_rate": 4.0269052980917383e-07, "logits/generated": 8.068957328796387, "logits/real": 7.656062126159668, "logps/generated": -1774.894775390625, "logps/real": -405.76434326171875, "loss": 0.0197, "rewards/accuracies": 0.987500011920929, "rewards/generated": -107.682861328125, "rewards/margins": 106.41868591308594, "rewards/real": -1.2641700506210327, "step": 2580 }, { "epoch": 0.83, "learning_rate": 4.0209790209790207e-07, "logits/generated": 7.125296115875244, "logits/real": 5.822979927062988, "logps/generated": -1805.764892578125, "logps/real": -402.42578125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -99.54673767089844, "rewards/margins": 100.21654510498047, "rewards/real": 0.66978919506073, "step": 2590 }, { "epoch": 0.83, "learning_rate": 4.0150527438663026e-07, "logits/generated": 7.0955352783203125, "logits/real": 5.060500144958496, "logps/generated": -2041.953125, "logps/real": -353.2672119140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -134.10455322265625, "rewards/margins": 135.74862670898438, "rewards/real": 1.6440492868423462, "step": 2600 }, { "epoch": 0.84, "learning_rate": 4.009126466753585e-07, "logits/generated": 7.4304399490356445, "logits/real": 4.976125240325928, "logps/generated": -1875.729736328125, "logps/real": -444.6436462402344, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/generated": -109.2206039428711, "rewards/margins": 107.97645568847656, "rewards/real": -1.244168996810913, "step": 2610 }, { "epoch": 0.84, "learning_rate": 4.0032001896408675e-07, "logits/generated": 7.609724521636963, "logits/real": 6.377804279327393, "logps/generated": -1885.1090087890625, "logps/real": -423.56854248046875, "loss": 0.0226, "rewards/accuracies": 1.0, "rewards/generated": -116.46875, "rewards/margins": 115.512451171875, "rewards/real": -0.9562975168228149, "step": 2620 }, { "epoch": 0.84, "learning_rate": 3.9972739125281494e-07, "logits/generated": 7.7724761962890625, "logits/real": 6.130133628845215, "logps/generated": -1704.9449462890625, "logps/real": -483.021240234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -99.24137115478516, "rewards/margins": 93.52515411376953, "rewards/real": -5.7162065505981445, "step": 2630 }, { "epoch": 0.84, "learning_rate": 3.991347635415432e-07, "logits/generated": 7.229278564453125, "logits/real": 6.683267116546631, "logps/generated": -1982.401611328125, "logps/real": -384.04022216796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -121.3022689819336, "rewards/margins": 118.17756652832031, "rewards/real": -3.1246984004974365, "step": 2640 }, { "epoch": 0.85, "learning_rate": 3.985421358302714e-07, "logits/generated": 7.445556640625, "logits/real": 6.822421073913574, "logps/generated": -1919.6865234375, "logps/real": -408.66864013671875, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -118.59580993652344, "rewards/margins": 115.75093841552734, "rewards/real": -2.8448777198791504, "step": 2650 }, { "epoch": 0.85, "learning_rate": 3.979495081189996e-07, "logits/generated": 7.230729579925537, "logits/real": 6.410356044769287, "logps/generated": -1971.067626953125, "logps/real": -425.5, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -119.92021179199219, "rewards/margins": 118.44563293457031, "rewards/real": -1.4745844602584839, "step": 2660 }, { "epoch": 0.85, "learning_rate": 3.9735688040772785e-07, "logits/generated": 7.504670143127441, "logits/real": 6.936696529388428, "logps/generated": -2039.7376708984375, "logps/real": -457.81982421875, "loss": 0.0598, "rewards/accuracies": 0.987500011920929, "rewards/generated": -128.49566650390625, "rewards/margins": 126.29817962646484, "rewards/real": -2.1974940299987793, "step": 2670 }, { "epoch": 0.86, "learning_rate": 3.967642526964561e-07, "logits/generated": 7.429982662200928, "logits/real": 5.989014148712158, "logps/generated": -1936.280517578125, "logps/real": -429.1781311035156, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -114.4945068359375, "rewards/margins": 113.2079086303711, "rewards/real": -1.2866039276123047, "step": 2680 }, { "epoch": 0.86, "learning_rate": 3.961716249851843e-07, "logits/generated": 6.5781049728393555, "logits/real": 4.8761515617370605, "logps/generated": -1851.1253662109375, "logps/real": -467.67620849609375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -110.03443908691406, "rewards/margins": 107.62968444824219, "rewards/real": -2.404752731323242, "step": 2690 }, { "epoch": 0.86, "learning_rate": 3.955789972739125e-07, "logits/generated": 7.270517826080322, "logits/real": 4.675835609436035, "logps/generated": -2189.3427734375, "logps/real": -469.34320068359375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -145.4514923095703, "rewards/margins": 139.77618408203125, "rewards/real": -5.6753106117248535, "step": 2700 }, { "epoch": 0.87, "learning_rate": 3.9498636956264076e-07, "logits/generated": 7.215243339538574, "logits/real": 5.138672828674316, "logps/generated": -2254.13916015625, "logps/real": -492.1170349121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -155.18048095703125, "rewards/margins": 145.93588256835938, "rewards/real": -9.244604110717773, "step": 2710 }, { "epoch": 0.87, "learning_rate": 3.9439374185136895e-07, "logits/generated": 6.797721862792969, "logits/real": 5.2962327003479, "logps/generated": -2622.736328125, "logps/real": -477.12841796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -181.09841918945312, "rewards/margins": 171.22848510742188, "rewards/real": -9.869913101196289, "step": 2720 }, { "epoch": 0.87, "learning_rate": 3.9380111414009714e-07, "logits/generated": 5.894318103790283, "logits/real": 6.219625473022461, "logps/generated": -2491.125244140625, "logps/real": -444.674072265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -171.95787048339844, "rewards/margins": 164.18603515625, "rewards/real": -7.771859169006348, "step": 2730 }, { "epoch": 0.88, "learning_rate": 3.932084864288254e-07, "logits/generated": 8.46430778503418, "logits/real": 7.4018683433532715, "logps/generated": -2230.9892578125, "logps/real": -431.27471923828125, "loss": 0.0379, "rewards/accuracies": 1.0, "rewards/generated": -145.44973754882812, "rewards/margins": 139.92214965820312, "rewards/real": -5.5276079177856445, "step": 2740 }, { "epoch": 0.88, "learning_rate": 3.9261585871755357e-07, "logits/generated": 8.02825927734375, "logits/real": 7.3891167640686035, "logps/generated": -2273.37158203125, "logps/real": -447.066162109375, "loss": 0.0293, "rewards/accuracies": 0.987500011920929, "rewards/generated": -153.27127075195312, "rewards/margins": 150.23300170898438, "rewards/real": -3.0382723808288574, "step": 2750 }, { "epoch": 0.88, "learning_rate": 3.920232310062818e-07, "logits/generated": 6.972275733947754, "logits/real": 7.739026069641113, "logps/generated": -2320.798828125, "logps/real": -459.5245666503906, "loss": 0.0407, "rewards/accuracies": 1.0, "rewards/generated": -152.60403442382812, "rewards/margins": 146.185791015625, "rewards/real": -6.418251037597656, "step": 2760 }, { "epoch": 0.89, "learning_rate": 3.9143060329501006e-07, "logits/generated": 8.222471237182617, "logits/real": 7.6385674476623535, "logps/generated": -2186.031982421875, "logps/real": -395.16119384765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -135.86843872070312, "rewards/margins": 135.62330627441406, "rewards/real": -0.24513721466064453, "step": 2770 }, { "epoch": 0.89, "learning_rate": 3.9083797558373825e-07, "logits/generated": 6.6677069664001465, "logits/real": 7.318799018859863, "logps/generated": -2265.687744140625, "logps/real": -371.3304443359375, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -148.7872314453125, "rewards/margins": 148.88082885742188, "rewards/real": 0.09360866248607635, "step": 2780 }, { "epoch": 0.89, "learning_rate": 3.902453478724665e-07, "logits/generated": 7.819890022277832, "logits/real": 7.978717803955078, "logps/generated": -2049.8525390625, "logps/real": -496.234619140625, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/generated": -133.93821716308594, "rewards/margins": 128.24244689941406, "rewards/real": -5.695760250091553, "step": 2790 }, { "epoch": 0.9, "learning_rate": 3.8965272016119473e-07, "logits/generated": 7.854443550109863, "logits/real": 8.5520658493042, "logps/generated": -2509.93603515625, "logps/real": -497.317138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -168.92526245117188, "rewards/margins": 158.01710510253906, "rewards/real": -10.908143997192383, "step": 2800 }, { "epoch": 0.9, "learning_rate": 3.890600924499229e-07, "logits/generated": 7.876428127288818, "logits/real": 8.051704406738281, "logps/generated": -2154.73779296875, "logps/real": -454.4645080566406, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/generated": -135.34112548828125, "rewards/margins": 130.8585662841797, "rewards/real": -4.4825592041015625, "step": 2810 }, { "epoch": 0.9, "learning_rate": 3.8846746473865116e-07, "logits/generated": 7.247229099273682, "logits/real": 8.76618480682373, "logps/generated": -2001.330810546875, "logps/real": -429.25665283203125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -120.38185119628906, "rewards/margins": 116.9828109741211, "rewards/real": -3.3990395069122314, "step": 2820 }, { "epoch": 0.91, "learning_rate": 3.878748370273794e-07, "logits/generated": 8.500656127929688, "logits/real": 7.6904778480529785, "logps/generated": -2105.495849609375, "logps/real": -479.367431640625, "loss": 0.0089, "rewards/accuracies": 0.987500011920929, "rewards/generated": -127.18016052246094, "rewards/margins": 122.7814712524414, "rewards/real": -4.398708820343018, "step": 2830 }, { "epoch": 0.91, "learning_rate": 3.872822093161076e-07, "logits/generated": 7.388060092926025, "logits/real": 6.007934093475342, "logps/generated": -1970.892822265625, "logps/real": -428.55572509765625, "loss": 0.0648, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -122.74332427978516, "rewards/margins": 122.36564636230469, "rewards/real": -0.37767037749290466, "step": 2840 }, { "epoch": 0.91, "learning_rate": 3.8668958160483583e-07, "logits/generated": 6.678569793701172, "logits/real": 6.136555194854736, "logps/generated": -1821.3310546875, "logps/real": -398.6175842285156, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/generated": -109.9161605834961, "rewards/margins": 110.63786315917969, "rewards/real": 0.7217038869857788, "step": 2850 }, { "epoch": 0.92, "learning_rate": 3.860969538935641e-07, "logits/generated": 7.2053937911987305, "logits/real": 6.368350028991699, "logps/generated": -1930.046630859375, "logps/real": -428.8641052246094, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -117.28592681884766, "rewards/margins": 115.02290344238281, "rewards/real": -2.2630252838134766, "step": 2860 }, { "epoch": 0.92, "learning_rate": 3.8550432618229226e-07, "logits/generated": 6.71401834487915, "logits/real": 5.5848774909973145, "logps/generated": -2074.6552734375, "logps/real": -441.4336853027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -134.6691131591797, "rewards/margins": 131.0623321533203, "rewards/real": -3.606782913208008, "step": 2870 }, { "epoch": 0.92, "learning_rate": 3.849116984710205e-07, "logits/generated": 7.437613010406494, "logits/real": 6.52271032333374, "logps/generated": -2107.801513671875, "logps/real": -443.77947998046875, "loss": 0.0201, "rewards/accuracies": 0.987500011920929, "rewards/generated": -134.4829864501953, "rewards/margins": 130.29397583007812, "rewards/real": -4.189007759094238, "step": 2880 }, { "epoch": 0.92, "learning_rate": 3.8431907075974875e-07, "logits/generated": 7.219000339508057, "logits/real": 5.966986179351807, "logps/generated": -1900.358642578125, "logps/real": -489.49969482421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -116.76822662353516, "rewards/margins": 110.37727355957031, "rewards/real": -6.3909406661987305, "step": 2890 }, { "epoch": 0.93, "learning_rate": 3.8372644304847694e-07, "logits/generated": 7.36328125, "logits/real": 6.565067291259766, "logps/generated": -2342.643798828125, "logps/real": -461.516357421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -154.68568420410156, "rewards/margins": 149.6314239501953, "rewards/real": -5.0542683601379395, "step": 2900 }, { "epoch": 0.93, "learning_rate": 3.831338153372051e-07, "logits/generated": 7.420237064361572, "logits/real": 5.979673862457275, "logps/generated": -2316.98779296875, "logps/real": -513.9208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -159.48678588867188, "rewards/margins": 151.86727905273438, "rewards/real": -7.619516849517822, "step": 2910 }, { "epoch": 0.93, "learning_rate": 3.8254118762593337e-07, "logits/generated": 6.774755001068115, "logits/real": 4.837088584899902, "logps/generated": -1903.572265625, "logps/real": -478.4046936035156, "loss": 0.0823, "rewards/accuracies": 1.0, "rewards/generated": -118.935791015625, "rewards/margins": 114.51701354980469, "rewards/real": -4.418778419494629, "step": 2920 }, { "epoch": 0.94, "learning_rate": 3.8194855991466156e-07, "logits/generated": 6.620291233062744, "logits/real": 4.534934043884277, "logps/generated": -1864.6468505859375, "logps/real": -429.945556640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -110.39591217041016, "rewards/margins": 108.98558044433594, "rewards/real": -1.4103460311889648, "step": 2930 }, { "epoch": 0.94, "learning_rate": 3.813559322033898e-07, "logits/generated": 6.311997413635254, "logits/real": 4.962859153747559, "logps/generated": -1704.8138427734375, "logps/real": -414.84033203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -93.86205291748047, "rewards/margins": 90.76737976074219, "rewards/real": -3.0946693420410156, "step": 2940 }, { "epoch": 0.94, "learning_rate": 3.8076330449211804e-07, "logits/generated": 5.281089782714844, "logits/real": 3.750222682952881, "logps/generated": -1691.012451171875, "logps/real": -414.16802978515625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -102.80133056640625, "rewards/margins": 100.86408996582031, "rewards/real": -1.937251091003418, "step": 2950 }, { "epoch": 0.95, "learning_rate": 3.8017067678084623e-07, "logits/generated": 7.117042541503906, "logits/real": 4.97611141204834, "logps/generated": -1897.001220703125, "logps/real": -437.01422119140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -110.62623596191406, "rewards/margins": 108.7616958618164, "rewards/real": -1.864546537399292, "step": 2960 }, { "epoch": 0.95, "learning_rate": 3.7957804906957447e-07, "logits/generated": 7.287625312805176, "logits/real": 5.334795951843262, "logps/generated": -1771.1168212890625, "logps/real": -422.09271240234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -97.45755767822266, "rewards/margins": 94.75927734375, "rewards/real": -2.698286294937134, "step": 2970 }, { "epoch": 0.95, "learning_rate": 3.789854213583027e-07, "logits/generated": 6.152033805847168, "logits/real": 5.531871795654297, "logps/generated": -2108.67138671875, "logps/real": -449.92950439453125, "loss": 0.0166, "rewards/accuracies": 0.987500011920929, "rewards/generated": -131.58340454101562, "rewards/margins": 127.50468444824219, "rewards/real": -4.0787153244018555, "step": 2980 }, { "epoch": 0.96, "learning_rate": 3.783927936470309e-07, "logits/generated": 6.6034722328186035, "logits/real": 5.082990646362305, "logps/generated": -1801.1337890625, "logps/real": -495.9305725097656, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -106.8523178100586, "rewards/margins": 100.25078582763672, "rewards/real": -6.60153865814209, "step": 2990 }, { "epoch": 0.96, "learning_rate": 3.7780016593575914e-07, "logits/generated": 5.591456413269043, "logits/real": 3.7118568420410156, "logps/generated": -1949.508056640625, "logps/real": -376.5047912597656, "loss": 0.0715, "rewards/accuracies": 0.987500011920929, "rewards/generated": -114.06121826171875, "rewards/margins": 114.37994384765625, "rewards/real": 0.31873559951782227, "step": 3000 }, { "epoch": 0.96, "learning_rate": 3.772075382244874e-07, "logits/generated": 5.380688667297363, "logits/real": 3.035019874572754, "logps/generated": -1998.5491943359375, "logps/real": -450.69140625, "loss": 0.0172, "rewards/accuracies": 1.0, "rewards/generated": -122.2056655883789, "rewards/margins": 119.5589370727539, "rewards/real": -2.6467204093933105, "step": 3010 }, { "epoch": 0.97, "learning_rate": 3.766149105132156e-07, "logits/generated": 4.884156227111816, "logits/real": 2.522505283355713, "logps/generated": -2339.6318359375, "logps/real": -407.40081787109375, "loss": 0.003, "rewards/accuracies": 0.987500011920929, "rewards/generated": -159.5628662109375, "rewards/margins": 159.01651000976562, "rewards/real": -0.5463663339614868, "step": 3020 }, { "epoch": 0.97, "learning_rate": 3.760222828019438e-07, "logits/generated": 5.076139450073242, "logits/real": 2.0761497020721436, "logps/generated": -2207.162109375, "logps/real": -505.5235900878906, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -139.22259521484375, "rewards/margins": 132.81076049804688, "rewards/real": -6.411847114562988, "step": 3030 }, { "epoch": 0.97, "learning_rate": 3.7542965509067206e-07, "logits/generated": 5.2502031326293945, "logits/real": 2.119757890701294, "logps/generated": -2089.50732421875, "logps/real": -527.8182983398438, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -135.6993865966797, "rewards/margins": 126.79927062988281, "rewards/real": -8.900120735168457, "step": 3040 }, { "epoch": 0.98, "learning_rate": 3.7483702737940025e-07, "logits/generated": 5.416511058807373, "logits/real": 3.019786834716797, "logps/generated": -2270.36865234375, "logps/real": -478.73455810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -151.02877807617188, "rewards/margins": 138.66802978515625, "rewards/real": -12.360757827758789, "step": 3050 }, { "epoch": 0.98, "learning_rate": 3.742443996681285e-07, "logits/generated": 6.317105293273926, "logits/real": 3.495959520339966, "logps/generated": -2169.806640625, "logps/real": -432.0508728027344, "loss": 0.0352, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -138.8908233642578, "rewards/margins": 137.25186157226562, "rewards/real": -1.6389669179916382, "step": 3060 }, { "epoch": 0.98, "learning_rate": 3.7365177195685673e-07, "logits/generated": 6.06216287612915, "logits/real": 3.9362540245056152, "logps/generated": -2037.2064208984375, "logps/real": -449.86114501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -132.3472442626953, "rewards/margins": 127.20819091796875, "rewards/real": -5.1390485763549805, "step": 3070 }, { "epoch": 0.99, "learning_rate": 3.730591442455849e-07, "logits/generated": 6.6954169273376465, "logits/real": 5.0613861083984375, "logps/generated": -2058.672119140625, "logps/real": -494.8243713378906, "loss": 0.0136, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -133.3071746826172, "rewards/margins": 124.30979919433594, "rewards/real": -8.99737548828125, "step": 3080 }, { "epoch": 0.99, "learning_rate": 3.724665165343131e-07, "logits/generated": 7.130950927734375, "logits/real": 5.8318257331848145, "logps/generated": -2037.149658203125, "logps/real": -475.92193603515625, "loss": 0.0241, "rewards/accuracies": 1.0, "rewards/generated": -126.69212341308594, "rewards/margins": 122.94071197509766, "rewards/real": -3.7514195442199707, "step": 3090 }, { "epoch": 0.99, "learning_rate": 3.7187388882304135e-07, "logits/generated": 8.520161628723145, "logits/real": 7.568430423736572, "logps/generated": -2170.773681640625, "logps/real": -492.49560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -144.9098663330078, "rewards/margins": 134.10421752929688, "rewards/real": -10.805635452270508, "step": 3100 }, { "epoch": 1.0, "learning_rate": 3.7128126111176954e-07, "logits/generated": 7.606919288635254, "logits/real": 6.629879951477051, "logps/generated": -2144.69970703125, "logps/real": -538.4136352539062, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -139.36074829101562, "rewards/margins": 127.45263671875, "rewards/real": -11.908119201660156, "step": 3110 }, { "epoch": 1.0, "learning_rate": 3.706886334004978e-07, "logits/generated": 7.2392683029174805, "logits/real": 7.044112205505371, "logps/generated": -2382.393310546875, "logps/real": -495.7548828125, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/generated": -161.22470092773438, "rewards/margins": 151.97532653808594, "rewards/real": -9.249357223510742, "step": 3120 }, { "epoch": 1.0, "learning_rate": 3.70096005689226e-07, "logits/generated": 7.227835178375244, "logits/real": 6.492308139801025, "logps/generated": -2274.2421875, "logps/real": -422.503662109375, "loss": 0.0641, "rewards/accuracies": 1.0, "rewards/generated": -150.91702270507812, "rewards/margins": 148.579345703125, "rewards/real": -2.337679624557495, "step": 3130 }, { "epoch": 1.0, "learning_rate": 3.695033779779542e-07, "logits/generated": 8.245325088500977, "logits/real": 7.268572807312012, "logps/generated": -2163.85888671875, "logps/real": -443.30352783203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -143.15231323242188, "rewards/margins": 141.80905151367188, "rewards/real": -1.3432495594024658, "step": 3140 }, { "epoch": 1.01, "learning_rate": 3.6891075026668245e-07, "logits/generated": 8.210981369018555, "logits/real": 6.877530097961426, "logps/generated": -2263.630615234375, "logps/real": -456.61151123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -148.24899291992188, "rewards/margins": 146.1547088623047, "rewards/real": -2.0942556858062744, "step": 3150 }, { "epoch": 1.01, "learning_rate": 3.683181225554107e-07, "logits/generated": 8.625000953674316, "logits/real": 6.838540077209473, "logps/generated": -2059.194091796875, "logps/real": -394.0699157714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -128.9686737060547, "rewards/margins": 127.20646667480469, "rewards/real": -1.7622144222259521, "step": 3160 }, { "epoch": 1.01, "learning_rate": 3.677254948441389e-07, "logits/generated": 7.912031650543213, "logits/real": 6.5322265625, "logps/generated": -2007.5855712890625, "logps/real": -432.9109802246094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -130.9169464111328, "rewards/margins": 127.86932373046875, "rewards/real": -3.0476105213165283, "step": 3170 }, { "epoch": 1.02, "learning_rate": 3.6713286713286713e-07, "logits/generated": 8.230499267578125, "logits/real": 6.921290397644043, "logps/generated": -2131.596923828125, "logps/real": -503.610595703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -138.99026489257812, "rewards/margins": 133.64512634277344, "rewards/real": -5.345118522644043, "step": 3180 }, { "epoch": 1.02, "learning_rate": 3.6654023942159537e-07, "logits/generated": 8.203896522521973, "logits/real": 7.58255672454834, "logps/generated": -2387.236572265625, "logps/real": -470.61651611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.9770965576172, "rewards/margins": 155.04483032226562, "rewards/real": -2.9322619438171387, "step": 3190 }, { "epoch": 1.02, "learning_rate": 3.6594761171032356e-07, "logits/generated": 8.058852195739746, "logits/real": 7.482379913330078, "logps/generated": -2117.32568359375, "logps/real": -379.31787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -137.16989135742188, "rewards/margins": 134.96548461914062, "rewards/real": -2.2044036388397217, "step": 3200 }, { "epoch": 1.03, "learning_rate": 3.653549839990518e-07, "logits/generated": 8.05280590057373, "logits/real": 6.988609313964844, "logps/generated": -2236.70068359375, "logps/real": -472.2269592285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -144.18368530273438, "rewards/margins": 137.79049682617188, "rewards/real": -6.393176078796387, "step": 3210 }, { "epoch": 1.03, "learning_rate": 3.6476235628778004e-07, "logits/generated": 7.528282165527344, "logits/real": 7.0225324630737305, "logps/generated": -2226.84423828125, "logps/real": -410.18695068359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -147.95018005371094, "rewards/margins": 146.70765686035156, "rewards/real": -1.2425076961517334, "step": 3220 }, { "epoch": 1.03, "learning_rate": 3.6416972857650823e-07, "logits/generated": 7.818196773529053, "logits/real": 6.67322301864624, "logps/generated": -2087.44384765625, "logps/real": -481.9576110839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -134.80380249023438, "rewards/margins": 131.165283203125, "rewards/real": -3.638521194458008, "step": 3230 }, { "epoch": 1.04, "learning_rate": 3.6357710086523647e-07, "logits/generated": 8.077546119689941, "logits/real": 7.552044868469238, "logps/generated": -2491.186279296875, "logps/real": -405.01129150390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -169.06704711914062, "rewards/margins": 165.4234161376953, "rewards/real": -3.6436171531677246, "step": 3240 }, { "epoch": 1.04, "learning_rate": 3.629844731539647e-07, "logits/generated": 8.240315437316895, "logits/real": 6.852290153503418, "logps/generated": -1935.8037109375, "logps/real": -461.125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/generated": -124.88533020019531, "rewards/margins": 121.94560241699219, "rewards/real": -2.939728260040283, "step": 3250 }, { "epoch": 1.04, "learning_rate": 3.6239184544269285e-07, "logits/generated": 8.120722770690918, "logits/real": 7.790104866027832, "logps/generated": -2057.318603515625, "logps/real": -438.0986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -136.71005249023438, "rewards/margins": 135.92068481445312, "rewards/real": -0.7893767356872559, "step": 3260 }, { "epoch": 1.05, "learning_rate": 3.617992177314211e-07, "logits/generated": 7.898737907409668, "logits/real": 7.4209465980529785, "logps/generated": -2316.689697265625, "logps/real": -405.5880432128906, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -161.36532592773438, "rewards/margins": 160.29000854492188, "rewards/real": -1.075333595275879, "step": 3270 }, { "epoch": 1.05, "learning_rate": 3.612065900201493e-07, "logits/generated": 7.5320305824279785, "logits/real": 7.813826084136963, "logps/generated": -2063.35107421875, "logps/real": -426.50146484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -130.80470275878906, "rewards/margins": 128.6910400390625, "rewards/real": -2.1136579513549805, "step": 3280 }, { "epoch": 1.05, "learning_rate": 3.606139623088775e-07, "logits/generated": 7.391462802886963, "logits/real": 7.617012023925781, "logps/generated": -2201.526123046875, "logps/real": -407.93597412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -148.3252410888672, "rewards/margins": 147.9222869873047, "rewards/real": -0.4029542803764343, "step": 3290 }, { "epoch": 1.06, "learning_rate": 3.6002133459760576e-07, "logits/generated": 8.121091842651367, "logits/real": 7.354173183441162, "logps/generated": -2059.38623046875, "logps/real": -390.0628967285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -132.35372924804688, "rewards/margins": 132.42770385742188, "rewards/real": 0.0740075558423996, "step": 3300 }, { "epoch": 1.06, "learning_rate": 3.5942870688633395e-07, "logits/generated": 8.633135795593262, "logits/real": 8.149147033691406, "logps/generated": -2167.35107421875, "logps/real": -406.9624938964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -141.9747772216797, "rewards/margins": 138.97418212890625, "rewards/real": -3.0006103515625, "step": 3310 }, { "epoch": 1.06, "learning_rate": 3.588360791750622e-07, "logits/generated": 8.252182960510254, "logits/real": 7.865628719329834, "logps/generated": -1988.6519775390625, "logps/real": -419.7942810058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -123.46451568603516, "rewards/margins": 121.886474609375, "rewards/real": -1.5780527591705322, "step": 3320 }, { "epoch": 1.07, "learning_rate": 3.5824345146379044e-07, "logits/generated": 8.250871658325195, "logits/real": 7.6787428855896, "logps/generated": -2149.06201171875, "logps/real": -434.17694091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -136.47994995117188, "rewards/margins": 135.41737365722656, "rewards/real": -1.062567114830017, "step": 3330 }, { "epoch": 1.07, "learning_rate": 3.576508237525186e-07, "logits/generated": 8.092613220214844, "logits/real": 7.00723123550415, "logps/generated": -2024.137451171875, "logps/real": -461.96551513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -126.69620513916016, "rewards/margins": 122.9664077758789, "rewards/real": -3.7297935485839844, "step": 3340 }, { "epoch": 1.07, "learning_rate": 3.5705819604124687e-07, "logits/generated": 8.30765151977539, "logits/real": 6.879611015319824, "logps/generated": -2323.9560546875, "logps/real": -475.6388244628906, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/generated": -156.30352783203125, "rewards/margins": 149.93048095703125, "rewards/real": -6.373032569885254, "step": 3350 }, { "epoch": 1.08, "learning_rate": 3.564655683299751e-07, "logits/generated": 7.8970136642456055, "logits/real": 7.4187493324279785, "logps/generated": -2488.395751953125, "logps/real": -458.22003173828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -167.81588745117188, "rewards/margins": 163.28758239746094, "rewards/real": -4.528337001800537, "step": 3360 }, { "epoch": 1.08, "learning_rate": 3.558729406187033e-07, "logits/generated": 7.870539665222168, "logits/real": 7.458841800689697, "logps/generated": -2369.377685546875, "logps/real": -406.3343505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -158.065185546875, "rewards/margins": 153.1325225830078, "rewards/real": -4.932667255401611, "step": 3370 }, { "epoch": 1.08, "learning_rate": 3.5528031290743154e-07, "logits/generated": 7.931563377380371, "logits/real": 7.4532270431518555, "logps/generated": -2364.41796875, "logps/real": -469.8768005371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -160.8697509765625, "rewards/margins": 157.14244079589844, "rewards/real": -3.727304458618164, "step": 3380 }, { "epoch": 1.08, "learning_rate": 3.546876851961598e-07, "logits/generated": 6.952210426330566, "logits/real": 6.691428184509277, "logps/generated": -2173.52392578125, "logps/real": -526.5530395507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -144.51626586914062, "rewards/margins": 136.79635620117188, "rewards/real": -7.719913482666016, "step": 3390 }, { "epoch": 1.09, "learning_rate": 3.5409505748488797e-07, "logits/generated": 7.105051517486572, "logits/real": 6.9299635887146, "logps/generated": -2334.846435546875, "logps/real": -501.2283630371094, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -154.8046112060547, "rewards/margins": 146.57061767578125, "rewards/real": -8.234001159667969, "step": 3400 }, { "epoch": 1.09, "learning_rate": 3.535024297736162e-07, "logits/generated": 7.477653503417969, "logits/real": 8.60663890838623, "logps/generated": -2462.01611328125, "logps/real": -449.1451110839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -171.15975952148438, "rewards/margins": 163.90805053710938, "rewards/real": -7.251717567443848, "step": 3410 }, { "epoch": 1.09, "learning_rate": 3.5290980206234446e-07, "logits/generated": 8.262357711791992, "logits/real": 8.276512145996094, "logps/generated": -2227.71923828125, "logps/real": -464.9203186035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -144.92666625976562, "rewards/margins": 139.07029724121094, "rewards/real": -5.856364727020264, "step": 3420 }, { "epoch": 1.1, "learning_rate": 3.5231717435107264e-07, "logits/generated": 6.800032615661621, "logits/real": 8.175691604614258, "logps/generated": -2441.8203125, "logps/real": -419.430419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -167.755615234375, "rewards/margins": 160.95294189453125, "rewards/real": -6.8026885986328125, "step": 3430 }, { "epoch": 1.1, "learning_rate": 3.5172454663980083e-07, "logits/generated": 7.7401533126831055, "logits/real": 7.7639265060424805, "logps/generated": -2305.062255859375, "logps/real": -450.3014221191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -150.31991577148438, "rewards/margins": 142.94200134277344, "rewards/real": -7.377904415130615, "step": 3440 }, { "epoch": 1.1, "learning_rate": 3.511319189285291e-07, "logits/generated": 7.384925842285156, "logits/real": 7.576319694519043, "logps/generated": -2394.87255859375, "logps/real": -466.79705810546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -161.463623046875, "rewards/margins": 151.77377319335938, "rewards/real": -9.689836502075195, "step": 3450 }, { "epoch": 1.11, "learning_rate": 3.5053929121725726e-07, "logits/generated": 8.108686447143555, "logits/real": 8.013384819030762, "logps/generated": -2321.23193359375, "logps/real": -453.08447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.33694458007812, "rewards/margins": 148.54241943359375, "rewards/real": -8.794524192810059, "step": 3460 }, { "epoch": 1.11, "learning_rate": 3.499466635059855e-07, "logits/generated": 8.2467679977417, "logits/real": 7.522176265716553, "logps/generated": -2249.79541015625, "logps/real": -525.9573974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -156.43511962890625, "rewards/margins": 148.72494506835938, "rewards/real": -7.710177421569824, "step": 3470 }, { "epoch": 1.11, "learning_rate": 3.4935403579471375e-07, "logits/generated": 7.851830959320068, "logits/real": 7.384527683258057, "logps/generated": -2232.7998046875, "logps/real": -454.78118896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -149.19894409179688, "rewards/margins": 142.48025512695312, "rewards/real": -6.718689918518066, "step": 3480 }, { "epoch": 1.12, "learning_rate": 3.4876140808344194e-07, "logits/generated": 7.584040641784668, "logits/real": 6.891657829284668, "logps/generated": -2334.17529296875, "logps/real": -482.6087951660156, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -156.69923400878906, "rewards/margins": 149.88772583007812, "rewards/real": -6.811507225036621, "step": 3490 }, { "epoch": 1.12, "learning_rate": 3.481687803721702e-07, "logits/generated": 8.4055757522583, "logits/real": 7.680420875549316, "logps/generated": -2091.34375, "logps/real": -521.2897338867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -136.05178833007812, "rewards/margins": 125.3792724609375, "rewards/real": -10.672523498535156, "step": 3500 }, { "epoch": 1.12, "learning_rate": 3.475761526608984e-07, "logits/generated": 8.414507865905762, "logits/real": 7.343222618103027, "logps/generated": -2092.346435546875, "logps/real": -562.0968017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -134.81484985351562, "rewards/margins": 124.8052978515625, "rewards/real": -10.009557723999023, "step": 3510 }, { "epoch": 1.13, "learning_rate": 3.469835249496266e-07, "logits/generated": 7.129863739013672, "logits/real": 6.661769866943359, "logps/generated": -2159.75732421875, "logps/real": -453.82861328125, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -139.74143981933594, "rewards/margins": 135.64511108398438, "rewards/real": -4.096342086791992, "step": 3520 }, { "epoch": 1.13, "learning_rate": 3.4639089723835485e-07, "logits/generated": 8.130109786987305, "logits/real": 7.891880035400391, "logps/generated": -2204.785888671875, "logps/real": -479.8753967285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -142.4805908203125, "rewards/margins": 137.27613830566406, "rewards/real": -5.2044267654418945, "step": 3530 }, { "epoch": 1.13, "learning_rate": 3.457982695270831e-07, "logits/generated": 7.8183417320251465, "logits/real": 7.079860687255859, "logps/generated": -1813.3118896484375, "logps/real": -495.20965576171875, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/generated": -105.1306381225586, "rewards/margins": 100.369873046875, "rewards/real": -4.760771751403809, "step": 3540 }, { "epoch": 1.14, "learning_rate": 3.452056418158113e-07, "logits/generated": 7.80575704574585, "logits/real": 7.509347438812256, "logps/generated": -2056.756591796875, "logps/real": -428.09027099609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -135.2467498779297, "rewards/margins": 130.22933959960938, "rewards/real": -5.0174174308776855, "step": 3550 }, { "epoch": 1.14, "learning_rate": 3.446130141045395e-07, "logits/generated": 8.60452938079834, "logits/real": 7.832653999328613, "logps/generated": -2107.294921875, "logps/real": -469.9111328125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -134.81814575195312, "rewards/margins": 130.37423706054688, "rewards/real": -4.443899154663086, "step": 3560 }, { "epoch": 1.14, "learning_rate": 3.4402038639326777e-07, "logits/generated": 7.493406772613525, "logits/real": 8.468154907226562, "logps/generated": -2310.4326171875, "logps/real": -437.82806396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -153.2908172607422, "rewards/margins": 148.427978515625, "rewards/real": -4.862843990325928, "step": 3570 }, { "epoch": 1.15, "learning_rate": 3.4342775868199595e-07, "logits/generated": 8.075286865234375, "logits/real": 8.409788131713867, "logps/generated": -2275.07470703125, "logps/real": -477.04290771484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -152.14108276367188, "rewards/margins": 144.21859741210938, "rewards/real": -7.922501564025879, "step": 3580 }, { "epoch": 1.15, "learning_rate": 3.428351309707242e-07, "logits/generated": 8.453285217285156, "logits/real": 8.174100875854492, "logps/generated": -2283.738037109375, "logps/real": -451.0909118652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -145.06253051757812, "rewards/margins": 139.6310272216797, "rewards/real": -5.431509971618652, "step": 3590 }, { "epoch": 1.15, "learning_rate": 3.4224250325945244e-07, "logits/generated": 7.517498970031738, "logits/real": 7.011659145355225, "logps/generated": -2088.999267578125, "logps/real": -469.9163513183594, "loss": 0.0146, "rewards/accuracies": 0.987500011920929, "rewards/generated": -131.9836883544922, "rewards/margins": 127.3974380493164, "rewards/real": -4.586251735687256, "step": 3600 }, { "epoch": 1.16, "learning_rate": 3.4164987554818063e-07, "logits/generated": 8.463109970092773, "logits/real": 7.597643852233887, "logps/generated": -2544.93798828125, "logps/real": -578.7266235351562, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/generated": -181.51116943359375, "rewards/margins": 166.95777893066406, "rewards/real": -14.553400039672852, "step": 3610 }, { "epoch": 1.16, "learning_rate": 3.410572478369088e-07, "logits/generated": 7.981478691101074, "logits/real": 7.424731254577637, "logps/generated": -2649.03076171875, "logps/real": -529.9971923828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -192.17166137695312, "rewards/margins": 179.45034790039062, "rewards/real": -12.721323013305664, "step": 3620 }, { "epoch": 1.16, "learning_rate": 3.4046462012563706e-07, "logits/generated": 7.834013938903809, "logits/real": 8.125712394714355, "logps/generated": -2535.3134765625, "logps/real": -523.1331787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -176.6103973388672, "rewards/margins": 163.4051513671875, "rewards/real": -13.205245971679688, "step": 3630 }, { "epoch": 1.16, "learning_rate": 3.3987199241436525e-07, "logits/generated": 8.760353088378906, "logits/real": 9.001749992370605, "logps/generated": -2900.2958984375, "logps/real": -497.83587646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.9998016357422, "rewards/margins": 202.25247192382812, "rewards/real": -10.747337341308594, "step": 3640 }, { "epoch": 1.17, "learning_rate": 3.392793647030935e-07, "logits/generated": 8.55837345123291, "logits/real": 7.914141654968262, "logps/generated": -2406.06982421875, "logps/real": -603.4434204101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -165.0146942138672, "rewards/margins": 149.5556640625, "rewards/real": -15.45905876159668, "step": 3650 }, { "epoch": 1.17, "learning_rate": 3.3868673699182173e-07, "logits/generated": 8.680868148803711, "logits/real": 7.643444061279297, "logps/generated": -2313.84375, "logps/real": -524.50390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -156.5612335205078, "rewards/margins": 145.9801025390625, "rewards/real": -10.581113815307617, "step": 3660 }, { "epoch": 1.17, "learning_rate": 3.380941092805499e-07, "logits/generated": 8.154035568237305, "logits/real": 7.307318687438965, "logps/generated": -2493.177734375, "logps/real": -582.906494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -178.28811645507812, "rewards/margins": 163.68043518066406, "rewards/real": -14.607686042785645, "step": 3670 }, { "epoch": 1.18, "learning_rate": 3.3750148156927816e-07, "logits/generated": 8.487099647521973, "logits/real": 7.052435398101807, "logps/generated": -2483.33056640625, "logps/real": -577.37060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -174.8097686767578, "rewards/margins": 162.47177124023438, "rewards/real": -12.337991714477539, "step": 3680 }, { "epoch": 1.18, "learning_rate": 3.369088538580064e-07, "logits/generated": 7.8405585289001465, "logits/real": 7.616133213043213, "logps/generated": -2704.9150390625, "logps/real": -554.3448486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -189.26527404785156, "rewards/margins": 177.07789611816406, "rewards/real": -12.187393188476562, "step": 3690 }, { "epoch": 1.18, "learning_rate": 3.363162261467346e-07, "logits/generated": 8.566651344299316, "logits/real": 7.1055402755737305, "logps/generated": -2475.217041015625, "logps/real": -503.64898681640625, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -174.28546142578125, "rewards/margins": 163.02313232421875, "rewards/real": -11.262336730957031, "step": 3700 }, { "epoch": 1.19, "learning_rate": 3.3572359843546283e-07, "logits/generated": 9.018073081970215, "logits/real": 8.728063583374023, "logps/generated": -2448.410888671875, "logps/real": -533.4593505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -169.00119018554688, "rewards/margins": 158.25381469726562, "rewards/real": -10.747369766235352, "step": 3710 }, { "epoch": 1.19, "learning_rate": 3.351309707241911e-07, "logits/generated": 7.788723945617676, "logits/real": 7.269866943359375, "logps/generated": -2437.468017578125, "logps/real": -538.6864624023438, "loss": 0.0074, "rewards/accuracies": 0.987500011920929, "rewards/generated": -168.24961853027344, "rewards/margins": 155.3504638671875, "rewards/real": -12.899144172668457, "step": 3720 }, { "epoch": 1.19, "learning_rate": 3.3453834301291927e-07, "logits/generated": 8.312154769897461, "logits/real": 7.965063571929932, "logps/generated": -2445.66162109375, "logps/real": -428.6332092285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -174.08578491210938, "rewards/margins": 170.88629150390625, "rewards/real": -3.1994924545288086, "step": 3730 }, { "epoch": 1.2, "learning_rate": 3.339457153016475e-07, "logits/generated": 8.448351860046387, "logits/real": 8.019471168518066, "logps/generated": -2624.515625, "logps/real": -415.50006103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -184.7956085205078, "rewards/margins": 180.40687561035156, "rewards/real": -4.388728141784668, "step": 3740 }, { "epoch": 1.2, "learning_rate": 3.3335308759037575e-07, "logits/generated": 7.5610809326171875, "logits/real": 6.8924126625061035, "logps/generated": -2408.282470703125, "logps/real": -482.810791015625, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -168.9614715576172, "rewards/margins": 162.51416015625, "rewards/real": -6.447300910949707, "step": 3750 }, { "epoch": 1.2, "learning_rate": 3.3276045987910394e-07, "logits/generated": 7.4078779220581055, "logits/real": 7.950962066650391, "logps/generated": -2527.2138671875, "logps/real": -414.17333984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -174.94175720214844, "rewards/margins": 171.24240112304688, "rewards/real": -3.699375629425049, "step": 3760 }, { "epoch": 1.21, "learning_rate": 3.321678321678322e-07, "logits/generated": 8.551604270935059, "logits/real": 7.888591766357422, "logps/generated": -2355.670166015625, "logps/real": -428.29541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -163.4680938720703, "rewards/margins": 160.6470184326172, "rewards/real": -2.82106614112854, "step": 3770 }, { "epoch": 1.21, "learning_rate": 3.315752044565604e-07, "logits/generated": 7.468926429748535, "logits/real": 7.2237958908081055, "logps/generated": -2458.83642578125, "logps/real": -446.80462646484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -168.6776123046875, "rewards/margins": 163.36419677734375, "rewards/real": -5.313417911529541, "step": 3780 }, { "epoch": 1.21, "learning_rate": 3.309825767452886e-07, "logits/generated": 5.96670389175415, "logits/real": 6.715690612792969, "logps/generated": -2409.82568359375, "logps/real": -418.70599365234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -158.387451171875, "rewards/margins": 155.29000854492188, "rewards/real": -3.0974433422088623, "step": 3790 }, { "epoch": 1.22, "learning_rate": 3.303899490340168e-07, "logits/generated": 7.565770626068115, "logits/real": 7.342195987701416, "logps/generated": -2268.38623046875, "logps/real": -464.6393127441406, "loss": 0.0171, "rewards/accuracies": 0.987500011920929, "rewards/generated": -146.79144287109375, "rewards/margins": 142.926025390625, "rewards/real": -3.8654417991638184, "step": 3800 }, { "epoch": 1.22, "learning_rate": 3.29797321322745e-07, "logits/generated": 7.656620979309082, "logits/real": 6.679084777832031, "logps/generated": -2342.98046875, "logps/real": -454.09674072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -158.6143341064453, "rewards/margins": 157.6035919189453, "rewards/real": -1.0107345581054688, "step": 3810 }, { "epoch": 1.22, "learning_rate": 3.2920469361147323e-07, "logits/generated": 7.635922908782959, "logits/real": 7.084968566894531, "logps/generated": -2323.62353515625, "logps/real": -388.5582580566406, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -157.06417846679688, "rewards/margins": 155.43411254882812, "rewards/real": -1.6300512552261353, "step": 3820 }, { "epoch": 1.23, "learning_rate": 3.2861206590020147e-07, "logits/generated": 7.098822116851807, "logits/real": 6.906205654144287, "logps/generated": -2258.292236328125, "logps/real": -436.6798400878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -154.81719970703125, "rewards/margins": 150.68896484375, "rewards/real": -4.12822961807251, "step": 3830 }, { "epoch": 1.23, "learning_rate": 3.2801943818892966e-07, "logits/generated": 7.2831130027771, "logits/real": 6.414669036865234, "logps/generated": -2313.18310546875, "logps/real": -480.239990234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.49911499023438, "rewards/margins": 154.04835510253906, "rewards/real": -3.4507784843444824, "step": 3840 }, { "epoch": 1.23, "learning_rate": 3.274268104776579e-07, "logits/generated": 7.388009071350098, "logits/real": 7.895294189453125, "logps/generated": -2304.59326171875, "logps/real": -371.0569763183594, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -155.8150177001953, "rewards/margins": 154.19638061523438, "rewards/real": -1.6186469793319702, "step": 3850 }, { "epoch": 1.24, "learning_rate": 3.2683418276638614e-07, "logits/generated": 6.581626892089844, "logits/real": 6.9503302574157715, "logps/generated": -2273.49267578125, "logps/real": -432.5542907714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -151.2398223876953, "rewards/margins": 144.92550659179688, "rewards/real": -6.314304828643799, "step": 3860 }, { "epoch": 1.24, "learning_rate": 3.2624155505511433e-07, "logits/generated": 7.4986066818237305, "logits/real": 7.687048435211182, "logps/generated": -2493.4013671875, "logps/real": -458.4097595214844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -175.29180908203125, "rewards/margins": 171.19918823242188, "rewards/real": -4.092643737792969, "step": 3870 }, { "epoch": 1.24, "learning_rate": 3.256489273438426e-07, "logits/generated": 6.777361869812012, "logits/real": 6.528800010681152, "logps/generated": -2262.51171875, "logps/real": -472.7518005371094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -151.38906860351562, "rewards/margins": 146.32057189941406, "rewards/real": -5.068513870239258, "step": 3880 }, { "epoch": 1.24, "learning_rate": 3.250562996325708e-07, "logits/generated": 7.279388427734375, "logits/real": 6.833481788635254, "logps/generated": -2367.91845703125, "logps/real": -413.55615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -165.45962524414062, "rewards/margins": 161.93597412109375, "rewards/real": -3.523651599884033, "step": 3890 }, { "epoch": 1.25, "learning_rate": 3.24463671921299e-07, "logits/generated": 7.897918701171875, "logits/real": 6.473425388336182, "logps/generated": -2353.12744140625, "logps/real": -481.5474548339844, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -162.37850952148438, "rewards/margins": 156.157470703125, "rewards/real": -6.221034049987793, "step": 3900 }, { "epoch": 1.25, "learning_rate": 3.2387104421002725e-07, "logits/generated": 6.675393581390381, "logits/real": 6.795167446136475, "logps/generated": -2372.889404296875, "logps/real": -432.44390869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -165.90406799316406, "rewards/margins": 161.68272399902344, "rewards/real": -4.2213311195373535, "step": 3910 }, { "epoch": 1.25, "learning_rate": 3.232784164987555e-07, "logits/generated": 7.021627902984619, "logits/real": 5.929322719573975, "logps/generated": -2781.975341796875, "logps/real": -433.00604248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -203.5199737548828, "rewards/margins": 200.44808959960938, "rewards/real": -3.0719106197357178, "step": 3920 }, { "epoch": 1.26, "learning_rate": 3.226857887874837e-07, "logits/generated": 8.148340225219727, "logits/real": 7.0328545570373535, "logps/generated": -2545.451416015625, "logps/real": -491.960205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -177.5174102783203, "rewards/margins": 171.87539672851562, "rewards/real": -5.642013072967529, "step": 3930 }, { "epoch": 1.26, "learning_rate": 3.220931610762119e-07, "logits/generated": 6.777157783508301, "logits/real": 6.852158546447754, "logps/generated": -2362.10498046875, "logps/real": -448.08819580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -164.5742645263672, "rewards/margins": 160.95407104492188, "rewards/real": -3.6201977729797363, "step": 3940 }, { "epoch": 1.26, "learning_rate": 3.2150053336494016e-07, "logits/generated": 6.816174507141113, "logits/real": 7.267861843109131, "logps/generated": -2718.28076171875, "logps/real": -442.25030517578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/generated": -196.23947143554688, "rewards/margins": 191.14906311035156, "rewards/real": -5.090404510498047, "step": 3950 }, { "epoch": 1.27, "learning_rate": 3.2090790565366835e-07, "logits/generated": 5.682728290557861, "logits/real": 6.6203293800354, "logps/generated": -2534.62255859375, "logps/real": -425.3125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -177.59129333496094, "rewards/margins": 170.9332733154297, "rewards/real": -6.658015251159668, "step": 3960 }, { "epoch": 1.27, "learning_rate": 3.2031527794239654e-07, "logits/generated": 6.843630313873291, "logits/real": 7.117356777191162, "logps/generated": -2691.87158203125, "logps/real": -466.4651794433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -191.8715057373047, "rewards/margins": 184.27410888671875, "rewards/real": -7.59740686416626, "step": 3970 }, { "epoch": 1.27, "learning_rate": 3.197226502311248e-07, "logits/generated": 7.673319339752197, "logits/real": 6.387598037719727, "logps/generated": -2496.671630859375, "logps/real": -559.0091552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -172.75875854492188, "rewards/margins": 164.2571563720703, "rewards/real": -8.501619338989258, "step": 3980 }, { "epoch": 1.28, "learning_rate": 3.1913002251985297e-07, "logits/generated": 6.5598578453063965, "logits/real": 6.61072301864624, "logps/generated": -2428.232421875, "logps/real": -465.3314514160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -172.5666046142578, "rewards/margins": 166.1865692138672, "rewards/real": -6.380029678344727, "step": 3990 }, { "epoch": 1.28, "learning_rate": 3.185373948085812e-07, "logits/generated": 6.523262023925781, "logits/real": 6.9822540283203125, "logps/generated": -2460.596435546875, "logps/real": -441.5111389160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -173.28562927246094, "rewards/margins": 167.23583984375, "rewards/real": -6.049801826477051, "step": 4000 }, { "epoch": 1.28, "learning_rate": 3.1794476709730946e-07, "logits/generated": 6.357859134674072, "logits/real": 7.629593849182129, "logps/generated": -2833.427490234375, "logps/real": -428.63519287109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -198.49942016601562, "rewards/margins": 192.42971801757812, "rewards/real": -6.06970739364624, "step": 4010 }, { "epoch": 1.29, "learning_rate": 3.1735213938603764e-07, "logits/generated": 6.717772006988525, "logits/real": 7.786855220794678, "logps/generated": -2412.14208984375, "logps/real": -415.9322204589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -163.54840087890625, "rewards/margins": 159.63636779785156, "rewards/real": -3.912038803100586, "step": 4020 }, { "epoch": 1.29, "learning_rate": 3.167595116747659e-07, "logits/generated": 6.648594856262207, "logits/real": 6.840667724609375, "logps/generated": -2667.9111328125, "logps/real": -434.1949768066406, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -188.69412231445312, "rewards/margins": 183.03176879882812, "rewards/real": -5.66234827041626, "step": 4030 }, { "epoch": 1.29, "learning_rate": 3.1616688396349413e-07, "logits/generated": 7.558392524719238, "logits/real": 7.1805267333984375, "logps/generated": -2603.394287109375, "logps/real": -484.1072692871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -182.5196075439453, "rewards/margins": 175.5684356689453, "rewards/real": -6.9511542320251465, "step": 4040 }, { "epoch": 1.3, "learning_rate": 3.155742562522223e-07, "logits/generated": 8.282312393188477, "logits/real": 7.401039123535156, "logps/generated": -2387.93017578125, "logps/real": -477.3822326660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -166.53565979003906, "rewards/margins": 159.41375732421875, "rewards/real": -7.121930122375488, "step": 4050 }, { "epoch": 1.3, "learning_rate": 3.1498162854095056e-07, "logits/generated": 7.722745418548584, "logits/real": 7.224579811096191, "logps/generated": -2611.69287109375, "logps/real": -488.61334228515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -189.8408660888672, "rewards/margins": 182.3244171142578, "rewards/real": -7.516472816467285, "step": 4060 }, { "epoch": 1.3, "learning_rate": 3.143890008296788e-07, "logits/generated": 6.895688533782959, "logits/real": 7.316477298736572, "logps/generated": -2776.512939453125, "logps/real": -441.9691467285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -197.48301696777344, "rewards/margins": 192.2646484375, "rewards/real": -5.218371391296387, "step": 4070 }, { "epoch": 1.31, "learning_rate": 3.13796373118407e-07, "logits/generated": 6.771543025970459, "logits/real": 6.567793846130371, "logps/generated": -2591.949462890625, "logps/real": -494.53863525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -177.2228546142578, "rewards/margins": 168.33941650390625, "rewards/real": -8.883442878723145, "step": 4080 }, { "epoch": 1.31, "learning_rate": 3.1320374540713523e-07, "logits/generated": 7.506363868713379, "logits/real": 7.731897830963135, "logps/generated": -2318.86572265625, "logps/real": -514.2827758789062, "loss": 0.0079, "rewards/accuracies": 0.987500011920929, "rewards/generated": -156.94406127929688, "rewards/margins": 144.8009490966797, "rewards/real": -12.14311408996582, "step": 4090 }, { "epoch": 1.31, "learning_rate": 3.126111176958635e-07, "logits/generated": 8.115839958190918, "logits/real": 8.030169486999512, "logps/generated": -2271.76025390625, "logps/real": -515.77490234375, "loss": 0.0791, "rewards/accuracies": 1.0, "rewards/generated": -145.9998016357422, "rewards/margins": 132.3118133544922, "rewards/real": -13.688000679016113, "step": 4100 }, { "epoch": 1.32, "learning_rate": 3.1201848998459166e-07, "logits/generated": 8.120139122009277, "logits/real": 7.988180637359619, "logps/generated": -2277.80517578125, "logps/real": -556.2723999023438, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/generated": -152.77484130859375, "rewards/margins": 140.80984497070312, "rewards/real": -11.964984893798828, "step": 4110 }, { "epoch": 1.32, "learning_rate": 3.114258622733199e-07, "logits/generated": 8.126527786254883, "logits/real": 6.9796552658081055, "logps/generated": -2155.88671875, "logps/real": -507.7671813964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -134.9789276123047, "rewards/margins": 127.06071472167969, "rewards/real": -7.918220520019531, "step": 4120 }, { "epoch": 1.32, "learning_rate": 3.1083323456204815e-07, "logits/generated": 7.0497026443481445, "logits/real": 7.499265193939209, "logps/generated": -2420.482177734375, "logps/real": -463.6477966308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -167.57839965820312, "rewards/margins": 160.1279754638672, "rewards/real": -7.4504241943359375, "step": 4130 }, { "epoch": 1.32, "learning_rate": 3.1024060685077634e-07, "logits/generated": 7.964949131011963, "logits/real": 6.729344367980957, "logps/generated": -2112.84912109375, "logps/real": -502.67303466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -132.1100616455078, "rewards/margins": 126.28788757324219, "rewards/real": -5.8221917152404785, "step": 4140 }, { "epoch": 1.33, "learning_rate": 3.096479791395045e-07, "logits/generated": 8.390281677246094, "logits/real": 7.142557621002197, "logps/generated": -2174.401123046875, "logps/real": -464.7776794433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -137.80874633789062, "rewards/margins": 130.60086059570312, "rewards/real": -7.207865238189697, "step": 4150 }, { "epoch": 1.33, "learning_rate": 3.0905535142823277e-07, "logits/generated": 7.566963195800781, "logits/real": 7.848105430603027, "logps/generated": -2594.806396484375, "logps/real": -455.6148376464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -185.42282104492188, "rewards/margins": 181.20835876464844, "rewards/real": -4.2144575119018555, "step": 4160 }, { "epoch": 1.33, "learning_rate": 3.0846272371696095e-07, "logits/generated": 7.269606113433838, "logits/real": 7.383882999420166, "logps/generated": -2192.77783203125, "logps/real": -459.9957580566406, "loss": 0.0405, "rewards/accuracies": 1.0, "rewards/generated": -142.8792724609375, "rewards/margins": 134.2354736328125, "rewards/real": -8.643797874450684, "step": 4170 }, { "epoch": 1.34, "learning_rate": 3.078700960056892e-07, "logits/generated": 7.301158905029297, "logits/real": 8.111259460449219, "logps/generated": -2968.607421875, "logps/real": -521.5896606445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -223.33218383789062, "rewards/margins": 210.4127197265625, "rewards/real": -12.919461250305176, "step": 4180 }, { "epoch": 1.34, "learning_rate": 3.0727746829441744e-07, "logits/generated": 8.484736442565918, "logits/real": 8.390558242797852, "logps/generated": -2837.841064453125, "logps/real": -581.3633422851562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -211.1939697265625, "rewards/margins": 192.199951171875, "rewards/real": -18.9940185546875, "step": 4190 }, { "epoch": 1.34, "learning_rate": 3.0668484058314563e-07, "logits/generated": 6.443826198577881, "logits/real": 7.47738790512085, "logps/generated": -1975.098388671875, "logps/real": -456.21502685546875, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/generated": -117.27449798583984, "rewards/margins": 107.92955017089844, "rewards/real": -9.344947814941406, "step": 4200 }, { "epoch": 1.35, "learning_rate": 3.0609221287187387e-07, "logits/generated": 5.852715969085693, "logits/real": 6.890575408935547, "logps/generated": -1731.0277099609375, "logps/real": -516.9658813476562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -102.2426986694336, "rewards/margins": 88.90367126464844, "rewards/real": -13.339022636413574, "step": 4210 }, { "epoch": 1.35, "learning_rate": 3.054995851606021e-07, "logits/generated": 6.863028049468994, "logits/real": 7.549463748931885, "logps/generated": -1958.519775390625, "logps/real": -519.3914794921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -114.84622955322266, "rewards/margins": 100.8033676147461, "rewards/real": -14.042851448059082, "step": 4220 }, { "epoch": 1.35, "learning_rate": 3.049069574493303e-07, "logits/generated": 7.271523952484131, "logits/real": 7.303505897521973, "logps/generated": -1960.366455078125, "logps/real": -481.2220153808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -119.73619079589844, "rewards/margins": 109.40213775634766, "rewards/real": -10.33407211303711, "step": 4230 }, { "epoch": 1.36, "learning_rate": 3.0431432973805854e-07, "logits/generated": 7.341439723968506, "logits/real": 6.710594177246094, "logps/generated": -1788.5347900390625, "logps/real": -529.9918212890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -107.76602935791016, "rewards/margins": 94.6612319946289, "rewards/real": -13.1048002243042, "step": 4240 }, { "epoch": 1.36, "learning_rate": 3.037217020267868e-07, "logits/generated": 6.401370048522949, "logits/real": 6.758606910705566, "logps/generated": -1762.8291015625, "logps/real": -464.9837341308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -100.84037017822266, "rewards/margins": 90.28343963623047, "rewards/real": -10.556928634643555, "step": 4250 }, { "epoch": 1.36, "learning_rate": 3.0312907431551497e-07, "logits/generated": 6.804314613342285, "logits/real": 7.5946550369262695, "logps/generated": -1815.3638916015625, "logps/real": -532.9848022460938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -109.94930267333984, "rewards/margins": 98.46034240722656, "rewards/real": -11.488958358764648, "step": 4260 }, { "epoch": 1.37, "learning_rate": 3.025364466042432e-07, "logits/generated": 6.5734663009643555, "logits/real": 7.820903778076172, "logps/generated": -2998.07080078125, "logps/real": -644.0723876953125, "loss": 0.0409, "rewards/accuracies": 0.987500011920929, "rewards/generated": -219.39102172851562, "rewards/margins": 194.19070434570312, "rewards/real": -25.2003231048584, "step": 4270 }, { "epoch": 1.37, "learning_rate": 3.0194381889297146e-07, "logits/generated": 7.589621067047119, "logits/real": 8.210054397583008, "logps/generated": -2814.91845703125, "logps/real": -605.0895385742188, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/generated": -204.48526000976562, "rewards/margins": 185.1283721923828, "rewards/real": -19.356874465942383, "step": 4280 }, { "epoch": 1.37, "learning_rate": 3.0135119118169965e-07, "logits/generated": 7.246214866638184, "logits/real": 7.674818992614746, "logps/generated": -2459.682861328125, "logps/real": -565.787353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -166.65927124023438, "rewards/margins": 154.8263397216797, "rewards/real": -11.832939147949219, "step": 4290 }, { "epoch": 1.38, "learning_rate": 3.007585634704279e-07, "logits/generated": 7.402412414550781, "logits/real": 8.648283004760742, "logps/generated": -2308.35107421875, "logps/real": -483.53265380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -153.52896118164062, "rewards/margins": 142.6757049560547, "rewards/real": -10.853252410888672, "step": 4300 }, { "epoch": 1.38, "learning_rate": 3.0016593575915613e-07, "logits/generated": 7.074282646179199, "logits/real": 7.691385746002197, "logps/generated": -2258.10986328125, "logps/real": -554.7071533203125, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/generated": -150.49899291992188, "rewards/margins": 143.2213897705078, "rewards/real": -7.2776360511779785, "step": 4310 }, { "epoch": 1.38, "learning_rate": 2.995733080478843e-07, "logits/generated": 6.5355024337768555, "logits/real": 6.9088134765625, "logps/generated": -2212.018798828125, "logps/real": -534.315185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -148.31167602539062, "rewards/margins": 137.9415283203125, "rewards/real": -10.370162010192871, "step": 4320 }, { "epoch": 1.39, "learning_rate": 2.989806803366125e-07, "logits/generated": 6.952958583831787, "logits/real": 8.264680862426758, "logps/generated": -2330.53125, "logps/real": -539.1000366210938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -154.36407470703125, "rewards/margins": 140.4483642578125, "rewards/real": -13.915702819824219, "step": 4330 }, { "epoch": 1.39, "learning_rate": 2.9838805262534075e-07, "logits/generated": 7.5016679763793945, "logits/real": 8.173989295959473, "logps/generated": -2236.009521484375, "logps/real": -469.66583251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -152.7703399658203, "rewards/margins": 144.41993713378906, "rewards/real": -8.350407600402832, "step": 4340 }, { "epoch": 1.39, "learning_rate": 2.9779542491406894e-07, "logits/generated": 8.017586708068848, "logits/real": 7.663585662841797, "logps/generated": -2705.009521484375, "logps/real": -443.62298583984375, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/generated": -196.3282470703125, "rewards/margins": 187.56753540039062, "rewards/real": -8.760734558105469, "step": 4350 }, { "epoch": 1.4, "learning_rate": 2.972027972027972e-07, "logits/generated": 7.786769866943359, "logits/real": 7.8234100341796875, "logps/generated": -2880.47216796875, "logps/real": -486.453369140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -215.488525390625, "rewards/margins": 209.78616333007812, "rewards/real": -5.702385425567627, "step": 4360 }, { "epoch": 1.4, "learning_rate": 2.966101694915254e-07, "logits/generated": 8.415030479431152, "logits/real": 8.320170402526855, "logps/generated": -3315.02001953125, "logps/real": -495.68292236328125, "loss": 0.0489, "rewards/accuracies": 1.0, "rewards/generated": -250.76968383789062, "rewards/margins": 242.88296508789062, "rewards/real": -7.886709690093994, "step": 4370 }, { "epoch": 1.4, "learning_rate": 2.960175417802536e-07, "logits/generated": 7.243955135345459, "logits/real": 7.814364433288574, "logps/generated": -2542.96240234375, "logps/real": -407.92120361328125, "loss": 0.0154, "rewards/accuracies": 0.987500011920929, "rewards/generated": -179.1906280517578, "rewards/margins": 177.7555694580078, "rewards/real": -1.435058832168579, "step": 4380 }, { "epoch": 1.4, "learning_rate": 2.9542491406898185e-07, "logits/generated": 7.680856227874756, "logits/real": 7.6158318519592285, "logps/generated": -2723.21630859375, "logps/real": -425.43597412109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -195.77938842773438, "rewards/margins": 194.69406127929688, "rewards/real": -1.0853259563446045, "step": 4390 }, { "epoch": 1.41, "learning_rate": 2.948322863577101e-07, "logits/generated": 7.492241859436035, "logits/real": 6.759080410003662, "logps/generated": -2588.543701171875, "logps/real": -437.13385009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -190.91671752929688, "rewards/margins": 188.39358520507812, "rewards/real": -2.5231311321258545, "step": 4400 }, { "epoch": 1.41, "learning_rate": 2.942396586464383e-07, "logits/generated": 7.66998815536499, "logits/real": 7.117323875427246, "logps/generated": -2512.29736328125, "logps/real": -399.9268493652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -173.94284057617188, "rewards/margins": 173.9367218017578, "rewards/real": -0.006123733706772327, "step": 4410 }, { "epoch": 1.41, "learning_rate": 2.936470309351665e-07, "logits/generated": 7.837294101715088, "logits/real": 7.6137375831604, "logps/generated": -3012.117919921875, "logps/real": -397.12384033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -220.60427856445312, "rewards/margins": 219.8526611328125, "rewards/real": -0.7516233921051025, "step": 4420 }, { "epoch": 1.42, "learning_rate": 2.9305440322389477e-07, "logits/generated": 8.811724662780762, "logits/real": 7.3468828201293945, "logps/generated": -2903.438720703125, "logps/real": -409.2351379394531, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -215.64688110351562, "rewards/margins": 215.282958984375, "rewards/real": -0.3639170825481415, "step": 4430 }, { "epoch": 1.42, "learning_rate": 2.9246177551262296e-07, "logits/generated": 7.446253776550293, "logits/real": 8.035263061523438, "logps/generated": -2718.13916015625, "logps/real": -400.2939758300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -194.1878662109375, "rewards/margins": 193.2230987548828, "rewards/real": -0.9647554159164429, "step": 4440 }, { "epoch": 1.42, "learning_rate": 2.918691478013512e-07, "logits/generated": 7.747132778167725, "logits/real": 7.592209815979004, "logps/generated": -2645.40380859375, "logps/real": -396.52777099609375, "loss": 0.0107, "rewards/accuracies": 0.987500011920929, "rewards/generated": -188.10723876953125, "rewards/margins": 185.81295776367188, "rewards/real": -2.2942795753479004, "step": 4450 }, { "epoch": 1.43, "learning_rate": 2.9127652009007944e-07, "logits/generated": 7.9106645584106445, "logits/real": 7.4236345291137695, "logps/generated": -2623.307861328125, "logps/real": -426.29827880859375, "loss": 0.0117, "rewards/accuracies": 1.0, "rewards/generated": -183.82473754882812, "rewards/margins": 180.20974731445312, "rewards/real": -3.61498761177063, "step": 4460 }, { "epoch": 1.43, "learning_rate": 2.9068389237880763e-07, "logits/generated": 8.156518936157227, "logits/real": 7.277922630310059, "logps/generated": -2512.991455078125, "logps/real": -433.336669921875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -173.69650268554688, "rewards/margins": 169.3484649658203, "rewards/real": -4.348060607910156, "step": 4470 }, { "epoch": 1.43, "learning_rate": 2.9009126466753587e-07, "logits/generated": 8.33106803894043, "logits/real": 7.400674343109131, "logps/generated": -2617.16162109375, "logps/real": -444.24322509765625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -181.6017608642578, "rewards/margins": 177.15853881835938, "rewards/real": -4.443192958831787, "step": 4480 }, { "epoch": 1.44, "learning_rate": 2.8949863695626406e-07, "logits/generated": 7.472479820251465, "logits/real": 6.314203262329102, "logps/generated": -2646.64990234375, "logps/real": -443.6861267089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -191.76394653320312, "rewards/margins": 188.41502380371094, "rewards/real": -3.3489203453063965, "step": 4490 }, { "epoch": 1.44, "learning_rate": 2.889060092449923e-07, "logits/generated": 8.076459884643555, "logits/real": 7.433452606201172, "logps/generated": -2658.54248046875, "logps/real": -408.8074951171875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -191.07286071777344, "rewards/margins": 187.4102783203125, "rewards/real": -3.6625759601593018, "step": 4500 }, { "epoch": 1.44, "learning_rate": 2.883133815337205e-07, "logits/generated": 6.999411106109619, "logits/real": 6.942862510681152, "logps/generated": -2863.99853515625, "logps/real": -452.23614501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -208.03857421875, "rewards/margins": 203.42771911621094, "rewards/real": -4.610849857330322, "step": 4510 }, { "epoch": 1.45, "learning_rate": 2.877207538224487e-07, "logits/generated": 7.795876502990723, "logits/real": 7.5247802734375, "logps/generated": -2747.62060546875, "logps/real": -441.92431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -193.22499084472656, "rewards/margins": 189.99136352539062, "rewards/real": -3.23362398147583, "step": 4520 }, { "epoch": 1.45, "learning_rate": 2.871281261111769e-07, "logits/generated": 7.909980773925781, "logits/real": 7.120866298675537, "logps/generated": -2779.100341796875, "logps/real": -423.2982482910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.66387939453125, "rewards/margins": 199.01553344726562, "rewards/real": -3.6483471393585205, "step": 4530 }, { "epoch": 1.45, "learning_rate": 2.8653549839990516e-07, "logits/generated": 7.661166191101074, "logits/real": 7.883735656738281, "logps/generated": -2676.65087890625, "logps/real": -412.61376953125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -191.3967742919922, "rewards/margins": 188.47116088867188, "rewards/real": -2.925626277923584, "step": 4540 }, { "epoch": 1.46, "learning_rate": 2.8594287068863335e-07, "logits/generated": 7.539450645446777, "logits/real": 6.881582736968994, "logps/generated": -2565.171142578125, "logps/real": -471.1728515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -181.65054321289062, "rewards/margins": 178.79022216796875, "rewards/real": -2.8603320121765137, "step": 4550 }, { "epoch": 1.46, "learning_rate": 2.853502429773616e-07, "logits/generated": 7.967037200927734, "logits/real": 7.338030815124512, "logps/generated": -2603.243408203125, "logps/real": -449.6656188964844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -188.92459106445312, "rewards/margins": 185.304931640625, "rewards/real": -3.6196701526641846, "step": 4560 }, { "epoch": 1.46, "learning_rate": 2.8475761526608984e-07, "logits/generated": 7.325234413146973, "logits/real": 7.4631781578063965, "logps/generated": -2588.20068359375, "logps/real": -465.1485290527344, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -180.02182006835938, "rewards/margins": 173.5731658935547, "rewards/real": -6.448664665222168, "step": 4570 }, { "epoch": 1.47, "learning_rate": 2.84164987554818e-07, "logits/generated": 6.864066123962402, "logits/real": 6.22031831741333, "logps/generated": -2175.380615234375, "logps/real": -479.5748596191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -143.86856079101562, "rewards/margins": 137.9842071533203, "rewards/real": -5.884347915649414, "step": 4580 }, { "epoch": 1.47, "learning_rate": 2.8357235984354627e-07, "logits/generated": 7.411283016204834, "logits/real": 7.3131818771362305, "logps/generated": -2320.52978515625, "logps/real": -552.3718872070312, "loss": 0.02, "rewards/accuracies": 1.0, "rewards/generated": -155.20518493652344, "rewards/margins": 141.52383422851562, "rewards/real": -13.681355476379395, "step": 4590 }, { "epoch": 1.47, "learning_rate": 2.829797321322745e-07, "logits/generated": 6.938172340393066, "logits/real": 7.293942928314209, "logps/generated": -2462.74462890625, "logps/real": -593.510986328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -169.77130126953125, "rewards/margins": 154.20863342285156, "rewards/real": -15.562655448913574, "step": 4600 }, { "epoch": 1.48, "learning_rate": 2.823871044210027e-07, "logits/generated": 6.751898765563965, "logits/real": 7.314972877502441, "logps/generated": -2624.970458984375, "logps/real": -549.8123779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -184.32334899902344, "rewards/margins": 173.7210235595703, "rewards/real": -10.602333068847656, "step": 4610 }, { "epoch": 1.48, "learning_rate": 2.8179447670973094e-07, "logits/generated": 7.3045806884765625, "logits/real": 7.236529350280762, "logps/generated": -2422.36669921875, "logps/real": -522.1522216796875, "loss": 0.0326, "rewards/accuracies": 0.987500011920929, "rewards/generated": -166.92813110351562, "rewards/margins": 155.78106689453125, "rewards/real": -11.147059440612793, "step": 4620 }, { "epoch": 1.48, "learning_rate": 2.812018489984592e-07, "logits/generated": 7.0456695556640625, "logits/real": 7.839809417724609, "logps/generated": -2818.94482421875, "logps/real": -550.6907958984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.38600158691406, "rewards/margins": 189.027099609375, "rewards/real": -13.358906745910645, "step": 4630 }, { "epoch": 1.48, "learning_rate": 2.8060922128718737e-07, "logits/generated": 6.0119524002075195, "logits/real": 7.09613561630249, "logps/generated": -2804.71826171875, "logps/real": -507.01739501953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -201.51409912109375, "rewards/margins": 187.14071655273438, "rewards/real": -14.37339973449707, "step": 4640 }, { "epoch": 1.49, "learning_rate": 2.800165935759156e-07, "logits/generated": 7.277548313140869, "logits/real": 7.270073890686035, "logps/generated": -2621.72509765625, "logps/real": -555.7769775390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -189.48886108398438, "rewards/margins": 174.40721130371094, "rewards/real": -15.081639289855957, "step": 4650 }, { "epoch": 1.49, "learning_rate": 2.7942396586464385e-07, "logits/generated": 7.730276584625244, "logits/real": 7.625467777252197, "logps/generated": -2829.56201171875, "logps/real": -579.9598388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.27479553222656, "rewards/margins": 188.8174285888672, "rewards/real": -13.457348823547363, "step": 4660 }, { "epoch": 1.49, "learning_rate": 2.7883133815337204e-07, "logits/generated": 6.669744968414307, "logits/real": 7.450567722320557, "logps/generated": -2698.950439453125, "logps/real": -496.260009765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -195.274658203125, "rewards/margins": 181.64817810058594, "rewards/real": -13.626482963562012, "step": 4670 }, { "epoch": 1.5, "learning_rate": 2.7823871044210023e-07, "logits/generated": 7.455049991607666, "logits/real": 7.259799957275391, "logps/generated": -2254.56103515625, "logps/real": -435.63458251953125, "loss": 0.1565, "rewards/accuracies": 1.0, "rewards/generated": -148.1844940185547, "rewards/margins": 143.66677856445312, "rewards/real": -4.517739295959473, "step": 4680 }, { "epoch": 1.5, "learning_rate": 2.776460827308285e-07, "logits/generated": 7.497802734375, "logits/real": 7.716940879821777, "logps/generated": -2246.366455078125, "logps/real": -465.3155822753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -151.08238220214844, "rewards/margins": 144.60714721679688, "rewards/real": -6.475224494934082, "step": 4690 }, { "epoch": 1.5, "learning_rate": 2.7705345501955666e-07, "logits/generated": 7.054473876953125, "logits/real": 7.6123785972595215, "logps/generated": -2186.915771484375, "logps/real": -430.53167724609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -143.75955200195312, "rewards/margins": 137.17955017089844, "rewards/real": -6.57998514175415, "step": 4700 }, { "epoch": 1.51, "learning_rate": 2.764608273082849e-07, "logits/generated": 7.474231719970703, "logits/real": 6.612374305725098, "logps/generated": -2001.58203125, "logps/real": -440.1583557128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -128.44528198242188, "rewards/margins": 125.77547454833984, "rewards/real": -2.669783115386963, "step": 4710 }, { "epoch": 1.51, "learning_rate": 2.7586819959701315e-07, "logits/generated": 6.0893378257751465, "logits/real": 6.608295440673828, "logps/generated": -2189.09130859375, "logps/real": -398.5833435058594, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/generated": -140.1101837158203, "rewards/margins": 137.45913696289062, "rewards/real": -2.651064157485962, "step": 4720 }, { "epoch": 1.51, "learning_rate": 2.7527557188574134e-07, "logits/generated": 7.037393093109131, "logits/real": 6.2926740646362305, "logps/generated": -1850.936767578125, "logps/real": -426.7577209472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -114.68925476074219, "rewards/margins": 112.43072509765625, "rewards/real": -2.258530378341675, "step": 4730 }, { "epoch": 1.52, "learning_rate": 2.746829441744696e-07, "logits/generated": 7.912131309509277, "logits/real": 6.785031795501709, "logps/generated": -2197.58349609375, "logps/real": -454.2235412597656, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/generated": -143.87266540527344, "rewards/margins": 140.72537231445312, "rewards/real": -3.147298574447632, "step": 4740 }, { "epoch": 1.52, "learning_rate": 2.740903164631978e-07, "logits/generated": 6.640511512756348, "logits/real": 7.748732089996338, "logps/generated": -2188.1953125, "logps/real": -411.6830139160156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -142.4846649169922, "rewards/margins": 139.3546600341797, "rewards/real": -3.129995584487915, "step": 4750 }, { "epoch": 1.52, "learning_rate": 2.73497688751926e-07, "logits/generated": 6.979972839355469, "logits/real": 7.557955265045166, "logps/generated": -2218.16357421875, "logps/real": -481.06951904296875, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/generated": -147.9777069091797, "rewards/margins": 139.67884826660156, "rewards/real": -8.298871994018555, "step": 4760 }, { "epoch": 1.53, "learning_rate": 2.7290506104065425e-07, "logits/generated": 7.050989627838135, "logits/real": 6.6954755783081055, "logps/generated": -2262.67724609375, "logps/real": -483.3704528808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -149.915771484375, "rewards/margins": 143.09829711914062, "rewards/real": -6.8174591064453125, "step": 4770 }, { "epoch": 1.53, "learning_rate": 2.723124333293825e-07, "logits/generated": 7.4321112632751465, "logits/real": 7.436760902404785, "logps/generated": -2241.14990234375, "logps/real": -474.9242248535156, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -145.80288696289062, "rewards/margins": 137.92971801757812, "rewards/real": -7.873185157775879, "step": 4780 }, { "epoch": 1.53, "learning_rate": 2.717198056181107e-07, "logits/generated": 6.777296543121338, "logits/real": 7.176546573638916, "logps/generated": -2272.62744140625, "logps/real": -433.69305419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -156.0865020751953, "rewards/margins": 150.05429077148438, "rewards/real": -6.032193183898926, "step": 4790 }, { "epoch": 1.54, "learning_rate": 2.711271779068389e-07, "logits/generated": 6.658607482910156, "logits/real": 7.5594000816345215, "logps/generated": -2327.70166015625, "logps/real": -484.10516357421875, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/generated": -154.7298583984375, "rewards/margins": 146.73245239257812, "rewards/real": -7.9973955154418945, "step": 4800 }, { "epoch": 1.54, "learning_rate": 2.7053455019556716e-07, "logits/generated": 7.463006019592285, "logits/real": 7.302262783050537, "logps/generated": -2172.80712890625, "logps/real": -473.29510498046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -144.82408142089844, "rewards/margins": 135.94667053222656, "rewards/real": -8.87741756439209, "step": 4810 }, { "epoch": 1.54, "learning_rate": 2.6994192248429535e-07, "logits/generated": 6.971390724182129, "logits/real": 6.913613796234131, "logps/generated": -2189.219482421875, "logps/real": -489.13812255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -142.93511962890625, "rewards/margins": 133.18112182617188, "rewards/real": -9.75399398803711, "step": 4820 }, { "epoch": 1.55, "learning_rate": 2.693492947730236e-07, "logits/generated": 6.997686862945557, "logits/real": 7.718747138977051, "logps/generated": -2337.818359375, "logps/real": -439.18890380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -156.95449829101562, "rewards/margins": 149.87142944335938, "rewards/real": -7.0830559730529785, "step": 4830 }, { "epoch": 1.55, "learning_rate": 2.6875666706175184e-07, "logits/generated": 7.199464321136475, "logits/real": 7.205402374267578, "logps/generated": -2197.54248046875, "logps/real": -481.455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -147.22976684570312, "rewards/margins": 138.8229217529297, "rewards/real": -8.406850814819336, "step": 4840 }, { "epoch": 1.55, "learning_rate": 2.6816403935048e-07, "logits/generated": 7.755878448486328, "logits/real": 7.682600498199463, "logps/generated": -2308.74951171875, "logps/real": -422.48333740234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.76828002929688, "rewards/margins": 151.66796875, "rewards/real": -6.100300312042236, "step": 4850 }, { "epoch": 1.56, "learning_rate": 2.675714116392082e-07, "logits/generated": 6.853369235992432, "logits/real": 6.925137996673584, "logps/generated": -2307.2275390625, "logps/real": -499.6365661621094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -153.62332153320312, "rewards/margins": 146.478759765625, "rewards/real": -7.144566535949707, "step": 4860 }, { "epoch": 1.56, "learning_rate": 2.6697878392793646e-07, "logits/generated": 7.5412421226501465, "logits/real": 7.760954856872559, "logps/generated": -2252.853759765625, "logps/real": -503.976318359375, "loss": 0.0424, "rewards/accuracies": 0.987500011920929, "rewards/generated": -154.0491180419922, "rewards/margins": 146.37046813964844, "rewards/real": -7.678671360015869, "step": 4870 }, { "epoch": 1.56, "learning_rate": 2.6638615621666465e-07, "logits/generated": 7.391852378845215, "logits/real": 6.3344221115112305, "logps/generated": -2129.658935546875, "logps/real": -482.0923767089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -139.52426147460938, "rewards/margins": 134.957275390625, "rewards/real": -4.567004203796387, "step": 4880 }, { "epoch": 1.56, "learning_rate": 2.657935285053929e-07, "logits/generated": 7.258772850036621, "logits/real": 6.633763790130615, "logps/generated": -1791.5784912109375, "logps/real": -404.0260009765625, "loss": 0.0421, "rewards/accuracies": 1.0, "rewards/generated": -104.17185974121094, "rewards/margins": 103.7143325805664, "rewards/real": -0.4575316905975342, "step": 4890 }, { "epoch": 1.57, "learning_rate": 2.6520090079412113e-07, "logits/generated": 7.232030391693115, "logits/real": 6.52506160736084, "logps/generated": -1824.328369140625, "logps/real": -355.64373779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -109.07852935791016, "rewards/margins": 114.36016845703125, "rewards/real": 5.281638145446777, "step": 4900 }, { "epoch": 1.57, "learning_rate": 2.646082730828493e-07, "logits/generated": 7.473818778991699, "logits/real": 6.791534423828125, "logps/generated": -1847.702880859375, "logps/real": -350.39422607421875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -108.6343002319336, "rewards/margins": 111.06181335449219, "rewards/real": 2.4275214672088623, "step": 4910 }, { "epoch": 1.57, "learning_rate": 2.6401564537157756e-07, "logits/generated": 7.597383975982666, "logits/real": 7.524439334869385, "logps/generated": -2059.44970703125, "logps/real": -344.98089599609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -133.8095703125, "rewards/margins": 137.49562072753906, "rewards/real": 3.6860454082489014, "step": 4920 }, { "epoch": 1.58, "learning_rate": 2.634230176603058e-07, "logits/generated": 6.819690704345703, "logits/real": 6.521054267883301, "logps/generated": -2235.029296875, "logps/real": -382.4305725097656, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -143.39163208007812, "rewards/margins": 145.2124481201172, "rewards/real": 1.8208144903182983, "step": 4930 }, { "epoch": 1.58, "learning_rate": 2.62830389949034e-07, "logits/generated": 7.091064453125, "logits/real": 6.902243137359619, "logps/generated": -2088.286865234375, "logps/real": -348.2473449707031, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -136.1367950439453, "rewards/margins": 138.35032653808594, "rewards/real": 2.213547706604004, "step": 4940 }, { "epoch": 1.58, "learning_rate": 2.6223776223776223e-07, "logits/generated": 8.405268669128418, "logits/real": 7.483437538146973, "logps/generated": -2317.490478515625, "logps/real": -453.6044006347656, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -153.71353149414062, "rewards/margins": 153.86129760742188, "rewards/real": 0.14775371551513672, "step": 4950 }, { "epoch": 1.59, "learning_rate": 2.616451345264905e-07, "logits/generated": 7.668790340423584, "logits/real": 7.184802055358887, "logps/generated": -2167.385009765625, "logps/real": -438.9215393066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -143.56048583984375, "rewards/margins": 140.9470672607422, "rewards/real": -2.613417387008667, "step": 4960 }, { "epoch": 1.59, "learning_rate": 2.6105250681521866e-07, "logits/generated": 7.745580196380615, "logits/real": 6.172548770904541, "logps/generated": -2122.049072265625, "logps/real": -451.3775329589844, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/generated": -139.67771911621094, "rewards/margins": 137.90335083007812, "rewards/real": -1.774374008178711, "step": 4970 }, { "epoch": 1.59, "learning_rate": 2.604598791039469e-07, "logits/generated": 8.271980285644531, "logits/real": 7.178069114685059, "logps/generated": -2186.977294921875, "logps/real": -548.88427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -144.94638061523438, "rewards/margins": 138.5152130126953, "rewards/real": -6.431172847747803, "step": 4980 }, { "epoch": 1.6, "learning_rate": 2.5986725139267515e-07, "logits/generated": 7.306417942047119, "logits/real": 7.827292442321777, "logps/generated": -2435.127685546875, "logps/real": -443.4646911621094, "loss": 0.0276, "rewards/accuracies": 1.0, "rewards/generated": -162.09469604492188, "rewards/margins": 156.48226928710938, "rewards/real": -5.612419128417969, "step": 4990 }, { "epoch": 1.6, "learning_rate": 2.5927462368140334e-07, "logits/generated": 7.162152290344238, "logits/real": 4.843944072723389, "logps/generated": -2044.205810546875, "logps/real": -459.22137451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -127.7347183227539, "rewards/margins": 126.4423599243164, "rewards/real": -1.2923494577407837, "step": 5000 }, { "epoch": 1.6, "learning_rate": 2.586819959701316e-07, "logits/generated": 7.989349365234375, "logits/real": 7.042845726013184, "logps/generated": -2141.964599609375, "logps/real": -502.6659240722656, "loss": 0.0097, "rewards/accuracies": 1.0, "rewards/generated": -140.75088500976562, "rewards/margins": 134.47994995117188, "rewards/real": -6.270931243896484, "step": 5010 }, { "epoch": 1.61, "learning_rate": 2.580893682588598e-07, "logits/generated": 7.6411895751953125, "logits/real": 7.9897613525390625, "logps/generated": -2526.83984375, "logps/real": -510.6293029785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -173.56887817382812, "rewards/margins": 163.55758666992188, "rewards/real": -10.011324882507324, "step": 5020 }, { "epoch": 1.61, "learning_rate": 2.57496740547588e-07, "logits/generated": 7.8199968338012695, "logits/real": 7.815465450286865, "logps/generated": -2247.60693359375, "logps/real": -504.68585205078125, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/generated": -151.43939208984375, "rewards/margins": 141.21299743652344, "rewards/real": -10.22639274597168, "step": 5030 }, { "epoch": 1.61, "learning_rate": 2.569041128363162e-07, "logits/generated": 8.091730117797852, "logits/real": 7.535861968994141, "logps/generated": -2376.37548828125, "logps/real": -485.2613830566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -164.04910278320312, "rewards/margins": 156.67401123046875, "rewards/real": -7.375074863433838, "step": 5040 }, { "epoch": 1.62, "learning_rate": 2.563114851250444e-07, "logits/generated": 7.85870885848999, "logits/real": 7.771730899810791, "logps/generated": -2381.43505859375, "logps/real": -499.9580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -161.9745330810547, "rewards/margins": 151.8955078125, "rewards/real": -10.079025268554688, "step": 5050 }, { "epoch": 1.62, "learning_rate": 2.5571885741377263e-07, "logits/generated": 8.072257995605469, "logits/real": 7.819342136383057, "logps/generated": -2683.27001953125, "logps/real": -472.4505310058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -190.0775604248047, "rewards/margins": 181.41465759277344, "rewards/real": -8.662893295288086, "step": 5060 }, { "epoch": 1.62, "learning_rate": 2.5512622970250087e-07, "logits/generated": 7.998743534088135, "logits/real": 7.717855930328369, "logps/generated": -2780.219970703125, "logps/real": -460.5574645996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -191.01040649414062, "rewards/margins": 182.91395568847656, "rewards/real": -8.096458435058594, "step": 5070 }, { "epoch": 1.63, "learning_rate": 2.5453360199122906e-07, "logits/generated": 8.046201705932617, "logits/real": 7.556464195251465, "logps/generated": -2279.37158203125, "logps/real": -512.0670776367188, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -154.32720947265625, "rewards/margins": 143.8090057373047, "rewards/real": -10.518208503723145, "step": 5080 }, { "epoch": 1.63, "learning_rate": 2.539409742799573e-07, "logits/generated": 7.850809574127197, "logits/real": 7.626922607421875, "logps/generated": -2212.55810546875, "logps/real": -517.0748901367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -148.17117309570312, "rewards/margins": 137.22512817382812, "rewards/real": -10.946049690246582, "step": 5090 }, { "epoch": 1.63, "learning_rate": 2.5334834656868554e-07, "logits/generated": 7.53781270980835, "logits/real": 7.264245510101318, "logps/generated": -2217.155517578125, "logps/real": -443.34649658203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -147.2423858642578, "rewards/margins": 140.56182861328125, "rewards/real": -6.6805572509765625, "step": 5100 }, { "epoch": 1.64, "learning_rate": 2.5275571885741373e-07, "logits/generated": 8.794476509094238, "logits/real": 7.927506446838379, "logps/generated": -2229.360595703125, "logps/real": -541.6312255859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -153.00613403320312, "rewards/margins": 141.44790649414062, "rewards/real": -11.558216094970703, "step": 5110 }, { "epoch": 1.64, "learning_rate": 2.52163091146142e-07, "logits/generated": 8.627805709838867, "logits/real": 7.943991184234619, "logps/generated": -2598.455810546875, "logps/real": -535.0739135742188, "loss": 0.0337, "rewards/accuracies": 1.0, "rewards/generated": -184.5675506591797, "rewards/margins": 175.76707458496094, "rewards/real": -8.80048656463623, "step": 5120 }, { "epoch": 1.64, "learning_rate": 2.515704634348702e-07, "logits/generated": 8.627435684204102, "logits/real": 7.940188407897949, "logps/generated": -2737.425537109375, "logps/real": -540.2377319335938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -195.58572387695312, "rewards/margins": 181.24163818359375, "rewards/real": -14.344070434570312, "step": 5130 }, { "epoch": 1.64, "learning_rate": 2.509778357235984e-07, "logits/generated": 7.8834028244018555, "logits/real": 8.102129936218262, "logps/generated": -3115.322265625, "logps/real": -503.1044006347656, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -233.7810516357422, "rewards/margins": 221.2608642578125, "rewards/real": -12.520197868347168, "step": 5140 }, { "epoch": 1.65, "learning_rate": 2.5038520801232665e-07, "logits/generated": 8.645820617675781, "logits/real": 7.603280067443848, "logps/generated": -3009.056396484375, "logps/real": -626.7496948242188, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -226.4141387939453, "rewards/margins": 204.79544067382812, "rewards/real": -21.61870574951172, "step": 5150 }, { "epoch": 1.65, "learning_rate": 2.497925803010549e-07, "logits/generated": 7.811500549316406, "logits/real": 7.1565656661987305, "logps/generated": -3015.593505859375, "logps/real": -629.2822265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.2169189453125, "rewards/margins": 208.8342742919922, "rewards/real": -22.382633209228516, "step": 5160 }, { "epoch": 1.65, "learning_rate": 2.491999525897831e-07, "logits/generated": 7.861269474029541, "logits/real": 7.1248459815979, "logps/generated": -2768.34814453125, "logps/real": -599.864990234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -210.48867797851562, "rewards/margins": 188.94786071777344, "rewards/real": -21.540815353393555, "step": 5170 }, { "epoch": 1.66, "learning_rate": 2.486073248785113e-07, "logits/generated": 6.7409563064575195, "logits/real": 7.79912805557251, "logps/generated": -2849.6728515625, "logps/real": -656.2620849609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -210.5825653076172, "rewards/margins": 182.74166870117188, "rewards/real": -27.840911865234375, "step": 5180 }, { "epoch": 1.66, "learning_rate": 2.480146971672395e-07, "logits/generated": 7.318844795227051, "logits/real": 8.106212615966797, "logps/generated": -3591.212890625, "logps/real": -670.74560546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -287.09063720703125, "rewards/margins": 260.7122802734375, "rewards/real": -26.37836265563965, "step": 5190 }, { "epoch": 1.66, "learning_rate": 2.4742206945596775e-07, "logits/generated": 7.775576591491699, "logits/real": 6.9056220054626465, "logps/generated": -2697.70751953125, "logps/real": -661.2874755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -195.13101196289062, "rewards/margins": 170.59951782226562, "rewards/real": -24.53151512145996, "step": 5200 }, { "epoch": 1.67, "learning_rate": 2.46829441744696e-07, "logits/generated": 7.6773576736450195, "logits/real": 7.135867118835449, "logps/generated": -2963.53662109375, "logps/real": -649.7005615234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -221.45870971679688, "rewards/margins": 196.55508422851562, "rewards/real": -24.903636932373047, "step": 5210 }, { "epoch": 1.67, "learning_rate": 2.462368140334242e-07, "logits/generated": 8.583096504211426, "logits/real": 7.516268253326416, "logps/generated": -3336.967529296875, "logps/real": -641.0547485351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -256.8563232421875, "rewards/margins": 233.74984741210938, "rewards/real": -23.106481552124023, "step": 5220 }, { "epoch": 1.67, "learning_rate": 2.456441863221524e-07, "logits/generated": 7.5980939865112305, "logits/real": 7.531899929046631, "logps/generated": -3121.919677734375, "logps/real": -648.8644409179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.12094116210938, "rewards/margins": 209.6457061767578, "rewards/real": -25.475238800048828, "step": 5230 }, { "epoch": 1.68, "learning_rate": 2.4505155861088067e-07, "logits/generated": 7.146268367767334, "logits/real": 6.935011386871338, "logps/generated": -3119.0625, "logps/real": -633.248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.9759979248047, "rewards/margins": 213.655029296875, "rewards/real": -23.320941925048828, "step": 5240 }, { "epoch": 1.68, "learning_rate": 2.4445893089960885e-07, "logits/generated": 8.395730972290039, "logits/real": 7.060798645019531, "logps/generated": -3292.80322265625, "logps/real": -698.2159423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -249.26412963867188, "rewards/margins": 225.86160278320312, "rewards/real": -23.402545928955078, "step": 5250 }, { "epoch": 1.68, "learning_rate": 2.4386630318833704e-07, "logits/generated": 7.52727746963501, "logits/real": 7.174862861633301, "logps/generated": -3100.005859375, "logps/real": -622.499267578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -237.8625946044922, "rewards/margins": 215.4132080078125, "rewards/real": -22.449413299560547, "step": 5260 }, { "epoch": 1.69, "learning_rate": 2.432736754770653e-07, "logits/generated": 7.919116973876953, "logits/real": 7.435271263122559, "logps/generated": -3144.9716796875, "logps/real": -677.0125732421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.2787628173828, "rewards/margins": 205.0349578857422, "rewards/real": -27.243799209594727, "step": 5270 }, { "epoch": 1.69, "learning_rate": 2.4268104776579353e-07, "logits/generated": 7.489559173583984, "logits/real": 6.947527885437012, "logps/generated": -2852.969482421875, "logps/real": -653.7357788085938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.3769073486328, "rewards/margins": 189.03085327148438, "rewards/real": -23.34604263305664, "step": 5280 }, { "epoch": 1.69, "learning_rate": 2.420884200545217e-07, "logits/generated": 7.307450294494629, "logits/real": 7.613791465759277, "logps/generated": -3120.351806640625, "logps/real": -616.216552734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -234.49575805664062, "rewards/margins": 210.36746215820312, "rewards/real": -24.12826919555664, "step": 5290 }, { "epoch": 1.7, "learning_rate": 2.4149579234324996e-07, "logits/generated": 7.608119010925293, "logits/real": 7.531047821044922, "logps/generated": -3130.969970703125, "logps/real": -580.1384887695312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -239.9478759765625, "rewards/margins": 219.30892944335938, "rewards/real": -20.638933181762695, "step": 5300 }, { "epoch": 1.7, "learning_rate": 2.409031646319782e-07, "logits/generated": 7.750949859619141, "logits/real": 7.5947136878967285, "logps/generated": -3124.27099609375, "logps/real": -615.40673828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -234.768798828125, "rewards/margins": 212.1469268798828, "rewards/real": -22.621868133544922, "step": 5310 }, { "epoch": 1.7, "learning_rate": 2.403105369207064e-07, "logits/generated": 7.8530755043029785, "logits/real": 8.569993019104004, "logps/generated": -3281.10498046875, "logps/real": -599.451171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -250.39797973632812, "rewards/margins": 226.7510528564453, "rewards/real": -23.646942138671875, "step": 5320 }, { "epoch": 1.71, "learning_rate": 2.3971790920943463e-07, "logits/generated": 8.281180381774902, "logits/real": 7.612637519836426, "logps/generated": -3156.73974609375, "logps/real": -612.8961181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -243.5134735107422, "rewards/margins": 222.10971069335938, "rewards/real": -21.40375518798828, "step": 5330 }, { "epoch": 1.71, "learning_rate": 2.3912528149816287e-07, "logits/generated": 8.176630020141602, "logits/real": 8.179689407348633, "logps/generated": -3106.628662109375, "logps/real": -629.3592529296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -233.0629119873047, "rewards/margins": 206.72561645507812, "rewards/real": -26.33731460571289, "step": 5340 }, { "epoch": 1.71, "learning_rate": 2.3853265378689106e-07, "logits/generated": 7.556497097015381, "logits/real": 6.728703498840332, "logps/generated": -3309.32177734375, "logps/real": -680.9801635742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -258.4127502441406, "rewards/margins": 233.92672729492188, "rewards/real": -24.486013412475586, "step": 5350 }, { "epoch": 1.72, "learning_rate": 2.3794002607561928e-07, "logits/generated": 8.364129066467285, "logits/real": 6.966351509094238, "logps/generated": -2823.2734375, "logps/real": -687.4066162109375, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -206.1614990234375, "rewards/margins": 183.3925323486328, "rewards/real": -22.768964767456055, "step": 5360 }, { "epoch": 1.72, "learning_rate": 2.373473983643475e-07, "logits/generated": 7.549553871154785, "logits/real": 8.134824752807617, "logps/generated": -3176.70458984375, "logps/real": -496.6285705566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -246.53475952148438, "rewards/margins": 229.8188934326172, "rewards/real": -16.715877532958984, "step": 5370 }, { "epoch": 1.72, "learning_rate": 2.3675477065307573e-07, "logits/generated": 8.620078086853027, "logits/real": 7.244509220123291, "logps/generated": -3010.403076171875, "logps/real": -672.1813354492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -220.75015258789062, "rewards/margins": 197.86483764648438, "rewards/real": -22.885311126708984, "step": 5380 }, { "epoch": 1.72, "learning_rate": 2.3616214294180395e-07, "logits/generated": 8.288283348083496, "logits/real": 7.4635725021362305, "logps/generated": -3042.53466796875, "logps/real": -601.9110107421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.18014526367188, "rewards/margins": 207.16043090820312, "rewards/real": -22.0197696685791, "step": 5390 }, { "epoch": 1.73, "learning_rate": 2.3556951523053216e-07, "logits/generated": 8.386857032775879, "logits/real": 8.175043106079102, "logps/generated": -3099.9921875, "logps/real": -595.1871337890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.2064208984375, "rewards/margins": 213.4975128173828, "rewards/real": -21.70889663696289, "step": 5400 }, { "epoch": 1.73, "learning_rate": 2.349768875192604e-07, "logits/generated": 8.30919361114502, "logits/real": 7.4633989334106445, "logps/generated": -3065.841552734375, "logps/real": -629.7460327148438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.97470092773438, "rewards/margins": 207.52880859375, "rewards/real": -22.445880889892578, "step": 5410 }, { "epoch": 1.73, "learning_rate": 2.3438425980798862e-07, "logits/generated": 8.085564613342285, "logits/real": 7.474585056304932, "logps/generated": -2975.020263671875, "logps/real": -662.0523681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -223.0869903564453, "rewards/margins": 200.55972290039062, "rewards/real": -22.527267456054688, "step": 5420 }, { "epoch": 1.74, "learning_rate": 2.3379163209671684e-07, "logits/generated": 8.330001831054688, "logits/real": 8.116881370544434, "logps/generated": -3250.13330078125, "logps/real": -636.6494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -243.0767059326172, "rewards/margins": 219.31173706054688, "rewards/real": -23.764951705932617, "step": 5430 }, { "epoch": 1.74, "learning_rate": 2.3319900438544505e-07, "logits/generated": 7.148618221282959, "logits/real": 7.733752250671387, "logps/generated": -3017.408203125, "logps/real": -621.2754516601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.7698211669922, "rewards/margins": 208.3310089111328, "rewards/real": -22.438800811767578, "step": 5440 }, { "epoch": 1.74, "learning_rate": 2.3260637667417327e-07, "logits/generated": 8.512117385864258, "logits/real": 7.297312259674072, "logps/generated": -3151.544189453125, "logps/real": -578.3646240234375, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -242.8025360107422, "rewards/margins": 224.0908660888672, "rewards/real": -18.711660385131836, "step": 5450 }, { "epoch": 1.75, "learning_rate": 2.3201374896290148e-07, "logits/generated": 8.679183959960938, "logits/real": 7.714252471923828, "logps/generated": -3413.55615234375, "logps/real": -587.8182983398438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -268.77392578125, "rewards/margins": 248.6949005126953, "rewards/real": -20.079051971435547, "step": 5460 }, { "epoch": 1.75, "learning_rate": 2.3142112125162973e-07, "logits/generated": 8.211167335510254, "logits/real": 6.763476371765137, "logps/generated": -3031.783935546875, "logps/real": -625.2652587890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -234.4883270263672, "rewards/margins": 214.18014526367188, "rewards/real": -20.308183670043945, "step": 5470 }, { "epoch": 1.75, "learning_rate": 2.3082849354035794e-07, "logits/generated": 9.495027542114258, "logits/real": 7.7378668785095215, "logps/generated": -2957.58642578125, "logps/real": -614.4080200195312, "loss": 0.0068, "rewards/accuracies": 0.987500011920929, "rewards/generated": -227.18881225585938, "rewards/margins": 210.0166015625, "rewards/real": -17.1722412109375, "step": 5480 }, { "epoch": 1.76, "learning_rate": 2.3023586582908616e-07, "logits/generated": 8.557390213012695, "logits/real": 8.64154052734375, "logps/generated": -3195.19091796875, "logps/real": -572.6459350585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -239.6927490234375, "rewards/margins": 222.95663452148438, "rewards/real": -16.7360782623291, "step": 5490 }, { "epoch": 1.76, "learning_rate": 2.296432381178144e-07, "logits/generated": 8.498029708862305, "logits/real": 8.618393898010254, "logps/generated": -3579.38134765625, "logps/real": -548.2843017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -281.05084228515625, "rewards/margins": 264.20513916015625, "rewards/real": -16.845714569091797, "step": 5500 }, { "epoch": 1.76, "learning_rate": 2.2905061040654261e-07, "logits/generated": 9.008249282836914, "logits/real": 7.633172512054443, "logps/generated": -3118.22412109375, "logps/real": -613.5589599609375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -238.03982543945312, "rewards/margins": 221.30911254882812, "rewards/real": -16.730731964111328, "step": 5510 }, { "epoch": 1.77, "learning_rate": 2.284579826952708e-07, "logits/generated": 9.092446327209473, "logits/real": 8.395269393920898, "logps/generated": -2320.1328125, "logps/real": -528.311279296875, "loss": 0.293, "rewards/accuracies": 1.0, "rewards/generated": -156.64822387695312, "rewards/margins": 145.42884826660156, "rewards/real": -11.219393730163574, "step": 5520 }, { "epoch": 1.77, "learning_rate": 2.2786535498399902e-07, "logits/generated": 8.890094757080078, "logits/real": 7.784165859222412, "logps/generated": -2147.289794921875, "logps/real": -516.3419189453125, "loss": 0.035, "rewards/accuracies": 1.0, "rewards/generated": -140.5802459716797, "rewards/margins": 132.5713348388672, "rewards/real": -8.008905410766602, "step": 5530 }, { "epoch": 1.77, "learning_rate": 2.2727272727272726e-07, "logits/generated": 8.879764556884766, "logits/real": 7.9011125564575195, "logps/generated": -2307.35546875, "logps/real": -531.7626953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -149.95156860351562, "rewards/margins": 139.312744140625, "rewards/real": -10.638823509216309, "step": 5540 }, { "epoch": 1.78, "learning_rate": 2.2668009956145548e-07, "logits/generated": 8.632640838623047, "logits/real": 8.157415390014648, "logps/generated": -2178.864501953125, "logps/real": -494.8309020996094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -140.9723663330078, "rewards/margins": 132.74716186523438, "rewards/real": -8.22519588470459, "step": 5550 }, { "epoch": 1.78, "learning_rate": 2.260874718501837e-07, "logits/generated": 8.62189769744873, "logits/real": 7.6192193031311035, "logps/generated": -2038.129638671875, "logps/real": -559.3242797851562, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/generated": -131.58273315429688, "rewards/margins": 119.15191650390625, "rewards/real": -12.430806159973145, "step": 5560 }, { "epoch": 1.78, "learning_rate": 2.2549484413891193e-07, "logits/generated": 7.637378692626953, "logits/real": 7.984638214111328, "logps/generated": -2333.95361328125, "logps/real": -512.4053344726562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -160.73377990722656, "rewards/margins": 149.98101806640625, "rewards/real": -10.752765655517578, "step": 5570 }, { "epoch": 1.79, "learning_rate": 2.2490221642764015e-07, "logits/generated": 8.901959419250488, "logits/real": 7.9762115478515625, "logps/generated": -2142.17626953125, "logps/real": -541.9774780273438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -140.19671630859375, "rewards/margins": 129.3114013671875, "rewards/real": -10.885316848754883, "step": 5580 }, { "epoch": 1.79, "learning_rate": 2.2430958871636836e-07, "logits/generated": 8.070945739746094, "logits/real": 7.642614841461182, "logps/generated": -2312.77001953125, "logps/real": -497.6708068847656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -150.230712890625, "rewards/margins": 138.83248901367188, "rewards/real": -11.398209571838379, "step": 5590 }, { "epoch": 1.79, "learning_rate": 2.237169610050966e-07, "logits/generated": 8.170433044433594, "logits/real": 8.008584022521973, "logps/generated": -2382.813720703125, "logps/real": -498.89691162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -162.6418914794922, "rewards/margins": 152.89523315429688, "rewards/real": -9.746663093566895, "step": 5600 }, { "epoch": 1.8, "learning_rate": 2.231243332938248e-07, "logits/generated": 7.907675743103027, "logits/real": 7.1721906661987305, "logps/generated": -2078.1884765625, "logps/real": -509.00634765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -139.03396606445312, "rewards/margins": 127.56270599365234, "rewards/real": -11.4712495803833, "step": 5610 }, { "epoch": 1.8, "learning_rate": 2.22531705582553e-07, "logits/generated": 8.083907127380371, "logits/real": 8.157910346984863, "logps/generated": -2345.047119140625, "logps/real": -460.0767517089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.96621704101562, "rewards/margins": 148.7850341796875, "rewards/real": -9.18117904663086, "step": 5620 }, { "epoch": 1.8, "learning_rate": 2.2193907787128125e-07, "logits/generated": 8.987273216247559, "logits/real": 7.652749061584473, "logps/generated": -2205.064697265625, "logps/real": -573.953125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -144.36585998535156, "rewards/margins": 131.2270965576172, "rewards/real": -13.138751029968262, "step": 5630 }, { "epoch": 1.8, "learning_rate": 2.2134645016000947e-07, "logits/generated": 8.303384780883789, "logits/real": 7.869443416595459, "logps/generated": -2317.821044921875, "logps/real": -492.75177001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -152.119384765625, "rewards/margins": 141.45797729492188, "rewards/real": -10.661397933959961, "step": 5640 }, { "epoch": 1.81, "learning_rate": 2.2075382244873768e-07, "logits/generated": 8.932966232299805, "logits/real": 8.766980171203613, "logps/generated": -2427.252685546875, "logps/real": -509.83258056640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -164.4025115966797, "rewards/margins": 153.43563842773438, "rewards/real": -10.96688461303711, "step": 5650 }, { "epoch": 1.81, "learning_rate": 2.2016119473746592e-07, "logits/generated": 7.980035305023193, "logits/real": 6.403542995452881, "logps/generated": -2035.5621337890625, "logps/real": -584.7792358398438, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/generated": -132.69680786132812, "rewards/margins": 118.48189544677734, "rewards/real": -14.214925765991211, "step": 5660 }, { "epoch": 1.81, "learning_rate": 2.1956856702619414e-07, "logits/generated": 7.129973411560059, "logits/real": 6.217714309692383, "logps/generated": -2150.37646484375, "logps/real": -513.825927734375, "loss": 0.018, "rewards/accuracies": 1.0, "rewards/generated": -144.7529296875, "rewards/margins": 132.71566772460938, "rewards/real": -12.037263870239258, "step": 5670 }, { "epoch": 1.82, "learning_rate": 2.1897593931492236e-07, "logits/generated": 8.409619331359863, "logits/real": 7.088288307189941, "logps/generated": -2246.23486328125, "logps/real": -567.7935791015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -147.49758911132812, "rewards/margins": 134.64016723632812, "rewards/real": -12.857426643371582, "step": 5680 }, { "epoch": 1.82, "learning_rate": 2.183833116036506e-07, "logits/generated": 8.112942695617676, "logits/real": 7.156019687652588, "logps/generated": -2154.040283203125, "logps/real": -469.5445251464844, "loss": 0.0123, "rewards/accuracies": 0.987500011920929, "rewards/generated": -143.0413818359375, "rewards/margins": 134.4564208984375, "rewards/real": -8.584959030151367, "step": 5690 }, { "epoch": 1.82, "learning_rate": 2.1779068389237879e-07, "logits/generated": 7.150473117828369, "logits/real": 6.543522834777832, "logps/generated": -2193.310302734375, "logps/real": -487.41796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -141.04823303222656, "rewards/margins": 131.74331665039062, "rewards/real": -9.304906845092773, "step": 5700 }, { "epoch": 1.83, "learning_rate": 2.17198056181107e-07, "logits/generated": 8.712777137756348, "logits/real": 6.982832908630371, "logps/generated": -2179.80517578125, "logps/real": -521.1646118164062, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -138.62652587890625, "rewards/margins": 130.46339416503906, "rewards/real": -8.163132667541504, "step": 5710 }, { "epoch": 1.83, "learning_rate": 2.1660542846983524e-07, "logits/generated": 6.988243103027344, "logits/real": 6.833265781402588, "logps/generated": -2195.94580078125, "logps/real": -469.60223388671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -150.5059814453125, "rewards/margins": 142.09829711914062, "rewards/real": -8.407708168029785, "step": 5720 }, { "epoch": 1.83, "learning_rate": 2.1601280075856346e-07, "logits/generated": 7.87545108795166, "logits/real": 7.021292686462402, "logps/generated": -1960.963623046875, "logps/real": -521.0220336914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -123.77034759521484, "rewards/margins": 113.6063232421875, "rewards/real": -10.164027214050293, "step": 5730 }, { "epoch": 1.84, "learning_rate": 2.1542017304729167e-07, "logits/generated": 8.276737213134766, "logits/real": 7.058943271636963, "logps/generated": -2148.73486328125, "logps/real": -496.4242248535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -139.19473266601562, "rewards/margins": 130.7823028564453, "rewards/real": -8.412437438964844, "step": 5740 }, { "epoch": 1.84, "learning_rate": 2.1482754533601992e-07, "logits/generated": 7.07004451751709, "logits/real": 6.417170524597168, "logps/generated": -2173.640625, "logps/real": -507.6236267089844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -140.36526489257812, "rewards/margins": 129.7715606689453, "rewards/real": -10.593737602233887, "step": 5750 }, { "epoch": 1.84, "learning_rate": 2.1423491762474813e-07, "logits/generated": 7.203627109527588, "logits/real": 6.8025617599487305, "logps/generated": -2246.26708984375, "logps/real": -451.2135314941406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -143.71173095703125, "rewards/margins": 137.4036407470703, "rewards/real": -6.30810022354126, "step": 5760 }, { "epoch": 1.85, "learning_rate": 2.1364228991347635e-07, "logits/generated": 7.932003021240234, "logits/real": 6.227166652679443, "logps/generated": -2101.328857421875, "logps/real": -568.9916381835938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -136.2963409423828, "rewards/margins": 125.7856674194336, "rewards/real": -10.510665893554688, "step": 5770 }, { "epoch": 1.85, "learning_rate": 2.130496622022046e-07, "logits/generated": 6.748912811279297, "logits/real": 6.644819736480713, "logps/generated": -2270.43212890625, "logps/real": -464.3555603027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -152.12002563476562, "rewards/margins": 142.7029266357422, "rewards/real": -9.417085647583008, "step": 5780 }, { "epoch": 1.85, "learning_rate": 2.1245703449093278e-07, "logits/generated": 7.538140773773193, "logits/real": 6.661645412445068, "logps/generated": -2224.94970703125, "logps/real": -500.4996643066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -146.2611541748047, "rewards/margins": 137.1682891845703, "rewards/real": -9.092841148376465, "step": 5790 }, { "epoch": 1.86, "learning_rate": 2.11864406779661e-07, "logits/generated": 7.538527011871338, "logits/real": 7.197164058685303, "logps/generated": -2443.523681640625, "logps/real": -453.081298828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -162.47962951660156, "rewards/margins": 153.80459594726562, "rewards/real": -8.675023078918457, "step": 5800 }, { "epoch": 1.86, "learning_rate": 2.1127177906838923e-07, "logits/generated": 8.245162963867188, "logits/real": 6.768731117248535, "logps/generated": -2411.24951171875, "logps/real": -494.40924072265625, "loss": 0.0201, "rewards/accuracies": 1.0, "rewards/generated": -165.36782836914062, "rewards/margins": 158.23434448242188, "rewards/real": -7.133492946624756, "step": 5810 }, { "epoch": 1.86, "learning_rate": 2.1067915135711745e-07, "logits/generated": 7.780963897705078, "logits/real": 6.986047267913818, "logps/generated": -2392.13427734375, "logps/real": -479.14288330078125, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/generated": -164.5125732421875, "rewards/margins": 153.99156188964844, "rewards/real": -10.521004676818848, "step": 5820 }, { "epoch": 1.87, "learning_rate": 2.1008652364584567e-07, "logits/generated": 8.22807502746582, "logits/real": 6.6075239181518555, "logps/generated": -2355.71630859375, "logps/real": -477.9898986816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -162.53538513183594, "rewards/margins": 156.29937744140625, "rewards/real": -6.2360053062438965, "step": 5830 }, { "epoch": 1.87, "learning_rate": 2.0949389593457388e-07, "logits/generated": 8.712141990661621, "logits/real": 7.219460487365723, "logps/generated": -2550.68359375, "logps/real": -484.91973876953125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -177.86204528808594, "rewards/margins": 170.07687377929688, "rewards/real": -7.785178184509277, "step": 5840 }, { "epoch": 1.87, "learning_rate": 2.0890126822330212e-07, "logits/generated": 8.300684928894043, "logits/real": 7.038590908050537, "logps/generated": -2472.04052734375, "logps/real": -494.251220703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -169.96817016601562, "rewards/margins": 162.02548217773438, "rewards/real": -7.942681789398193, "step": 5850 }, { "epoch": 1.88, "learning_rate": 2.0830864051203034e-07, "logits/generated": 9.073741912841797, "logits/real": 7.532196998596191, "logps/generated": -2390.30810546875, "logps/real": -486.24871826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.63858032226562, "rewards/margins": 149.52740478515625, "rewards/real": -8.111166954040527, "step": 5860 }, { "epoch": 1.88, "learning_rate": 2.0771601280075855e-07, "logits/generated": 8.451375961303711, "logits/real": 6.772467136383057, "logps/generated": -2342.857421875, "logps/real": -480.47589111328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -163.57447814941406, "rewards/margins": 156.74050903320312, "rewards/real": -6.833975791931152, "step": 5870 }, { "epoch": 1.88, "learning_rate": 2.0712338508948677e-07, "logits/generated": 9.072281837463379, "logits/real": 7.421594142913818, "logps/generated": -2316.70654296875, "logps/real": -498.35638427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -157.2632293701172, "rewards/margins": 147.93661499023438, "rewards/real": -9.326593399047852, "step": 5880 }, { "epoch": 1.88, "learning_rate": 2.0653075737821498e-07, "logits/generated": 8.996503829956055, "logits/real": 7.009947776794434, "logps/generated": -2350.185302734375, "logps/real": -478.5960998535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -158.18609619140625, "rewards/margins": 151.5810089111328, "rewards/real": -6.605088233947754, "step": 5890 }, { "epoch": 1.89, "learning_rate": 2.059381296669432e-07, "logits/generated": 8.120866775512695, "logits/real": 6.561476230621338, "logps/generated": -2373.29248046875, "logps/real": -473.65185546875, "loss": 0.0511, "rewards/accuracies": 1.0, "rewards/generated": -162.34181213378906, "rewards/margins": 154.72328186035156, "rewards/real": -7.6185302734375, "step": 5900 }, { "epoch": 1.89, "learning_rate": 2.0534550195567144e-07, "logits/generated": 8.982381820678711, "logits/real": 7.275885581970215, "logps/generated": -2732.32763671875, "logps/real": -546.2401123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -198.81259155273438, "rewards/margins": 186.55137634277344, "rewards/real": -12.261204719543457, "step": 5910 }, { "epoch": 1.89, "learning_rate": 2.0475287424439966e-07, "logits/generated": 8.609407424926758, "logits/real": 7.61367130279541, "logps/generated": -2572.026611328125, "logps/real": -558.9483032226562, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -182.45216369628906, "rewards/margins": 167.79185485839844, "rewards/real": -14.660322189331055, "step": 5920 }, { "epoch": 1.9, "learning_rate": 2.0416024653312787e-07, "logits/generated": 8.647912979125977, "logits/real": 8.261164665222168, "logps/generated": -2859.31494140625, "logps/real": -499.607177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -211.8936004638672, "rewards/margins": 197.9434356689453, "rewards/real": -13.950170516967773, "step": 5930 }, { "epoch": 1.9, "learning_rate": 2.0356761882185611e-07, "logits/generated": 7.972177028656006, "logits/real": 6.481428623199463, "logps/generated": -2579.8212890625, "logps/real": -548.7396240234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -183.79672241210938, "rewards/margins": 169.787109375, "rewards/real": -14.009631156921387, "step": 5940 }, { "epoch": 1.9, "learning_rate": 2.0297499111058433e-07, "logits/generated": 8.738970756530762, "logits/real": 6.167162895202637, "logps/generated": -2575.65478515625, "logps/real": -634.230224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -182.9365692138672, "rewards/margins": 167.52041625976562, "rewards/real": -15.41615104675293, "step": 5950 }, { "epoch": 1.91, "learning_rate": 2.0238236339931255e-07, "logits/generated": 8.926651954650879, "logits/real": 7.478346347808838, "logps/generated": -3152.360107421875, "logps/real": -506.2801208496094, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -237.7240753173828, "rewards/margins": 226.5109100341797, "rewards/real": -11.213167190551758, "step": 5960 }, { "epoch": 1.91, "learning_rate": 2.0178973568804076e-07, "logits/generated": 9.060684204101562, "logits/real": 6.677983283996582, "logps/generated": -2609.33984375, "logps/real": -541.929443359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -189.33409118652344, "rewards/margins": 179.33554077148438, "rewards/real": -9.998517990112305, "step": 5970 }, { "epoch": 1.91, "learning_rate": 2.0119710797676898e-07, "logits/generated": 7.342380523681641, "logits/real": 7.358287811279297, "logps/generated": -2700.94091796875, "logps/real": -508.08441162109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -199.0122528076172, "rewards/margins": 183.59230041503906, "rewards/real": -15.419926643371582, "step": 5980 }, { "epoch": 1.92, "learning_rate": 2.006044802654972e-07, "logits/generated": 7.682666778564453, "logits/real": 7.889317512512207, "logps/generated": -2820.789794921875, "logps/real": -498.14788818359375, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -203.1293487548828, "rewards/margins": 189.3075714111328, "rewards/real": -13.821795463562012, "step": 5990 }, { "epoch": 1.92, "learning_rate": 2.0001185255422543e-07, "logits/generated": 9.086576461791992, "logits/real": 7.325014591217041, "logps/generated": -2598.021728515625, "logps/real": -512.8424682617188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -185.49595642089844, "rewards/margins": 173.144287109375, "rewards/real": -12.351678848266602, "step": 6000 }, { "epoch": 1.92, "learning_rate": 1.9941922484295365e-07, "logits/generated": 8.543691635131836, "logits/real": 8.154191970825195, "logps/generated": -2732.15478515625, "logps/real": -468.396484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -198.18116760253906, "rewards/margins": 188.1605682373047, "rewards/real": -10.020601272583008, "step": 6010 }, { "epoch": 1.93, "learning_rate": 1.9882659713168186e-07, "logits/generated": 9.159213066101074, "logits/real": 7.334768772125244, "logps/generated": -2416.314208984375, "logps/real": -531.3222045898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -173.5672607421875, "rewards/margins": 163.15481567382812, "rewards/real": -10.412424087524414, "step": 6020 }, { "epoch": 1.93, "learning_rate": 1.982339694204101e-07, "logits/generated": 8.10887336730957, "logits/real": 6.892736911773682, "logps/generated": -2950.716796875, "logps/real": -556.5784912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.7201690673828, "rewards/margins": 199.91738891601562, "rewards/real": -12.802772521972656, "step": 6030 }, { "epoch": 1.93, "learning_rate": 1.9764134170913832e-07, "logits/generated": 7.66934061050415, "logits/real": 6.839064121246338, "logps/generated": -2707.306396484375, "logps/real": -535.5786743164062, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -195.0601348876953, "rewards/margins": 181.37237548828125, "rewards/real": -13.687767028808594, "step": 6040 }, { "epoch": 1.94, "learning_rate": 1.9704871399786654e-07, "logits/generated": 8.843975067138672, "logits/real": 8.697199821472168, "logps/generated": -3151.53662109375, "logps/real": -493.04803466796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.45956420898438, "rewards/margins": 219.19271850585938, "rewards/real": -12.266801834106445, "step": 6050 }, { "epoch": 1.94, "learning_rate": 1.9645608628659475e-07, "logits/generated": 8.91144847869873, "logits/real": 7.452356815338135, "logps/generated": -2820.457275390625, "logps/real": -567.0254516601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -200.2904052734375, "rewards/margins": 186.19561767578125, "rewards/real": -14.09478759765625, "step": 6060 }, { "epoch": 1.94, "learning_rate": 1.9586345857532297e-07, "logits/generated": 9.620126724243164, "logits/real": 8.269552230834961, "logps/generated": -2840.753173828125, "logps/real": -535.985595703125, "loss": 0.0052, "rewards/accuracies": 0.987500011920929, "rewards/generated": -205.6498565673828, "rewards/margins": 194.3441162109375, "rewards/real": -11.305753707885742, "step": 6070 }, { "epoch": 1.95, "learning_rate": 1.9527083086405118e-07, "logits/generated": 7.677509307861328, "logits/real": 8.028158187866211, "logps/generated": -2888.787353515625, "logps/real": -459.373779296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -211.05575561523438, "rewards/margins": 201.81626892089844, "rewards/real": -9.239497184753418, "step": 6080 }, { "epoch": 1.95, "learning_rate": 1.9467820315277943e-07, "logits/generated": 9.13703727722168, "logits/real": 8.040555953979492, "logps/generated": -2472.905029296875, "logps/real": -520.55712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -174.52317810058594, "rewards/margins": 163.8053436279297, "rewards/real": -10.717818260192871, "step": 6090 }, { "epoch": 1.95, "learning_rate": 1.9408557544150764e-07, "logits/generated": 7.702120780944824, "logits/real": 7.51211404800415, "logps/generated": -2921.27099609375, "logps/real": -454.24737548828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -216.72677612304688, "rewards/margins": 208.5781707763672, "rewards/real": -8.148611068725586, "step": 6100 }, { "epoch": 1.96, "learning_rate": 1.9349294773023586e-07, "logits/generated": 8.139269828796387, "logits/real": 7.147430419921875, "logps/generated": -2678.47412109375, "logps/real": -507.9501037597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -191.8583526611328, "rewards/margins": 182.26507568359375, "rewards/real": -9.593276023864746, "step": 6110 }, { "epoch": 1.96, "learning_rate": 1.929003200189641e-07, "logits/generated": 8.51573657989502, "logits/real": 6.0417094230651855, "logps/generated": -2321.69384765625, "logps/real": -576.3086547851562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -163.68356323242188, "rewards/margins": 153.70858764648438, "rewards/real": -9.97500228881836, "step": 6120 }, { "epoch": 1.96, "learning_rate": 1.9230769230769231e-07, "logits/generated": 7.271491050720215, "logits/real": 7.62921142578125, "logps/generated": -2750.7900390625, "logps/real": -453.669189453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -196.0287628173828, "rewards/margins": 188.6156768798828, "rewards/real": -7.413069725036621, "step": 6130 }, { "epoch": 1.96, "learning_rate": 1.9171506459642053e-07, "logits/generated": 9.015237808227539, "logits/real": 7.175163269042969, "logps/generated": -2282.815185546875, "logps/real": -471.3609924316406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -158.94361877441406, "rewards/margins": 150.9917755126953, "rewards/real": -7.951813697814941, "step": 6140 }, { "epoch": 1.97, "learning_rate": 1.9112243688514872e-07, "logits/generated": 9.255011558532715, "logits/real": 7.64856481552124, "logps/generated": -2489.502197265625, "logps/real": -519.0916137695312, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/generated": -178.13418579101562, "rewards/margins": 167.83311462402344, "rewards/real": -10.301029205322266, "step": 6150 }, { "epoch": 1.97, "learning_rate": 1.9052980917387696e-07, "logits/generated": 9.282572746276855, "logits/real": 7.990833282470703, "logps/generated": -2885.37841796875, "logps/real": -528.9043579101562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.00228881835938, "rewards/margins": 207.20150756835938, "rewards/real": -11.80078411102295, "step": 6160 }, { "epoch": 1.97, "learning_rate": 1.8993718146260517e-07, "logits/generated": 9.021588325500488, "logits/real": 7.467851161956787, "logps/generated": -2954.815673828125, "logps/real": -514.7556762695312, "loss": 0.0143, "rewards/accuracies": 1.0, "rewards/generated": -222.02633666992188, "rewards/margins": 211.798828125, "rewards/real": -10.227510452270508, "step": 6170 }, { "epoch": 1.98, "learning_rate": 1.893445537513334e-07, "logits/generated": 8.835906982421875, "logits/real": 7.900371551513672, "logps/generated": -3421.32958984375, "logps/real": -517.0118408203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -268.62115478515625, "rewards/margins": 256.0744323730469, "rewards/real": -12.546710014343262, "step": 6180 }, { "epoch": 1.98, "learning_rate": 1.8875192604006163e-07, "logits/generated": 9.172682762145996, "logits/real": 7.795259952545166, "logps/generated": -3212.272705078125, "logps/real": -577.6221313476562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -249.37362670898438, "rewards/margins": 235.3374481201172, "rewards/real": -14.036130905151367, "step": 6190 }, { "epoch": 1.98, "learning_rate": 1.8815929832878985e-07, "logits/generated": 8.65040397644043, "logits/real": 7.068799018859863, "logps/generated": -3485.75439453125, "logps/real": -545.9566040039062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -265.7947082519531, "rewards/margins": 253.57479858398438, "rewards/real": -12.219874382019043, "step": 6200 }, { "epoch": 1.99, "learning_rate": 1.8756667061751806e-07, "logits/generated": 9.270819664001465, "logits/real": 7.318133354187012, "logps/generated": -3437.382080078125, "logps/real": -509.5670471191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -273.59649658203125, "rewards/margins": 264.04071044921875, "rewards/real": -9.555835723876953, "step": 6210 }, { "epoch": 1.99, "learning_rate": 1.869740429062463e-07, "logits/generated": 8.606060981750488, "logits/real": 7.670060634613037, "logps/generated": -3951.55859375, "logps/real": -601.5804443359375, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -322.25457763671875, "rewards/margins": 302.784912109375, "rewards/real": -19.46962547302246, "step": 6220 }, { "epoch": 1.99, "learning_rate": 1.863814151949745e-07, "logits/generated": 8.701605796813965, "logits/real": 6.927587985992432, "logps/generated": -4252.7783203125, "logps/real": -576.2169189453125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -347.07086181640625, "rewards/margins": 330.9310302734375, "rewards/real": -16.139759063720703, "step": 6230 }, { "epoch": 2.0, "learning_rate": 1.857887874837027e-07, "logits/generated": 8.708304405212402, "logits/real": 7.464688777923584, "logps/generated": -3602.87451171875, "logps/real": -496.2691345214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -282.6443786621094, "rewards/margins": 271.4021301269531, "rewards/real": -11.2423095703125, "step": 6240 }, { "epoch": 2.0, "learning_rate": 1.8519615977243095e-07, "logits/generated": 8.798139572143555, "logits/real": 6.710761070251465, "logps/generated": -3896.438232421875, "logps/real": -550.7965698242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -311.7084655761719, "rewards/margins": 300.1250305175781, "rewards/real": -11.583441734313965, "step": 6250 }, { "epoch": 2.0, "learning_rate": 1.8460353206115917e-07, "logits/generated": 9.088658332824707, "logits/real": 7.351723670959473, "logps/generated": -3434.943359375, "logps/real": -523.2825927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -264.31011962890625, "rewards/margins": 251.1939697265625, "rewards/real": -13.116140365600586, "step": 6260 }, { "epoch": 2.01, "learning_rate": 1.8401090434988738e-07, "logits/generated": 9.453213691711426, "logits/real": 7.244253635406494, "logps/generated": -3534.56787109375, "logps/real": -575.6241455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -274.08758544921875, "rewards/margins": 258.873046875, "rewards/real": -15.214546203613281, "step": 6270 }, { "epoch": 2.01, "learning_rate": 1.8341827663861562e-07, "logits/generated": 9.047871589660645, "logits/real": 7.265507698059082, "logps/generated": -3603.86279296875, "logps/real": -503.4139099121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -281.1978454589844, "rewards/margins": 269.614990234375, "rewards/real": -11.582871437072754, "step": 6280 }, { "epoch": 2.01, "learning_rate": 1.8282564892734384e-07, "logits/generated": 8.940240859985352, "logits/real": 7.480827331542969, "logps/generated": -3432.19287109375, "logps/real": -569.4034423828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -268.1244812011719, "rewards/margins": 254.306396484375, "rewards/real": -13.818075180053711, "step": 6290 }, { "epoch": 2.02, "learning_rate": 1.8223302121607205e-07, "logits/generated": 8.655801773071289, "logits/real": 6.478055000305176, "logps/generated": -3162.76123046875, "logps/real": -531.5521850585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -240.61331176757812, "rewards/margins": 229.14599609375, "rewards/real": -11.467347145080566, "step": 6300 }, { "epoch": 2.02, "learning_rate": 1.816403935048003e-07, "logits/generated": 8.979247093200684, "logits/real": 7.141709804534912, "logps/generated": -3595.20654296875, "logps/real": -521.9537353515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -284.3468933105469, "rewards/margins": 274.21673583984375, "rewards/real": -10.130109786987305, "step": 6310 }, { "epoch": 2.02, "learning_rate": 1.8104776579352849e-07, "logits/generated": 7.87072229385376, "logits/real": 6.463742733001709, "logps/generated": -3213.969970703125, "logps/real": -517.1642456054688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -245.9059600830078, "rewards/margins": 235.7017364501953, "rewards/real": -10.204239845275879, "step": 6320 }, { "epoch": 2.03, "learning_rate": 1.804551380822567e-07, "logits/generated": 9.780380249023438, "logits/real": 7.4917311668396, "logps/generated": -3782.021484375, "logps/real": -564.3033447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -297.1872253417969, "rewards/margins": 284.2447814941406, "rewards/real": -12.942486763000488, "step": 6330 }, { "epoch": 2.03, "learning_rate": 1.7986251037098494e-07, "logits/generated": 8.989027976989746, "logits/real": 7.28525447845459, "logps/generated": -3551.6953125, "logps/real": -528.2459106445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -278.18597412109375, "rewards/margins": 266.808837890625, "rewards/real": -11.377119064331055, "step": 6340 }, { "epoch": 2.03, "learning_rate": 1.7926988265971316e-07, "logits/generated": 8.610428810119629, "logits/real": 6.960946083068848, "logps/generated": -3281.053466796875, "logps/real": -517.7056274414062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -250.056640625, "rewards/margins": 238.2983856201172, "rewards/real": -11.758244514465332, "step": 6350 }, { "epoch": 2.04, "learning_rate": 1.7867725494844137e-07, "logits/generated": 8.937821388244629, "logits/real": 6.67473840713501, "logps/generated": -3511.594970703125, "logps/real": -526.5020141601562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -270.14837646484375, "rewards/margins": 259.5404357910156, "rewards/real": -10.607945442199707, "step": 6360 }, { "epoch": 2.04, "learning_rate": 1.7808462723716962e-07, "logits/generated": 9.491483688354492, "logits/real": 7.22829532623291, "logps/generated": -3353.389892578125, "logps/real": -534.6859130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -255.666748046875, "rewards/margins": 243.8340301513672, "rewards/real": -11.832707405090332, "step": 6370 }, { "epoch": 2.04, "learning_rate": 1.7749199952589783e-07, "logits/generated": 7.953823089599609, "logits/real": 7.250722408294678, "logps/generated": -3541.36962890625, "logps/real": -514.2299194335938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -275.8347473144531, "rewards/margins": 263.5892639160156, "rewards/real": -12.245488166809082, "step": 6380 }, { "epoch": 2.04, "learning_rate": 1.7689937181462605e-07, "logits/generated": 8.545463562011719, "logits/real": 7.698030948638916, "logps/generated": -3749.07421875, "logps/real": -512.2899169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -297.1908874511719, "rewards/margins": 284.71148681640625, "rewards/real": -12.479406356811523, "step": 6390 }, { "epoch": 2.05, "learning_rate": 1.763067441033543e-07, "logits/generated": 9.309557914733887, "logits/real": 6.4228997230529785, "logps/generated": -3100.44580078125, "logps/real": -551.9783935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -237.00918579101562, "rewards/margins": 225.79638671875, "rewards/real": -11.212778091430664, "step": 6400 }, { "epoch": 2.05, "learning_rate": 1.7571411639208248e-07, "logits/generated": 8.336507797241211, "logits/real": 6.748232841491699, "logps/generated": -3364.119873046875, "logps/real": -497.05279541015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -263.2693786621094, "rewards/margins": 252.76748657226562, "rewards/real": -10.501882553100586, "step": 6410 }, { "epoch": 2.05, "learning_rate": 1.751214886808107e-07, "logits/generated": 8.724721908569336, "logits/real": 7.239665985107422, "logps/generated": -3356.18212890625, "logps/real": -446.7139587402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -265.2153015136719, "rewards/margins": 256.7137145996094, "rewards/real": -8.501619338989258, "step": 6420 }, { "epoch": 2.06, "learning_rate": 1.7452886096953893e-07, "logits/generated": 9.057985305786133, "logits/real": 7.4846086502075195, "logps/generated": -3530.210205078125, "logps/real": -531.7076416015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -274.9366149902344, "rewards/margins": 261.56097412109375, "rewards/real": -13.3756685256958, "step": 6430 }, { "epoch": 2.06, "learning_rate": 1.7393623325826715e-07, "logits/generated": 8.541022300720215, "logits/real": 6.361926555633545, "logps/generated": -2893.63037109375, "logps/real": -498.4928283691406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -217.1132354736328, "rewards/margins": 208.45632934570312, "rewards/real": -8.656878471374512, "step": 6440 }, { "epoch": 2.06, "learning_rate": 1.7334360554699537e-07, "logits/generated": 7.9224653244018555, "logits/real": 7.107260227203369, "logps/generated": -3318.268310546875, "logps/real": -504.87890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -256.4622497558594, "rewards/margins": 246.14151000976562, "rewards/real": -10.320732116699219, "step": 6450 }, { "epoch": 2.07, "learning_rate": 1.7275097783572358e-07, "logits/generated": 8.26883602142334, "logits/real": 6.955046653747559, "logps/generated": -3216.326904296875, "logps/real": -512.1249389648438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -247.9410400390625, "rewards/margins": 237.6244659423828, "rewards/real": -10.316598892211914, "step": 6460 }, { "epoch": 2.07, "learning_rate": 1.7215835012445182e-07, "logits/generated": 8.85544490814209, "logits/real": 7.392551422119141, "logps/generated": -3742.67919921875, "logps/real": -520.4263916015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -297.8428955078125, "rewards/margins": 284.34857177734375, "rewards/real": -13.494348526000977, "step": 6470 }, { "epoch": 2.07, "learning_rate": 1.7156572241318004e-07, "logits/generated": 8.579545021057129, "logits/real": 7.4799675941467285, "logps/generated": -3374.89599609375, "logps/real": -518.1550903320312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -259.715576171875, "rewards/margins": 249.54556274414062, "rewards/real": -10.170038223266602, "step": 6480 }, { "epoch": 2.08, "learning_rate": 1.7097309470190825e-07, "logits/generated": 8.086904525756836, "logits/real": 6.762541770935059, "logps/generated": -3298.514892578125, "logps/real": -513.2745361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -260.8355407714844, "rewards/margins": 250.43515014648438, "rewards/real": -10.40037727355957, "step": 6490 }, { "epoch": 2.08, "learning_rate": 1.7038046699063647e-07, "logits/generated": 8.902335166931152, "logits/real": 7.415275573730469, "logps/generated": -3789.276611328125, "logps/real": -519.8098754882812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -298.0629577636719, "rewards/margins": 285.11212158203125, "rewards/real": -12.950851440429688, "step": 6500 }, { "epoch": 2.08, "learning_rate": 1.6978783927936468e-07, "logits/generated": 9.022926330566406, "logits/real": 7.079081058502197, "logps/generated": -3237.22509765625, "logps/real": -532.1895751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -250.9137725830078, "rewards/margins": 238.81600952148438, "rewards/real": -12.097719192504883, "step": 6510 }, { "epoch": 2.09, "learning_rate": 1.691952115680929e-07, "logits/generated": 7.929329872131348, "logits/real": 6.843851566314697, "logps/generated": -3960.30517578125, "logps/real": -495.5287170410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -316.6612548828125, "rewards/margins": 307.29443359375, "rewards/real": -9.366758346557617, "step": 6520 }, { "epoch": 2.09, "learning_rate": 1.6860258385682114e-07, "logits/generated": 8.761899948120117, "logits/real": 6.82781982421875, "logps/generated": -3310.32275390625, "logps/real": -505.434326171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -255.945068359375, "rewards/margins": 245.0864715576172, "rewards/real": -10.85859489440918, "step": 6530 }, { "epoch": 2.09, "learning_rate": 1.6800995614554936e-07, "logits/generated": 8.119993209838867, "logits/real": 6.955175876617432, "logps/generated": -3321.866455078125, "logps/real": -498.65045166015625, "loss": 0.0159, "rewards/accuracies": 0.987500011920929, "rewards/generated": -252.197265625, "rewards/margins": 241.81546020507812, "rewards/real": -10.381818771362305, "step": 6540 }, { "epoch": 2.1, "learning_rate": 1.6741732843427757e-07, "logits/generated": 8.6936674118042, "logits/real": 6.582942962646484, "logps/generated": -2872.398193359375, "logps/real": -470.8648376464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -216.3501739501953, "rewards/margins": 209.2934112548828, "rewards/real": -7.056779384613037, "step": 6550 }, { "epoch": 2.1, "learning_rate": 1.6682470072300581e-07, "logits/generated": 7.7927961349487305, "logits/real": 7.028050422668457, "logps/generated": -3230.473876953125, "logps/real": -502.80322265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -245.8509521484375, "rewards/margins": 234.7498321533203, "rewards/real": -11.101089477539062, "step": 6560 }, { "epoch": 2.1, "learning_rate": 1.6623207301173403e-07, "logits/generated": 8.20081901550293, "logits/real": 6.232621192932129, "logps/generated": -2883.102783203125, "logps/real": -452.2522888183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -215.45217895507812, "rewards/margins": 210.1968536376953, "rewards/real": -5.255283355712891, "step": 6570 }, { "epoch": 2.11, "learning_rate": 1.6563944530046224e-07, "logits/generated": 7.8652215003967285, "logits/real": 7.219689846038818, "logps/generated": -3625.671142578125, "logps/real": -499.97344970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -276.94384765625, "rewards/margins": 267.03985595703125, "rewards/real": -9.904024124145508, "step": 6580 }, { "epoch": 2.11, "learning_rate": 1.6504681758919046e-07, "logits/generated": 8.586295127868652, "logits/real": 6.303733825683594, "logps/generated": -2732.955322265625, "logps/real": -552.63623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.24172973632812, "rewards/margins": 191.96327209472656, "rewards/real": -10.27847671508789, "step": 6590 }, { "epoch": 2.11, "learning_rate": 1.6445418987791868e-07, "logits/generated": 7.9905266761779785, "logits/real": 6.504391670227051, "logps/generated": -3210.52490234375, "logps/real": -514.85888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -243.1439971923828, "rewards/margins": 233.38760375976562, "rewards/real": -9.756407737731934, "step": 6600 }, { "epoch": 2.12, "learning_rate": 1.638615621666469e-07, "logits/generated": 8.011930465698242, "logits/real": 6.788858890533447, "logps/generated": -3836.227294921875, "logps/real": -457.2505798339844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -301.9410095214844, "rewards/margins": 295.438720703125, "rewards/real": -6.502276420593262, "step": 6610 }, { "epoch": 2.12, "learning_rate": 1.6326893445537513e-07, "logits/generated": 7.896731376647949, "logits/real": 6.2884345054626465, "logps/generated": -2726.336669921875, "logps/real": -512.4219970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.65829467773438, "rewards/margins": 193.80172729492188, "rewards/real": -8.856563568115234, "step": 6620 }, { "epoch": 2.12, "learning_rate": 1.6267630674410335e-07, "logits/generated": 7.1974053382873535, "logits/real": 6.514739990234375, "logps/generated": -2927.04541015625, "logps/real": -524.9796142578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -214.642822265625, "rewards/margins": 202.9619903564453, "rewards/real": -11.6808443069458, "step": 6630 }, { "epoch": 2.12, "learning_rate": 1.6208367903283156e-07, "logits/generated": 8.286745071411133, "logits/real": 6.614323616027832, "logps/generated": -3069.070068359375, "logps/real": -505.1914978027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.01416015625, "rewards/margins": 221.65335083007812, "rewards/real": -10.360797882080078, "step": 6640 }, { "epoch": 2.13, "learning_rate": 1.614910513215598e-07, "logits/generated": 9.476008415222168, "logits/real": 7.458826541900635, "logps/generated": -3249.63916015625, "logps/real": -504.131103515625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -245.968994140625, "rewards/margins": 236.4010467529297, "rewards/real": -9.567926406860352, "step": 6650 }, { "epoch": 2.13, "learning_rate": 1.6089842361028802e-07, "logits/generated": 8.050073623657227, "logits/real": 6.25203800201416, "logps/generated": -3204.10400390625, "logps/real": -484.1669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -244.3979034423828, "rewards/margins": 236.33792114257812, "rewards/real": -8.060026168823242, "step": 6660 }, { "epoch": 2.13, "learning_rate": 1.6030579589901624e-07, "logits/generated": 8.324923515319824, "logits/real": 6.279478073120117, "logps/generated": -2949.732177734375, "logps/real": -448.829345703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -215.97647094726562, "rewards/margins": 208.87191772460938, "rewards/real": -7.104545593261719, "step": 6670 }, { "epoch": 2.14, "learning_rate": 1.5971316818774445e-07, "logits/generated": 6.9879045486450195, "logits/real": 5.539839267730713, "logps/generated": -3072.359375, "logps/real": -501.189208984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.67861938476562, "rewards/margins": 220.2053680419922, "rewards/real": -8.473264694213867, "step": 6680 }, { "epoch": 2.14, "learning_rate": 1.5912054047647267e-07, "logits/generated": 8.722136497497559, "logits/real": 6.435215950012207, "logps/generated": -3174.4970703125, "logps/real": -489.9278259277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.2219696044922, "rewards/margins": 229.3987579345703, "rewards/real": -6.823216438293457, "step": 6690 }, { "epoch": 2.14, "learning_rate": 1.5852791276520088e-07, "logits/generated": 8.487139701843262, "logits/real": 6.012986660003662, "logps/generated": -2824.560302734375, "logps/real": -504.8099670410156, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -206.1438751220703, "rewards/margins": 197.5120391845703, "rewards/real": -8.631848335266113, "step": 6700 }, { "epoch": 2.15, "learning_rate": 1.5793528505392912e-07, "logits/generated": 8.849635124206543, "logits/real": 6.671989440917969, "logps/generated": -3037.92236328125, "logps/real": -543.0914306640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.3942108154297, "rewards/margins": 217.1494598388672, "rewards/real": -13.244732856750488, "step": 6710 }, { "epoch": 2.15, "learning_rate": 1.5734265734265734e-07, "logits/generated": 7.045986175537109, "logits/real": 6.005621433258057, "logps/generated": -3125.97705078125, "logps/real": -515.1239624023438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.1197509765625, "rewards/margins": 224.2371063232422, "rewards/real": -11.882619857788086, "step": 6720 }, { "epoch": 2.15, "learning_rate": 1.5675002963138556e-07, "logits/generated": 8.139238357543945, "logits/real": 6.531630039215088, "logps/generated": -2919.51171875, "logps/real": -565.8919677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -216.1737060546875, "rewards/margins": 202.49755859375, "rewards/real": -13.6761474609375, "step": 6730 }, { "epoch": 2.16, "learning_rate": 1.561574019201138e-07, "logits/generated": 8.825748443603516, "logits/real": 7.050539970397949, "logps/generated": -3694.990234375, "logps/real": -535.2900390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -286.7782287597656, "rewards/margins": 274.9042053222656, "rewards/real": -11.874015808105469, "step": 6740 }, { "epoch": 2.16, "learning_rate": 1.55564774208842e-07, "logits/generated": 7.093133449554443, "logits/real": 6.678928375244141, "logps/generated": -3599.341796875, "logps/real": -475.9574279785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -282.7834777832031, "rewards/margins": 272.8294982910156, "rewards/real": -9.953995704650879, "step": 6750 }, { "epoch": 2.16, "learning_rate": 1.5497214649757023e-07, "logits/generated": 8.537749290466309, "logits/real": 6.642735481262207, "logps/generated": -3347.43212890625, "logps/real": -498.96148681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -261.14410400390625, "rewards/margins": 248.98806762695312, "rewards/real": -12.156039237976074, "step": 6760 }, { "epoch": 2.17, "learning_rate": 1.5437951878629842e-07, "logits/generated": 8.844841957092285, "logits/real": 6.957926273345947, "logps/generated": -3282.98486328125, "logps/real": -516.2553100585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -250.47518920898438, "rewards/margins": 240.5557403564453, "rewards/real": -9.919448852539062, "step": 6770 }, { "epoch": 2.17, "learning_rate": 1.5378689107502666e-07, "logits/generated": 8.972710609436035, "logits/real": 6.534879207611084, "logps/generated": -2921.328857421875, "logps/real": -508.1558532714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -221.67919921875, "rewards/margins": 212.75949096679688, "rewards/real": -8.91970157623291, "step": 6780 }, { "epoch": 2.17, "learning_rate": 1.5319426336375487e-07, "logits/generated": 8.533835411071777, "logits/real": 6.47667932510376, "logps/generated": -3440.529296875, "logps/real": -492.8924255371094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -264.1382751464844, "rewards/margins": 252.6698455810547, "rewards/real": -11.468408584594727, "step": 6790 }, { "epoch": 2.18, "learning_rate": 1.526016356524831e-07, "logits/generated": 8.15134334564209, "logits/real": 6.353520393371582, "logps/generated": -3047.428955078125, "logps/real": -526.0081787109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -228.35330200195312, "rewards/margins": 217.01052856445312, "rewards/real": -11.342748641967773, "step": 6800 }, { "epoch": 2.18, "learning_rate": 1.5200900794121133e-07, "logits/generated": 6.73895788192749, "logits/real": 5.6384100914001465, "logps/generated": -3086.99365234375, "logps/real": -502.20379638671875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -247.3443145751953, "rewards/margins": 237.15011596679688, "rewards/real": -10.194220542907715, "step": 6810 }, { "epoch": 2.18, "learning_rate": 1.5141638022993955e-07, "logits/generated": 7.38037633895874, "logits/real": 6.842904090881348, "logps/generated": -3694.23486328125, "logps/real": -455.4473571777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -287.52056884765625, "rewards/margins": 281.06744384765625, "rewards/real": -6.453103065490723, "step": 6820 }, { "epoch": 2.19, "learning_rate": 1.5082375251866776e-07, "logits/generated": 8.233325004577637, "logits/real": 6.0410637855529785, "logps/generated": -3169.161865234375, "logps/real": -508.611572265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -238.38308715820312, "rewards/margins": 231.1945037841797, "rewards/real": -7.1885833740234375, "step": 6830 }, { "epoch": 2.19, "learning_rate": 1.50231124807396e-07, "logits/generated": 7.452976226806641, "logits/real": 6.547481536865234, "logps/generated": -3034.76708984375, "logps/real": -458.2308654785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.6567840576172, "rewards/margins": 220.747314453125, "rewards/real": -7.909487247467041, "step": 6840 }, { "epoch": 2.19, "learning_rate": 1.496384970961242e-07, "logits/generated": 8.054935455322266, "logits/real": 6.371067047119141, "logps/generated": -3391.658935546875, "logps/real": -462.4956970214844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -264.718994140625, "rewards/margins": 256.7153015136719, "rewards/real": -8.003695487976074, "step": 6850 }, { "epoch": 2.2, "learning_rate": 1.490458693848524e-07, "logits/generated": 8.609090805053711, "logits/real": 6.163253307342529, "logps/generated": -2746.60205078125, "logps/real": -534.8907470703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -199.83663940429688, "rewards/margins": 190.72523498535156, "rewards/real": -9.111414909362793, "step": 6860 }, { "epoch": 2.2, "learning_rate": 1.4845324167358065e-07, "logits/generated": 8.60438060760498, "logits/real": 6.821572303771973, "logps/generated": -3099.76904296875, "logps/real": -522.143310546875, "loss": 0.0326, "rewards/accuracies": 0.987500011920929, "rewards/generated": -235.1543426513672, "rewards/margins": 224.4142303466797, "rewards/real": -10.740114212036133, "step": 6870 }, { "epoch": 2.2, "learning_rate": 1.4786061396230887e-07, "logits/generated": 8.680463790893555, "logits/real": 6.93682336807251, "logps/generated": -3042.053466796875, "logps/real": -508.7593688964844, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -227.0716552734375, "rewards/margins": 217.5121612548828, "rewards/real": -9.55949592590332, "step": 6880 }, { "epoch": 2.2, "learning_rate": 1.4726798625103708e-07, "logits/generated": 8.262113571166992, "logits/real": 6.037432670593262, "logps/generated": -3019.892822265625, "logps/real": -511.603271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -227.21780395507812, "rewards/margins": 219.6192626953125, "rewards/real": -7.598532199859619, "step": 6890 }, { "epoch": 2.21, "learning_rate": 1.4667535853976532e-07, "logits/generated": 8.15229320526123, "logits/real": 6.869030952453613, "logps/generated": -3266.607177734375, "logps/real": -441.6212463378906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -246.9761505126953, "rewards/margins": 242.05593872070312, "rewards/real": -4.92018985748291, "step": 6900 }, { "epoch": 2.21, "learning_rate": 1.4608273082849354e-07, "logits/generated": 8.923186302185059, "logits/real": 6.955705165863037, "logps/generated": -2866.779296875, "logps/real": -494.61077880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -217.8163299560547, "rewards/margins": 210.55615234375, "rewards/real": -7.26016902923584, "step": 6910 }, { "epoch": 2.21, "learning_rate": 1.4549010311722175e-07, "logits/generated": 8.818031311035156, "logits/real": 6.868529319763184, "logps/generated": -3115.16455078125, "logps/real": -457.9888610839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -242.6837921142578, "rewards/margins": 234.36508178710938, "rewards/real": -8.318704605102539, "step": 6920 }, { "epoch": 2.22, "learning_rate": 1.4489747540595e-07, "logits/generated": 8.380151748657227, "logits/real": 6.492759704589844, "logps/generated": -2663.775634765625, "logps/real": -509.0582580566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -193.53292846679688, "rewards/margins": 185.15147399902344, "rewards/real": -8.381422996520996, "step": 6930 }, { "epoch": 2.22, "learning_rate": 1.4430484769467818e-07, "logits/generated": 8.378347396850586, "logits/real": 6.64984130859375, "logps/generated": -3147.98779296875, "logps/real": -540.5155029296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.3885955810547, "rewards/margins": 226.3179168701172, "rewards/real": -9.070671081542969, "step": 6940 }, { "epoch": 2.22, "learning_rate": 1.437122199834064e-07, "logits/generated": 8.826391220092773, "logits/real": 6.846202850341797, "logps/generated": -3313.457763671875, "logps/real": -532.3523559570312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -251.76705932617188, "rewards/margins": 241.6971435546875, "rewards/real": -10.069927215576172, "step": 6950 }, { "epoch": 2.23, "learning_rate": 1.4311959227213464e-07, "logits/generated": 8.973528861999512, "logits/real": 6.881982326507568, "logps/generated": -3429.06640625, "logps/real": -473.8736877441406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -265.77166748046875, "rewards/margins": 258.5041809082031, "rewards/real": -7.267443656921387, "step": 6960 }, { "epoch": 2.23, "learning_rate": 1.4252696456086286e-07, "logits/generated": 8.769198417663574, "logits/real": 6.9018378257751465, "logps/generated": -3349.08984375, "logps/real": -487.83416748046875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -256.9261169433594, "rewards/margins": 247.62295532226562, "rewards/real": -9.303197860717773, "step": 6970 }, { "epoch": 2.23, "learning_rate": 1.4193433684959107e-07, "logits/generated": 8.406700134277344, "logits/real": 7.254457950592041, "logps/generated": -3402.13916015625, "logps/real": -454.93072509765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -265.25970458984375, "rewards/margins": 259.6483459472656, "rewards/real": -5.611349582672119, "step": 6980 }, { "epoch": 2.24, "learning_rate": 1.4134170913831931e-07, "logits/generated": 9.241464614868164, "logits/real": 6.881594181060791, "logps/generated": -3373.100830078125, "logps/real": -507.3643493652344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -258.68670654296875, "rewards/margins": 252.55697631835938, "rewards/real": -6.129730701446533, "step": 6990 }, { "epoch": 2.24, "learning_rate": 1.4074908142704753e-07, "logits/generated": 9.111552238464355, "logits/real": 7.098635196685791, "logps/generated": -3203.89306640625, "logps/real": -452.55194091796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -247.7489013671875, "rewards/margins": 242.0307159423828, "rewards/real": -5.7181878089904785, "step": 7000 }, { "epoch": 2.24, "learning_rate": 1.4015645371577575e-07, "logits/generated": 8.396934509277344, "logits/real": 6.0458502769470215, "logps/generated": -3247.84814453125, "logps/real": -485.9319763183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -243.8344268798828, "rewards/margins": 234.6044464111328, "rewards/real": -9.229988098144531, "step": 7010 }, { "epoch": 2.25, "learning_rate": 1.39563826004504e-07, "logits/generated": 8.735933303833008, "logits/real": 6.552000999450684, "logps/generated": -3383.5390625, "logps/real": -543.9841918945312, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -262.8901062011719, "rewards/margins": 250.7201690673828, "rewards/real": -12.169952392578125, "step": 7020 }, { "epoch": 2.25, "learning_rate": 1.3897119829323218e-07, "logits/generated": 8.505255699157715, "logits/real": 7.016587734222412, "logps/generated": -3819.838623046875, "logps/real": -487.6444396972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -310.81805419921875, "rewards/margins": 301.5029602050781, "rewards/real": -9.315073013305664, "step": 7030 }, { "epoch": 2.25, "learning_rate": 1.383785705819604e-07, "logits/generated": 8.423628807067871, "logits/real": 7.505871772766113, "logps/generated": -4059.211669921875, "logps/real": -467.326416015625, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -323.254150390625, "rewards/margins": 314.37628173828125, "rewards/real": -8.877850532531738, "step": 7040 }, { "epoch": 2.26, "learning_rate": 1.3778594287068863e-07, "logits/generated": 8.615508079528809, "logits/real": 6.36544942855835, "logps/generated": -3176.83203125, "logps/real": -507.17803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.5415496826172, "rewards/margins": 231.36123657226562, "rewards/real": -5.180331230163574, "step": 7050 }, { "epoch": 2.26, "learning_rate": 1.3719331515941685e-07, "logits/generated": 8.317448616027832, "logits/real": 6.6887946128845215, "logps/generated": -3314.740966796875, "logps/real": -427.5897521972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -255.1221160888672, "rewards/margins": 250.97695922851562, "rewards/real": -4.145139217376709, "step": 7060 }, { "epoch": 2.26, "learning_rate": 1.3660068744814506e-07, "logits/generated": 9.352903366088867, "logits/real": 7.6759185791015625, "logps/generated": -3882.25927734375, "logps/real": -385.09552001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -306.3218078613281, "rewards/margins": 303.07958984375, "rewards/real": -3.2422244548797607, "step": 7070 }, { "epoch": 2.27, "learning_rate": 1.3600805973687328e-07, "logits/generated": 8.78382396697998, "logits/real": 7.0406670570373535, "logps/generated": -3306.19580078125, "logps/real": -407.8252258300781, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -254.5303192138672, "rewards/margins": 249.77505493164062, "rewards/real": -4.755265235900879, "step": 7080 }, { "epoch": 2.27, "learning_rate": 1.3541543202560152e-07, "logits/generated": 7.490535736083984, "logits/real": 5.449936389923096, "logps/generated": -2988.26123046875, "logps/real": -489.53656005859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -223.82992553710938, "rewards/margins": 218.34982299804688, "rewards/real": -5.480086326599121, "step": 7090 }, { "epoch": 2.27, "learning_rate": 1.3482280431432974e-07, "logits/generated": 8.167654037475586, "logits/real": 6.341849327087402, "logps/generated": -3206.620849609375, "logps/real": -470.4058532714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -242.9811553955078, "rewards/margins": 236.8303680419922, "rewards/real": -6.150760173797607, "step": 7100 }, { "epoch": 2.28, "learning_rate": 1.3423017660305795e-07, "logits/generated": 9.043058395385742, "logits/real": 6.698067665100098, "logps/generated": -3044.244873046875, "logps/real": -470.76922607421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.871826171875, "rewards/margins": 226.489013671875, "rewards/real": -5.382824897766113, "step": 7110 }, { "epoch": 2.28, "learning_rate": 1.3363754889178617e-07, "logits/generated": 7.741070747375488, "logits/real": 5.986207008361816, "logps/generated": -3287.11328125, "logps/real": -469.1253967285156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -255.7762451171875, "rewards/margins": 248.91635131835938, "rewards/real": -6.859864711761475, "step": 7120 }, { "epoch": 2.28, "learning_rate": 1.3304492118051438e-07, "logits/generated": 8.72269344329834, "logits/real": 6.637537479400635, "logps/generated": -3166.88037109375, "logps/real": -479.25518798828125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -237.02932739257812, "rewards/margins": 231.2690887451172, "rewards/real": -5.760217189788818, "step": 7130 }, { "epoch": 2.28, "learning_rate": 1.324522934692426e-07, "logits/generated": 8.140962600708008, "logits/real": 6.225215435028076, "logps/generated": -3386.83154296875, "logps/real": -443.4869689941406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -259.06512451171875, "rewards/margins": 254.51748657226562, "rewards/real": -4.5476250648498535, "step": 7140 }, { "epoch": 2.29, "learning_rate": 1.3185966575797084e-07, "logits/generated": 9.293863296508789, "logits/real": 6.415686130523682, "logps/generated": -3294.619873046875, "logps/real": -466.2376403808594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -260.78094482421875, "rewards/margins": 258.16094970703125, "rewards/real": -2.620023012161255, "step": 7150 }, { "epoch": 2.29, "learning_rate": 1.3126703804669906e-07, "logits/generated": 8.657960891723633, "logits/real": 6.652752876281738, "logps/generated": -2739.725830078125, "logps/real": -432.68865966796875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.00503540039062, "rewards/margins": 199.0812530517578, "rewards/real": -2.9237618446350098, "step": 7160 }, { "epoch": 2.29, "learning_rate": 1.3067441033542727e-07, "logits/generated": 7.0854291915893555, "logits/real": 5.276641845703125, "logps/generated": -3238.944091796875, "logps/real": -442.7730407714844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -243.7721710205078, "rewards/margins": 240.30892944335938, "rewards/real": -3.4632163047790527, "step": 7170 }, { "epoch": 2.3, "learning_rate": 1.3008178262415551e-07, "logits/generated": 8.631875991821289, "logits/real": 6.348507881164551, "logps/generated": -2581.404541015625, "logps/real": -496.02691650390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -184.64959716796875, "rewards/margins": 178.0803680419922, "rewards/real": -6.569250583648682, "step": 7180 }, { "epoch": 2.3, "learning_rate": 1.2948915491288373e-07, "logits/generated": 6.738485813140869, "logits/real": 5.268484115600586, "logps/generated": -3514.727294921875, "logps/real": -440.744873046875, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -272.2043762207031, "rewards/margins": 268.11370849609375, "rewards/real": -4.090660572052002, "step": 7190 }, { "epoch": 2.3, "learning_rate": 1.2889652720161194e-07, "logits/generated": 8.207395553588867, "logits/real": 6.5558061599731445, "logps/generated": -3093.1455078125, "logps/real": -451.7779235839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.7047119140625, "rewards/margins": 229.0303497314453, "rewards/real": -3.6743712425231934, "step": 7200 }, { "epoch": 2.31, "learning_rate": 1.2830389949034016e-07, "logits/generated": 8.918447494506836, "logits/real": 6.2346720695495605, "logps/generated": -3468.356689453125, "logps/real": -406.62750244140625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -270.0596008300781, "rewards/margins": 268.2888488769531, "rewards/real": -1.770755410194397, "step": 7210 }, { "epoch": 2.31, "learning_rate": 1.2771127177906838e-07, "logits/generated": 8.439786911010742, "logits/real": 5.744556427001953, "logps/generated": -3204.14599609375, "logps/real": -483.17901611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -242.4368133544922, "rewards/margins": 237.56005859375, "rewards/real": -4.8767476081848145, "step": 7220 }, { "epoch": 2.31, "learning_rate": 1.271186440677966e-07, "logits/generated": 8.601608276367188, "logits/real": 6.897097587585449, "logps/generated": -3541.59228515625, "logps/real": -530.5047607421875, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -270.2284240722656, "rewards/margins": 260.85369873046875, "rewards/real": -9.374723434448242, "step": 7230 }, { "epoch": 2.32, "learning_rate": 1.2652601635652483e-07, "logits/generated": 8.738018035888672, "logits/real": 6.436424255371094, "logps/generated": -3279.285888671875, "logps/real": -559.18359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -251.47543334960938, "rewards/margins": 240.8763885498047, "rewards/real": -10.59908676147461, "step": 7240 }, { "epoch": 2.32, "learning_rate": 1.2593338864525305e-07, "logits/generated": 8.319341659545898, "logits/real": 5.828103542327881, "logps/generated": -3006.374267578125, "logps/real": -519.8423461914062, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.9573211669922, "rewards/margins": 219.62197875976562, "rewards/real": -9.335381507873535, "step": 7250 }, { "epoch": 2.32, "learning_rate": 1.2534076093398126e-07, "logits/generated": 8.734830856323242, "logits/real": 6.9785051345825195, "logps/generated": -3130.96337890625, "logps/real": -490.4901428222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -240.1027069091797, "rewards/margins": 228.65390014648438, "rewards/real": -11.448785781860352, "step": 7260 }, { "epoch": 2.33, "learning_rate": 1.247481332227095e-07, "logits/generated": 7.888822078704834, "logits/real": 6.749417304992676, "logps/generated": -3655.416015625, "logps/real": -525.7223510742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -279.3916320800781, "rewards/margins": 268.2656555175781, "rewards/real": -11.125957489013672, "step": 7270 }, { "epoch": 2.33, "learning_rate": 1.241555055114377e-07, "logits/generated": 8.444803237915039, "logits/real": 6.05511999130249, "logps/generated": -3036.83740234375, "logps/real": -523.1268920898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.044677734375, "rewards/margins": 218.98080444335938, "rewards/real": -9.063852310180664, "step": 7280 }, { "epoch": 2.33, "learning_rate": 1.2356287780016594e-07, "logits/generated": 8.031878471374512, "logits/real": 7.035772800445557, "logps/generated": -3368.91064453125, "logps/real": -522.8712158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -261.19561767578125, "rewards/margins": 249.4193115234375, "rewards/real": -11.776281356811523, "step": 7290 }, { "epoch": 2.34, "learning_rate": 1.2297025008889415e-07, "logits/generated": 8.28609561920166, "logits/real": 6.288962364196777, "logps/generated": -3361.7734375, "logps/real": -539.480712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -264.5245666503906, "rewards/margins": 253.39013671875, "rewards/real": -11.134428977966309, "step": 7300 }, { "epoch": 2.34, "learning_rate": 1.2237762237762237e-07, "logits/generated": 7.856895446777344, "logits/real": 5.045229911804199, "logps/generated": -2988.12646484375, "logps/real": -521.8264770507812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -225.96408081054688, "rewards/margins": 217.01510620117188, "rewards/real": -8.948990821838379, "step": 7310 }, { "epoch": 2.34, "learning_rate": 1.2178499466635058e-07, "logits/generated": 8.300315856933594, "logits/real": 6.873198509216309, "logps/generated": -3046.8232421875, "logps/real": -505.1780700683594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -233.4577178955078, "rewards/margins": 223.3386993408203, "rewards/real": -10.11899471282959, "step": 7320 }, { "epoch": 2.35, "learning_rate": 1.2119236695507882e-07, "logits/generated": 6.966501712799072, "logits/real": 5.874166965484619, "logps/generated": -3359.65966796875, "logps/real": -470.18817138671875, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/generated": -259.39019775390625, "rewards/margins": 250.69100952148438, "rewards/real": -8.699178695678711, "step": 7330 }, { "epoch": 2.35, "learning_rate": 1.2059973924380704e-07, "logits/generated": 8.509557723999023, "logits/real": 6.391755104064941, "logps/generated": -3162.77490234375, "logps/real": -503.335693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -245.4657745361328, "rewards/margins": 236.8259735107422, "rewards/real": -8.639801025390625, "step": 7340 }, { "epoch": 2.35, "learning_rate": 1.2000711153253525e-07, "logits/generated": 8.237863540649414, "logits/real": 5.539827823638916, "logps/generated": -2839.787353515625, "logps/real": -530.6296997070312, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/generated": -211.0758819580078, "rewards/margins": 199.57498168945312, "rewards/real": -11.500879287719727, "step": 7350 }, { "epoch": 2.36, "learning_rate": 1.1941448382126347e-07, "logits/generated": 8.909086227416992, "logits/real": 6.881753444671631, "logps/generated": -3122.566650390625, "logps/real": -471.3475646972656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -238.6673583984375, "rewards/margins": 230.98965454101562, "rewards/real": -7.677725315093994, "step": 7360 }, { "epoch": 2.36, "learning_rate": 1.188218561099917e-07, "logits/generated": 8.347406387329102, "logits/real": 5.757805347442627, "logps/generated": -2856.157958984375, "logps/real": -506.03338623046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -210.9342498779297, "rewards/margins": 201.84642028808594, "rewards/real": -9.087831497192383, "step": 7370 }, { "epoch": 2.36, "learning_rate": 1.1822922839871991e-07, "logits/generated": 8.802051544189453, "logits/real": 6.685844421386719, "logps/generated": -2690.776611328125, "logps/real": -485.890380859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -197.5401153564453, "rewards/margins": 187.3584747314453, "rewards/real": -10.181621551513672, "step": 7380 }, { "epoch": 2.36, "learning_rate": 1.1763660068744814e-07, "logits/generated": 8.245077133178711, "logits/real": 6.461634159088135, "logps/generated": -2959.48095703125, "logps/real": -492.4681091308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -220.91873168945312, "rewards/margins": 209.2865447998047, "rewards/real": -11.632192611694336, "step": 7390 }, { "epoch": 2.37, "learning_rate": 1.1704397297617637e-07, "logits/generated": 7.972049713134766, "logits/real": 6.514813423156738, "logps/generated": -3003.560791015625, "logps/real": -558.0601806640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -225.2334442138672, "rewards/margins": 214.35977172851562, "rewards/real": -10.873666763305664, "step": 7400 }, { "epoch": 2.37, "learning_rate": 1.1645134526490457e-07, "logits/generated": 8.545842170715332, "logits/real": 6.854429721832275, "logps/generated": -2970.538818359375, "logps/real": -547.0460205078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.17715454101562, "rewards/margins": 207.31192016601562, "rewards/real": -11.86522102355957, "step": 7410 }, { "epoch": 2.37, "learning_rate": 1.158587175536328e-07, "logits/generated": 8.99134349822998, "logits/real": 6.58907413482666, "logps/generated": -3038.564697265625, "logps/real": -552.1268920898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -227.2650909423828, "rewards/margins": 216.3042449951172, "rewards/real": -10.960864067077637, "step": 7420 }, { "epoch": 2.38, "learning_rate": 1.1526608984236103e-07, "logits/generated": 8.350300788879395, "logits/real": 6.459983825683594, "logps/generated": -3248.830810546875, "logps/real": -489.6896057128906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -247.15890502929688, "rewards/margins": 239.06338500976562, "rewards/real": -8.095573425292969, "step": 7430 }, { "epoch": 2.38, "learning_rate": 1.1467346213108925e-07, "logits/generated": 8.454129219055176, "logits/real": 6.041914463043213, "logps/generated": -2700.3955078125, "logps/real": -571.2770385742188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -199.3916473388672, "rewards/margins": 188.1833038330078, "rewards/real": -11.208356857299805, "step": 7440 }, { "epoch": 2.38, "learning_rate": 1.1408083441981746e-07, "logits/generated": 7.960211277008057, "logits/real": 6.010439872741699, "logps/generated": -2977.273681640625, "logps/real": -525.8263549804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -224.74032592773438, "rewards/margins": 212.96878051757812, "rewards/real": -11.771557807922363, "step": 7450 }, { "epoch": 2.39, "learning_rate": 1.1348820670854568e-07, "logits/generated": 8.200027465820312, "logits/real": 6.665806770324707, "logps/generated": -3064.36767578125, "logps/real": -486.09844970703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.4701385498047, "rewards/margins": 219.6273193359375, "rewards/real": -8.842813491821289, "step": 7460 }, { "epoch": 2.39, "learning_rate": 1.128955789972739e-07, "logits/generated": 8.364141464233398, "logits/real": 6.64084005355835, "logps/generated": -3091.780517578125, "logps/real": -510.8404846191406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.28793334960938, "rewards/margins": 221.954345703125, "rewards/real": -8.333571434020996, "step": 7470 }, { "epoch": 2.39, "learning_rate": 1.1230295128600213e-07, "logits/generated": 8.71517562866211, "logits/real": 7.3082475662231445, "logps/generated": -3157.639892578125, "logps/real": -494.10369873046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -239.4864501953125, "rewards/margins": 227.5179443359375, "rewards/real": -11.968478202819824, "step": 7480 }, { "epoch": 2.4, "learning_rate": 1.1171032357473035e-07, "logits/generated": 8.69986629486084, "logits/real": 6.811720848083496, "logps/generated": -3120.3955078125, "logps/real": -515.9996948242188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.11764526367188, "rewards/margins": 223.11318969726562, "rewards/real": -13.00445556640625, "step": 7490 }, { "epoch": 2.4, "learning_rate": 1.1111769586345857e-07, "logits/generated": 8.366250991821289, "logits/real": 6.488030910491943, "logps/generated": -3186.220703125, "logps/real": -512.2991943359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -244.80825805664062, "rewards/margins": 233.73526000976562, "rewards/real": -11.072997093200684, "step": 7500 }, { "epoch": 2.4, "learning_rate": 1.105250681521868e-07, "logits/generated": 8.290487289428711, "logits/real": 6.556336402893066, "logps/generated": -3309.074951171875, "logps/real": -539.4486083984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -251.5416259765625, "rewards/margins": 240.22921752929688, "rewards/real": -11.312393188476562, "step": 7510 }, { "epoch": 2.41, "learning_rate": 1.0993244044091501e-07, "logits/generated": 7.556092739105225, "logits/real": 6.210749626159668, "logps/generated": -3153.72412109375, "logps/real": -448.5066833496094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -238.2395477294922, "rewards/margins": 229.29702758789062, "rewards/real": -8.942509651184082, "step": 7520 }, { "epoch": 2.41, "learning_rate": 1.0933981272964324e-07, "logits/generated": 8.106557846069336, "logits/real": 6.071941375732422, "logps/generated": -3127.99853515625, "logps/real": -527.2283935546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.1862335205078, "rewards/margins": 224.18887329101562, "rewards/real": -7.997386932373047, "step": 7530 }, { "epoch": 2.41, "learning_rate": 1.0874718501837145e-07, "logits/generated": 8.648457527160645, "logits/real": 6.627309322357178, "logps/generated": -2996.19775390625, "logps/real": -508.94573974609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -227.7726287841797, "rewards/margins": 218.9408721923828, "rewards/real": -8.831782341003418, "step": 7540 }, { "epoch": 2.42, "learning_rate": 1.0815455730709967e-07, "logits/generated": 7.198336601257324, "logits/real": 6.407703399658203, "logps/generated": -3076.6884765625, "logps/real": -508.71624755859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.6094970703125, "rewards/margins": 222.42626953125, "rewards/real": -10.183234214782715, "step": 7550 }, { "epoch": 2.42, "learning_rate": 1.075619295958279e-07, "logits/generated": 8.322542190551758, "logits/real": 7.442639350891113, "logps/generated": -3138.765625, "logps/real": -513.2637939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.7306365966797, "rewards/margins": 226.0619659423828, "rewards/real": -10.66869068145752, "step": 7560 }, { "epoch": 2.42, "learning_rate": 1.0696930188455613e-07, "logits/generated": 7.3330841064453125, "logits/real": 6.53369665145874, "logps/generated": -3417.4921875, "logps/real": -483.42169189453125, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/generated": -260.60186767578125, "rewards/margins": 250.2184295654297, "rewards/real": -10.383401870727539, "step": 7570 }, { "epoch": 2.43, "learning_rate": 1.0637667417328434e-07, "logits/generated": 8.66540813446045, "logits/real": 6.852670192718506, "logps/generated": -3120.0986328125, "logps/real": -525.4196166992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -237.3667449951172, "rewards/margins": 228.04263305664062, "rewards/real": -9.324110984802246, "step": 7580 }, { "epoch": 2.43, "learning_rate": 1.0578404646201256e-07, "logits/generated": 8.588456153869629, "logits/real": 6.860833168029785, "logps/generated": -3217.048828125, "logps/real": -493.32220458984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -241.5303192138672, "rewards/margins": 233.52383422851562, "rewards/real": -8.006438255310059, "step": 7590 }, { "epoch": 2.43, "learning_rate": 1.0519141875074079e-07, "logits/generated": 7.846449851989746, "logits/real": 6.278639316558838, "logps/generated": -3072.62548828125, "logps/real": -496.69305419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.2223663330078, "rewards/margins": 222.03768920898438, "rewards/real": -10.184678077697754, "step": 7600 }, { "epoch": 2.44, "learning_rate": 1.04598791039469e-07, "logits/generated": 9.045788764953613, "logits/real": 7.030297756195068, "logps/generated": -3398.98291015625, "logps/real": -469.4801330566406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -263.426025390625, "rewards/margins": 256.78546142578125, "rewards/real": -6.640578269958496, "step": 7610 }, { "epoch": 2.44, "learning_rate": 1.0400616332819723e-07, "logits/generated": 7.379385948181152, "logits/real": 5.993938446044922, "logps/generated": -2652.6396484375, "logps/real": -471.1642150878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -194.46290588378906, "rewards/margins": 188.17604064941406, "rewards/real": -6.286888122558594, "step": 7620 }, { "epoch": 2.44, "learning_rate": 1.0341353561692543e-07, "logits/generated": 8.407931327819824, "logits/real": 7.203877925872803, "logps/generated": -3417.8828125, "logps/real": -502.9291076660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -263.67401123046875, "rewards/margins": 254.647705078125, "rewards/real": -9.026300430297852, "step": 7630 }, { "epoch": 2.44, "learning_rate": 1.0282090790565366e-07, "logits/generated": 9.146268844604492, "logits/real": 6.94366979598999, "logps/generated": -3075.98681640625, "logps/real": -516.8055419921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -224.0660400390625, "rewards/margins": 217.21109008789062, "rewards/real": -6.854970455169678, "step": 7640 }, { "epoch": 2.45, "learning_rate": 1.0222828019438189e-07, "logits/generated": 8.590862274169922, "logits/real": 5.913880348205566, "logps/generated": -2737.70849609375, "logps/real": -528.1287231445312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -199.1512451171875, "rewards/margins": 189.98355102539062, "rewards/real": -9.167717933654785, "step": 7650 }, { "epoch": 2.45, "learning_rate": 1.016356524831101e-07, "logits/generated": 7.989936828613281, "logits/real": 6.846987247467041, "logps/generated": -3002.65966796875, "logps/real": -423.24920654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -225.61865234375, "rewards/margins": 218.360595703125, "rewards/real": -7.258036136627197, "step": 7660 }, { "epoch": 2.45, "learning_rate": 1.0104302477183832e-07, "logits/generated": 8.157800674438477, "logits/real": 6.385549545288086, "logps/generated": -2724.33544921875, "logps/real": -476.62677001953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -201.969482421875, "rewards/margins": 195.93917846679688, "rewards/real": -6.030301094055176, "step": 7670 }, { "epoch": 2.46, "learning_rate": 1.0045039706056655e-07, "logits/generated": 8.902392387390137, "logits/real": 6.8903398513793945, "logps/generated": -2666.68359375, "logps/real": -466.9556579589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -190.82240295410156, "rewards/margins": 185.0307159423828, "rewards/real": -5.791709899902344, "step": 7680 }, { "epoch": 2.46, "learning_rate": 9.985776934929476e-08, "logits/generated": 8.826810836791992, "logits/real": 6.918051719665527, "logps/generated": -3278.642578125, "logps/real": -463.67669677734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -251.1935272216797, "rewards/margins": 244.5741729736328, "rewards/real": -6.619349479675293, "step": 7690 }, { "epoch": 2.46, "learning_rate": 9.926514163802299e-08, "logits/generated": 8.831548690795898, "logits/real": 6.432858467102051, "logps/generated": -2823.852783203125, "logps/real": -505.65728759765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -208.52395629882812, "rewards/margins": 198.29066467285156, "rewards/real": -10.233301162719727, "step": 7700 }, { "epoch": 2.47, "learning_rate": 9.867251392675122e-08, "logits/generated": 8.891983032226562, "logits/real": 7.067227363586426, "logps/generated": -3419.63818359375, "logps/real": -540.4342651367188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -262.2893981933594, "rewards/margins": 252.92373657226562, "rewards/real": -9.365700721740723, "step": 7710 }, { "epoch": 2.47, "learning_rate": 9.807988621547942e-08, "logits/generated": 8.530720710754395, "logits/real": 6.083195686340332, "logps/generated": -3049.927001953125, "logps/real": -482.12371826171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.1068572998047, "rewards/margins": 223.59994506835938, "rewards/real": -4.5069074630737305, "step": 7720 }, { "epoch": 2.47, "learning_rate": 9.748725850420765e-08, "logits/generated": 7.608381748199463, "logits/real": 6.290399551391602, "logps/generated": -3043.66259765625, "logps/real": -474.33251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.9565887451172, "rewards/margins": 219.8770294189453, "rewards/real": -9.079549789428711, "step": 7730 }, { "epoch": 2.48, "learning_rate": 9.689463079293588e-08, "logits/generated": 7.3313117027282715, "logits/real": 6.270340442657471, "logps/generated": -2731.7001953125, "logps/real": -474.37103271484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -206.07177734375, "rewards/margins": 198.39649963378906, "rewards/real": -7.675266265869141, "step": 7740 }, { "epoch": 2.48, "learning_rate": 9.63020030816641e-08, "logits/generated": 8.210073471069336, "logits/real": 6.875308990478516, "logps/generated": -3052.072021484375, "logps/real": -507.93548583984375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.67483520507812, "rewards/margins": 219.43246459960938, "rewards/real": -9.242362022399902, "step": 7750 }, { "epoch": 2.48, "learning_rate": 9.570937537039231e-08, "logits/generated": 7.494936943054199, "logits/real": 6.656552791595459, "logps/generated": -3074.93701171875, "logps/real": -451.45965576171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -227.959716796875, "rewards/margins": 220.84555053710938, "rewards/real": -7.114162445068359, "step": 7760 }, { "epoch": 2.49, "learning_rate": 9.511674765912053e-08, "logits/generated": 7.773172855377197, "logits/real": 6.899244785308838, "logps/generated": -3074.46533203125, "logps/real": -430.2386169433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -225.5403289794922, "rewards/margins": 218.5653839111328, "rewards/real": -6.974958896636963, "step": 7770 }, { "epoch": 2.49, "learning_rate": 9.452411994784876e-08, "logits/generated": 8.745676040649414, "logits/real": 7.1577606201171875, "logps/generated": -3227.46826171875, "logps/real": -472.109130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -250.3969268798828, "rewards/margins": 242.6730499267578, "rewards/real": -7.723855495452881, "step": 7780 }, { "epoch": 2.49, "learning_rate": 9.393149223657698e-08, "logits/generated": 7.699820041656494, "logits/real": 6.185589790344238, "logps/generated": -2883.79052734375, "logps/real": -462.053955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.4676055908203, "rewards/margins": 204.74209594726562, "rewards/real": -7.725516319274902, "step": 7790 }, { "epoch": 2.5, "learning_rate": 9.33388645253052e-08, "logits/generated": 7.670037269592285, "logits/real": 6.324313163757324, "logps/generated": -3058.869873046875, "logps/real": -483.980224609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.7894287109375, "rewards/margins": 220.53054809570312, "rewards/real": -8.258888244628906, "step": 7800 }, { "epoch": 2.5, "learning_rate": 9.274623681403342e-08, "logits/generated": 8.345635414123535, "logits/real": 7.0114336013793945, "logps/generated": -3245.26904296875, "logps/real": -460.19580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -244.41445922851562, "rewards/margins": 236.5243682861328, "rewards/real": -7.890088081359863, "step": 7810 }, { "epoch": 2.5, "learning_rate": 9.215360910276164e-08, "logits/generated": 7.948687553405762, "logits/real": 6.488442897796631, "logps/generated": -2791.9853515625, "logps/real": -529.9259033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -202.7161407470703, "rewards/margins": 193.24630737304688, "rewards/real": -9.469828605651855, "step": 7820 }, { "epoch": 2.51, "learning_rate": 9.156098139148986e-08, "logits/generated": 8.674848556518555, "logits/real": 6.751415252685547, "logps/generated": -3026.496337890625, "logps/real": -488.12493896484375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -224.2441864013672, "rewards/margins": 214.6639404296875, "rewards/real": -9.580277442932129, "step": 7830 }, { "epoch": 2.51, "learning_rate": 9.096835368021809e-08, "logits/generated": 8.015366554260254, "logits/real": 6.515145778656006, "logps/generated": -2725.67822265625, "logps/real": -484.0481872558594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -199.52987670898438, "rewards/margins": 191.36412048339844, "rewards/real": -8.165761947631836, "step": 7840 }, { "epoch": 2.51, "learning_rate": 9.03757259689463e-08, "logits/generated": 7.64105749130249, "logits/real": 6.039445877075195, "logps/generated": -2905.73388671875, "logps/real": -496.81866455078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -214.1513214111328, "rewards/margins": 205.6875, "rewards/real": -8.463826179504395, "step": 7850 }, { "epoch": 2.52, "learning_rate": 8.978309825767452e-08, "logits/generated": 8.532212257385254, "logits/real": 6.559307098388672, "logps/generated": -3066.919189453125, "logps/real": -453.01513671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -234.78897094726562, "rewards/margins": 227.9938507080078, "rewards/real": -6.795121192932129, "step": 7860 }, { "epoch": 2.52, "learning_rate": 8.919047054640275e-08, "logits/generated": 8.549744606018066, "logits/real": 6.594319820404053, "logps/generated": -3098.24951171875, "logps/real": -495.947998046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.82827758789062, "rewards/margins": 224.74755859375, "rewards/real": -7.080715179443359, "step": 7870 }, { "epoch": 2.52, "learning_rate": 8.859784283513098e-08, "logits/generated": 8.632806777954102, "logits/real": 7.073866844177246, "logps/generated": -3101.5869140625, "logps/real": -500.9227600097656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.7616424560547, "rewards/margins": 226.14273071289062, "rewards/real": -10.61892032623291, "step": 7880 }, { "epoch": 2.52, "learning_rate": 8.800521512385919e-08, "logits/generated": 8.268487930297852, "logits/real": 6.868411064147949, "logps/generated": -3099.438232421875, "logps/real": -441.53729248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.96975708007812, "rewards/margins": 222.85592651367188, "rewards/real": -7.1137824058532715, "step": 7890 }, { "epoch": 2.53, "learning_rate": 8.741258741258741e-08, "logits/generated": 8.475733757019043, "logits/real": 6.7457780838012695, "logps/generated": -2864.928955078125, "logps/real": -513.4609985351562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -216.50387573242188, "rewards/margins": 206.0469207763672, "rewards/real": -10.456939697265625, "step": 7900 }, { "epoch": 2.53, "learning_rate": 8.681995970131564e-08, "logits/generated": 8.000772476196289, "logits/real": 6.194445610046387, "logps/generated": -2624.559326171875, "logps/real": -482.51446533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -189.77847290039062, "rewards/margins": 180.35269165039062, "rewards/real": -9.42580509185791, "step": 7910 }, { "epoch": 2.53, "learning_rate": 8.622733199004385e-08, "logits/generated": 8.773655891418457, "logits/real": 6.2353973388671875, "logps/generated": -2840.182373046875, "logps/real": -499.4354553222656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -210.44277954101562, "rewards/margins": 203.42784118652344, "rewards/real": -7.0149431228637695, "step": 7920 }, { "epoch": 2.54, "learning_rate": 8.563470427877208e-08, "logits/generated": 8.499971389770508, "logits/real": 6.817389488220215, "logps/generated": -3008.66064453125, "logps/real": -482.41094970703125, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/generated": -233.84768676757812, "rewards/margins": 226.2111053466797, "rewards/real": -7.636592864990234, "step": 7930 }, { "epoch": 2.54, "learning_rate": 8.504207656750028e-08, "logits/generated": 7.8906354904174805, "logits/real": 6.1556396484375, "logps/generated": -2600.800537109375, "logps/real": -495.549072265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -187.71524047851562, "rewards/margins": 180.59689331054688, "rewards/real": -7.118335723876953, "step": 7940 }, { "epoch": 2.54, "learning_rate": 8.444944885622851e-08, "logits/generated": 7.559821128845215, "logits/real": 7.1993889808654785, "logps/generated": -3027.2060546875, "logps/real": -408.83050537109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -223.34005737304688, "rewards/margins": 220.49716186523438, "rewards/real": -2.842907667160034, "step": 7950 }, { "epoch": 2.55, "learning_rate": 8.385682114495674e-08, "logits/generated": 7.938685417175293, "logits/real": 7.287388801574707, "logps/generated": -2590.703857421875, "logps/real": -458.4735412597656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -183.72933959960938, "rewards/margins": 176.34324645996094, "rewards/real": -7.386090278625488, "step": 7960 }, { "epoch": 2.55, "learning_rate": 8.326419343368495e-08, "logits/generated": 8.193671226501465, "logits/real": 6.7212066650390625, "logps/generated": -2593.3232421875, "logps/real": -458.5462341308594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -186.13685607910156, "rewards/margins": 179.97171020507812, "rewards/real": -6.16514778137207, "step": 7970 }, { "epoch": 2.55, "learning_rate": 8.267156572241317e-08, "logits/generated": 8.131782531738281, "logits/real": 6.256504058837891, "logps/generated": -2737.94140625, "logps/real": -522.2247314453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -199.4661102294922, "rewards/margins": 190.0080108642578, "rewards/real": -9.458121299743652, "step": 7980 }, { "epoch": 2.56, "learning_rate": 8.20789380111414e-08, "logits/generated": 8.670708656311035, "logits/real": 7.335108757019043, "logps/generated": -2945.6826171875, "logps/real": -421.588134765625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.63961791992188, "rewards/margins": 214.8706512451172, "rewards/real": -4.7689924240112305, "step": 7990 }, { "epoch": 2.56, "learning_rate": 8.148631029986961e-08, "logits/generated": 8.351374626159668, "logits/real": 6.128232002258301, "logps/generated": -2504.122314453125, "logps/real": -537.0484619140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -178.33494567871094, "rewards/margins": 167.54067993164062, "rewards/real": -10.794281959533691, "step": 8000 }, { "epoch": 2.56, "learning_rate": 8.089368258859784e-08, "logits/generated": 8.668087005615234, "logits/real": 6.344940185546875, "logps/generated": -2691.02978515625, "logps/real": -487.99761962890625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -193.5940399169922, "rewards/margins": 187.6653594970703, "rewards/real": -5.928709983825684, "step": 8010 }, { "epoch": 2.57, "learning_rate": 8.030105487732607e-08, "logits/generated": 8.408792495727539, "logits/real": 6.670283317565918, "logps/generated": -2835.9873046875, "logps/real": -467.03961181640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.1270294189453, "rewards/margins": 206.1318359375, "rewards/real": -5.99518346786499, "step": 8020 }, { "epoch": 2.57, "learning_rate": 7.970842716605427e-08, "logits/generated": 8.6127290725708, "logits/real": 6.988703727722168, "logps/generated": -3112.61572265625, "logps/real": -471.856201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.280517578125, "rewards/margins": 225.05819702148438, "rewards/real": -7.222315788269043, "step": 8030 }, { "epoch": 2.57, "learning_rate": 7.91157994547825e-08, "logits/generated": 8.019750595092773, "logits/real": 6.731808662414551, "logps/generated": -2885.22509765625, "logps/real": -419.47882080078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -206.2860870361328, "rewards/margins": 203.50540161132812, "rewards/real": -2.780665397644043, "step": 8040 }, { "epoch": 2.58, "learning_rate": 7.852317174351073e-08, "logits/generated": 7.291733741760254, "logits/real": 5.934380054473877, "logps/generated": -2882.16015625, "logps/real": -482.5450134277344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.2113494873047, "rewards/margins": 206.3450469970703, "rewards/real": -5.866301536560059, "step": 8050 }, { "epoch": 2.58, "learning_rate": 7.793054403223895e-08, "logits/generated": 8.186673164367676, "logits/real": 6.381945610046387, "logps/generated": -2336.23486328125, "logps/real": -554.4151611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -159.95040893554688, "rewards/margins": 151.04306030273438, "rewards/real": -8.907343864440918, "step": 8060 }, { "epoch": 2.58, "learning_rate": 7.733791632096716e-08, "logits/generated": 7.242814064025879, "logits/real": 5.613251686096191, "logps/generated": -2496.26904296875, "logps/real": -480.0599060058594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -178.32337951660156, "rewards/margins": 171.197021484375, "rewards/real": -7.126336574554443, "step": 8070 }, { "epoch": 2.59, "learning_rate": 7.674528860969538e-08, "logits/generated": 7.202728271484375, "logits/real": 7.086617469787598, "logps/generated": -3030.654052734375, "logps/real": -447.0415954589844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -222.2557373046875, "rewards/margins": 214.59091186523438, "rewards/real": -7.664820194244385, "step": 8080 }, { "epoch": 2.59, "learning_rate": 7.61526608984236e-08, "logits/generated": 8.18061637878418, "logits/real": 6.772614479064941, "logps/generated": -2938.49365234375, "logps/real": -427.3243103027344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.37203979492188, "rewards/margins": 211.4892578125, "rewards/real": -7.8827691078186035, "step": 8090 }, { "epoch": 2.59, "learning_rate": 7.556003318715183e-08, "logits/generated": 7.658883094787598, "logits/real": 6.239137172698975, "logps/generated": -2842.7802734375, "logps/real": -441.6241760253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -209.82827758789062, "rewards/margins": 204.9974822998047, "rewards/real": -4.830779552459717, "step": 8100 }, { "epoch": 2.6, "learning_rate": 7.496740547588005e-08, "logits/generated": 8.507543563842773, "logits/real": 6.860140800476074, "logps/generated": -2684.00244140625, "logps/real": -450.237548828125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -194.63888549804688, "rewards/margins": 187.77694702148438, "rewards/real": -6.861931800842285, "step": 8110 }, { "epoch": 2.6, "learning_rate": 7.437477776460826e-08, "logits/generated": 8.28409481048584, "logits/real": 6.793871879577637, "logps/generated": -2975.43017578125, "logps/real": -475.64910888671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -218.27847290039062, "rewards/margins": 210.76644897460938, "rewards/real": -7.512001037597656, "step": 8120 }, { "epoch": 2.6, "learning_rate": 7.37821500533365e-08, "logits/generated": 7.718270778656006, "logits/real": 6.06919002532959, "logps/generated": -2796.29345703125, "logps/real": -491.36773681640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -201.97525024414062, "rewards/margins": 193.45481872558594, "rewards/real": -8.520421028137207, "step": 8130 }, { "epoch": 2.6, "learning_rate": 7.318952234206471e-08, "logits/generated": 8.212821960449219, "logits/real": 6.44082498550415, "logps/generated": -2817.268310546875, "logps/real": -501.8804626464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -206.9821014404297, "rewards/margins": 198.33004760742188, "rewards/real": -8.652074813842773, "step": 8140 }, { "epoch": 2.61, "learning_rate": 7.259689463079294e-08, "logits/generated": 8.36870288848877, "logits/real": 6.5693039894104, "logps/generated": -2830.0927734375, "logps/real": -514.526123046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -208.09927368164062, "rewards/margins": 198.58323669433594, "rewards/real": -9.516035079956055, "step": 8150 }, { "epoch": 2.61, "learning_rate": 7.200426691952115e-08, "logits/generated": 8.577680587768555, "logits/real": 7.00147008895874, "logps/generated": -2552.5947265625, "logps/real": -494.31134033203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -176.40646362304688, "rewards/margins": 169.28854370117188, "rewards/real": -7.117916107177734, "step": 8160 }, { "epoch": 2.61, "learning_rate": 7.141163920824937e-08, "logits/generated": 9.002822875976562, "logits/real": 7.558182716369629, "logps/generated": -3054.18896484375, "logps/real": -521.36474609375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.5747528076172, "rewards/margins": 220.90518188476562, "rewards/real": -9.669589042663574, "step": 8170 }, { "epoch": 2.62, "learning_rate": 7.08190114969776e-08, "logits/generated": 8.595315933227539, "logits/real": 6.56006383895874, "logps/generated": -2626.43017578125, "logps/real": -476.56951904296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -188.16757202148438, "rewards/margins": 181.84848022460938, "rewards/real": -6.319100856781006, "step": 8180 }, { "epoch": 2.62, "learning_rate": 7.022638378570583e-08, "logits/generated": 8.552518844604492, "logits/real": 6.481810569763184, "logps/generated": -2577.979736328125, "logps/real": -508.6376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -185.40133666992188, "rewards/margins": 179.17532348632812, "rewards/real": -6.226019859313965, "step": 8190 }, { "epoch": 2.62, "learning_rate": 6.963375607443404e-08, "logits/generated": 8.224884986877441, "logits/real": 6.555108547210693, "logps/generated": -2706.04345703125, "logps/real": -479.3734436035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -193.58982849121094, "rewards/margins": 186.0579376220703, "rewards/real": -7.531900882720947, "step": 8200 }, { "epoch": 2.63, "learning_rate": 6.904112836316226e-08, "logits/generated": 7.602932929992676, "logits/real": 6.416626930236816, "logps/generated": -2461.69580078125, "logps/real": -462.24969482421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -177.93165588378906, "rewards/margins": 170.52548217773438, "rewards/real": -7.40619421005249, "step": 8210 }, { "epoch": 2.63, "learning_rate": 6.844850065189047e-08, "logits/generated": 8.246734619140625, "logits/real": 6.39022159576416, "logps/generated": -2652.571533203125, "logps/real": -470.65863037109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -188.11558532714844, "rewards/margins": 183.194091796875, "rewards/real": -4.921482086181641, "step": 8220 }, { "epoch": 2.63, "learning_rate": 6.78558729406187e-08, "logits/generated": 7.071225643157959, "logits/real": 6.135659694671631, "logps/generated": -2573.64892578125, "logps/real": -415.0548400878906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -184.86944580078125, "rewards/margins": 179.15206909179688, "rewards/real": -5.717393398284912, "step": 8230 }, { "epoch": 2.64, "learning_rate": 6.726324522934693e-08, "logits/generated": 7.598072052001953, "logits/real": 6.732085227966309, "logps/generated": -2893.511474609375, "logps/real": -489.0870056152344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -214.98422241210938, "rewards/margins": 205.0094757080078, "rewards/real": -9.974757194519043, "step": 8240 }, { "epoch": 2.64, "learning_rate": 6.667061751807513e-08, "logits/generated": 7.639240264892578, "logits/real": 6.466312408447266, "logps/generated": -2897.51318359375, "logps/real": -483.0697326660156, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -217.611572265625, "rewards/margins": 209.23281860351562, "rewards/real": -8.378759384155273, "step": 8250 }, { "epoch": 2.64, "learning_rate": 6.607798980680336e-08, "logits/generated": 8.229767799377441, "logits/real": 7.028386116027832, "logps/generated": -2790.645751953125, "logps/real": -517.77734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -205.6338348388672, "rewards/margins": 195.99588012695312, "rewards/real": -9.63794994354248, "step": 8260 }, { "epoch": 2.65, "learning_rate": 6.548536209553159e-08, "logits/generated": 8.223030090332031, "logits/real": 6.099096775054932, "logps/generated": -2843.166259765625, "logps/real": -528.0230712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.291015625, "rewards/margins": 200.51295471191406, "rewards/real": -11.778053283691406, "step": 8270 }, { "epoch": 2.65, "learning_rate": 6.48927343842598e-08, "logits/generated": 7.658328056335449, "logits/real": 6.354641914367676, "logps/generated": -3140.0693359375, "logps/real": -485.04840087890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.8081512451172, "rewards/margins": 226.01113891601562, "rewards/real": -9.797000885009766, "step": 8280 }, { "epoch": 2.65, "learning_rate": 6.430010667298802e-08, "logits/generated": 8.180818557739258, "logits/real": 7.045019626617432, "logps/generated": -2764.741943359375, "logps/real": -501.38427734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -204.32656860351562, "rewards/margins": 195.32008361816406, "rewards/real": -9.006494522094727, "step": 8290 }, { "epoch": 2.66, "learning_rate": 6.370747896171625e-08, "logits/generated": 8.057973861694336, "logits/real": 6.8830976486206055, "logps/generated": -2730.14990234375, "logps/real": -479.64617919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -201.2572479248047, "rewards/margins": 190.1494598388672, "rewards/real": -11.107784271240234, "step": 8300 }, { "epoch": 2.66, "learning_rate": 6.311485125044446e-08, "logits/generated": 8.547441482543945, "logits/real": 7.09319543838501, "logps/generated": -2746.68408203125, "logps/real": -483.42803955078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -203.1092071533203, "rewards/margins": 191.0872344970703, "rewards/real": -12.021976470947266, "step": 8310 }, { "epoch": 2.66, "learning_rate": 6.252222353917269e-08, "logits/generated": 7.857272148132324, "logits/real": 5.963496208190918, "logps/generated": -2629.42626953125, "logps/real": -567.9591674804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -186.41082763671875, "rewards/margins": 173.80018615722656, "rewards/real": -12.61064338684082, "step": 8320 }, { "epoch": 2.67, "learning_rate": 6.192959582790091e-08, "logits/generated": 8.497546195983887, "logits/real": 7.240806579589844, "logps/generated": -3128.419921875, "logps/real": -499.1009826660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -233.95986938476562, "rewards/margins": 224.8828125, "rewards/real": -9.077058792114258, "step": 8330 }, { "epoch": 2.67, "learning_rate": 6.133696811662914e-08, "logits/generated": 7.850691318511963, "logits/real": 6.384494304656982, "logps/generated": -2767.103759765625, "logps/real": -580.7399291992188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -200.2229461669922, "rewards/margins": 186.88534545898438, "rewards/real": -13.337620735168457, "step": 8340 }, { "epoch": 2.67, "learning_rate": 6.074434040535735e-08, "logits/generated": 8.901998519897461, "logits/real": 6.852387428283691, "logps/generated": -2762.582763671875, "logps/real": -525.2606201171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -201.91220092773438, "rewards/margins": 191.7213134765625, "rewards/real": -10.190906524658203, "step": 8350 }, { "epoch": 2.68, "learning_rate": 6.015171269408558e-08, "logits/generated": 8.194256782531738, "logits/real": 6.980317115783691, "logps/generated": -2920.869384765625, "logps/real": -465.341064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -213.07711791992188, "rewards/margins": 203.16885375976562, "rewards/real": -9.908285140991211, "step": 8360 }, { "epoch": 2.68, "learning_rate": 5.955908498281379e-08, "logits/generated": 8.978832244873047, "logits/real": 7.830671787261963, "logps/generated": -3097.4677734375, "logps/real": -451.311767578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.93325805664062, "rewards/margins": 226.37680053710938, "rewards/real": -6.556451320648193, "step": 8370 }, { "epoch": 2.68, "learning_rate": 5.896645727154202e-08, "logits/generated": 8.551450729370117, "logits/real": 6.750830173492432, "logps/generated": -2822.60888671875, "logps/real": -518.9656982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.748291015625, "rewards/margins": 203.3511962890625, "rewards/real": -9.397098541259766, "step": 8380 }, { "epoch": 2.68, "learning_rate": 5.837382956027023e-08, "logits/generated": 8.076333999633789, "logits/real": 6.803060054779053, "logps/generated": -2940.834716796875, "logps/real": -536.74169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -220.457275390625, "rewards/margins": 208.01016235351562, "rewards/real": -12.44709300994873, "step": 8390 }, { "epoch": 2.69, "learning_rate": 5.7781201848998455e-08, "logits/generated": 8.139703750610352, "logits/real": 6.848072052001953, "logps/generated": -3285.93994140625, "logps/real": -495.5901794433594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -252.226806640625, "rewards/margins": 243.6891326904297, "rewards/real": -8.537650108337402, "step": 8400 }, { "epoch": 2.69, "learning_rate": 5.7188574137726684e-08, "logits/generated": 8.618913650512695, "logits/real": 6.8653154373168945, "logps/generated": -3046.39990234375, "logps/real": -519.8492431640625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.59121704101562, "rewards/margins": 221.1407012939453, "rewards/real": -11.45051383972168, "step": 8410 }, { "epoch": 2.69, "learning_rate": 5.65959464264549e-08, "logits/generated": 7.925799369812012, "logits/real": 6.666626930236816, "logps/generated": -2867.095947265625, "logps/real": -498.11492919921875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/generated": -217.46176147460938, "rewards/margins": 208.4671630859375, "rewards/real": -8.994630813598633, "step": 8420 }, { "epoch": 2.7, "learning_rate": 5.600331871518312e-08, "logits/generated": 7.4714860916137695, "logits/real": 6.480812072753906, "logps/generated": -3156.049072265625, "logps/real": -500.91497802734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -239.84237670898438, "rewards/margins": 230.21908569335938, "rewards/real": -9.623289108276367, "step": 8430 }, { "epoch": 2.7, "learning_rate": 5.5410691003911337e-08, "logits/generated": 8.462733268737793, "logits/real": 7.28622579574585, "logps/generated": -2978.993896484375, "logps/real": -476.1937561035156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -223.6619415283203, "rewards/margins": 213.018798828125, "rewards/real": -10.64312744140625, "step": 8440 }, { "epoch": 2.7, "learning_rate": 5.4818063292639565e-08, "logits/generated": 8.099030494689941, "logits/real": 6.773154258728027, "logps/generated": -3111.182861328125, "logps/real": -507.8434143066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -234.56222534179688, "rewards/margins": 225.3612823486328, "rewards/real": -9.200922966003418, "step": 8450 }, { "epoch": 2.71, "learning_rate": 5.422543558136778e-08, "logits/generated": 7.840134620666504, "logits/real": 6.55547571182251, "logps/generated": -2849.388427734375, "logps/real": -536.7872924804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -214.1895294189453, "rewards/margins": 203.33958435058594, "rewards/real": -10.84993839263916, "step": 8460 }, { "epoch": 2.71, "learning_rate": 5.3632807870096e-08, "logits/generated": 8.007733345031738, "logits/real": 7.028973579406738, "logps/generated": -3373.69384765625, "logps/real": -518.6016845703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -257.79718017578125, "rewards/margins": 246.2850341796875, "rewards/real": -11.51213550567627, "step": 8470 }, { "epoch": 2.71, "learning_rate": 5.3040180158824225e-08, "logits/generated": 8.43766975402832, "logits/real": 7.645676612854004, "logps/generated": -3520.742919921875, "logps/real": -465.8804626464844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -269.493896484375, "rewards/margins": 261.46337890625, "rewards/real": -8.03052043914795, "step": 8480 }, { "epoch": 2.72, "learning_rate": 5.2447552447552447e-08, "logits/generated": 7.908270835876465, "logits/real": 7.2662248611450195, "logps/generated": -3104.773681640625, "logps/real": -508.33380126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.1032257080078, "rewards/margins": 219.03085327148438, "rewards/real": -11.07237434387207, "step": 8490 }, { "epoch": 2.72, "learning_rate": 5.185492473628066e-08, "logits/generated": 8.900175094604492, "logits/real": 7.068187713623047, "logps/generated": -2827.81787109375, "logps/real": -533.6253662109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -212.9059600830078, "rewards/margins": 201.28646850585938, "rewards/real": -11.619464874267578, "step": 8500 }, { "epoch": 2.72, "learning_rate": 5.1262297025008884e-08, "logits/generated": 7.991377830505371, "logits/real": 7.606109619140625, "logps/generated": -3527.918701171875, "logps/real": -511.31036376953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -279.5680847167969, "rewards/margins": 267.9775695800781, "rewards/real": -11.59052848815918, "step": 8510 }, { "epoch": 2.73, "learning_rate": 5.066966931373711e-08, "logits/generated": 8.099992752075195, "logits/real": 6.451042175292969, "logps/generated": -3204.447509765625, "logps/real": -534.2952880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -240.8048858642578, "rewards/margins": 231.18701171875, "rewards/real": -9.617894172668457, "step": 8520 }, { "epoch": 2.73, "learning_rate": 5.007704160246533e-08, "logits/generated": 8.226224899291992, "logits/real": 7.946010589599609, "logps/generated": -3413.53125, "logps/real": -490.82781982421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -262.39764404296875, "rewards/margins": 251.7268829345703, "rewards/real": -10.67077350616455, "step": 8530 }, { "epoch": 2.73, "learning_rate": 4.948441389119355e-08, "logits/generated": 7.138372898101807, "logits/real": 7.372198581695557, "logps/generated": -2980.23779296875, "logps/real": -441.276611328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.284423828125, "rewards/margins": 211.97677612304688, "rewards/real": -7.307618141174316, "step": 8540 }, { "epoch": 2.74, "learning_rate": 4.889178617992177e-08, "logits/generated": 7.441501617431641, "logits/real": 6.0672783851623535, "logps/generated": -2781.381591796875, "logps/real": -543.1688232421875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -204.57998657226562, "rewards/margins": 194.0602264404297, "rewards/real": -10.519752502441406, "step": 8550 }, { "epoch": 2.74, "learning_rate": 4.8299158468649994e-08, "logits/generated": 7.985754489898682, "logits/real": 6.938901424407959, "logps/generated": -3412.931640625, "logps/real": -456.54315185546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -265.6098327636719, "rewards/margins": 258.1305236816406, "rewards/real": -7.479300022125244, "step": 8560 }, { "epoch": 2.74, "learning_rate": 4.770653075737821e-08, "logits/generated": 8.336395263671875, "logits/real": 7.059435844421387, "logps/generated": -2844.428466796875, "logps/real": -514.3206787109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -205.6320343017578, "rewards/margins": 196.85089111328125, "rewards/real": -8.781147956848145, "step": 8570 }, { "epoch": 2.75, "learning_rate": 4.711390304610643e-08, "logits/generated": 8.086278915405273, "logits/real": 7.028947353363037, "logps/generated": -2875.539794921875, "logps/real": -505.24462890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -213.0565643310547, "rewards/margins": 203.7962188720703, "rewards/real": -9.260336875915527, "step": 8580 }, { "epoch": 2.75, "learning_rate": 4.6521275334834654e-08, "logits/generated": 8.97681713104248, "logits/real": 6.837056636810303, "logps/generated": -2732.68798828125, "logps/real": -551.1002197265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -197.75961303710938, "rewards/margins": 189.5072479248047, "rewards/real": -8.252367973327637, "step": 8590 }, { "epoch": 2.75, "learning_rate": 4.5928647623562876e-08, "logits/generated": 8.267646789550781, "logits/real": 7.1119794845581055, "logps/generated": -2741.421142578125, "logps/real": -496.58355712890625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -201.11563110351562, "rewards/margins": 192.79783630371094, "rewards/real": -8.317792892456055, "step": 8600 }, { "epoch": 2.76, "learning_rate": 4.53360199122911e-08, "logits/generated": 7.597458839416504, "logits/real": 7.183170318603516, "logps/generated": -3328.329345703125, "logps/real": -478.10693359375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -255.9205780029297, "rewards/margins": 248.2677459716797, "rewards/real": -7.652830600738525, "step": 8610 }, { "epoch": 2.76, "learning_rate": 4.474339220101932e-08, "logits/generated": 8.28695011138916, "logits/real": 6.171719551086426, "logps/generated": -2852.8271484375, "logps/real": -554.2230834960938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -214.74124145507812, "rewards/margins": 206.2547149658203, "rewards/real": -8.486517906188965, "step": 8620 }, { "epoch": 2.76, "learning_rate": 4.415076448974754e-08, "logits/generated": 7.9613189697265625, "logits/real": 7.0819244384765625, "logps/generated": -2829.24951171875, "logps/real": -504.09698486328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -211.5718536376953, "rewards/margins": 201.73953247070312, "rewards/real": -9.832326889038086, "step": 8630 }, { "epoch": 2.76, "learning_rate": 4.355813677847576e-08, "logits/generated": 7.89370059967041, "logits/real": 7.525933265686035, "logps/generated": -3094.92529296875, "logps/real": -513.4002075195312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.6068572998047, "rewards/margins": 224.07308959960938, "rewards/real": -11.533733367919922, "step": 8640 }, { "epoch": 2.77, "learning_rate": 4.296550906720398e-08, "logits/generated": 8.352346420288086, "logits/real": 7.204506874084473, "logps/generated": -3093.535888671875, "logps/real": -464.26593017578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.6905059814453, "rewards/margins": 226.2654266357422, "rewards/real": -9.425071716308594, "step": 8650 }, { "epoch": 2.77, "learning_rate": 4.23728813559322e-08, "logits/generated": 8.221736907958984, "logits/real": 7.019160270690918, "logps/generated": -3101.844970703125, "logps/real": -486.76641845703125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -239.68258666992188, "rewards/margins": 229.4948272705078, "rewards/real": -10.187750816345215, "step": 8660 }, { "epoch": 2.77, "learning_rate": 4.178025364466042e-08, "logits/generated": 7.2331862449646, "logits/real": 6.70922327041626, "logps/generated": -3194.39501953125, "logps/real": -552.7675170898438, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -247.42160034179688, "rewards/margins": 235.07473754882812, "rewards/real": -12.346845626831055, "step": 8670 }, { "epoch": 2.78, "learning_rate": 4.118762593338864e-08, "logits/generated": 8.301423072814941, "logits/real": 7.5850043296813965, "logps/generated": -3096.105712890625, "logps/real": -464.41265869140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -233.5794219970703, "rewards/margins": 225.34939575195312, "rewards/real": -8.230051040649414, "step": 8680 }, { "epoch": 2.78, "learning_rate": 4.059499822211687e-08, "logits/generated": 8.723894119262695, "logits/real": 7.180922031402588, "logps/generated": -2702.56591796875, "logps/real": -520.3831176757812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -203.79489135742188, "rewards/margins": 192.74755859375, "rewards/real": -11.047327041625977, "step": 8690 }, { "epoch": 2.78, "learning_rate": 4.000237051084508e-08, "logits/generated": 8.716232299804688, "logits/real": 6.887723445892334, "logps/generated": -3034.03173828125, "logps/real": -476.8695373535156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -225.7946014404297, "rewards/margins": 217.4459228515625, "rewards/real": -8.348687171936035, "step": 8700 }, { "epoch": 2.79, "learning_rate": 3.9409742799573305e-08, "logits/generated": 8.401078224182129, "logits/real": 6.465890407562256, "logps/generated": -2955.52001953125, "logps/real": -555.0927734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.4933624267578, "rewards/margins": 207.9189910888672, "rewards/real": -11.574369430541992, "step": 8710 }, { "epoch": 2.79, "learning_rate": 3.8817115088301533e-08, "logits/generated": 8.765127182006836, "logits/real": 7.416386604309082, "logps/generated": -3067.182861328125, "logps/real": -521.6505126953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -227.99697875976562, "rewards/margins": 217.0262451171875, "rewards/real": -10.970739364624023, "step": 8720 }, { "epoch": 2.79, "learning_rate": 3.822448737702975e-08, "logits/generated": 8.804158210754395, "logits/real": 7.564444541931152, "logps/generated": -3270.056640625, "logps/real": -499.8545837402344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -250.69882202148438, "rewards/margins": 241.4369354248047, "rewards/real": -9.261892318725586, "step": 8730 }, { "epoch": 2.8, "learning_rate": 3.763185966575797e-08, "logits/generated": 7.953909873962402, "logits/real": 6.796091556549072, "logps/generated": -3211.81689453125, "logps/real": -477.65216064453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -242.2017822265625, "rewards/margins": 234.18246459960938, "rewards/real": -8.01932430267334, "step": 8740 }, { "epoch": 2.8, "learning_rate": 3.7039231954486186e-08, "logits/generated": 8.203560829162598, "logits/real": 7.138692378997803, "logps/generated": -2844.4208984375, "logps/real": -486.26336669921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -209.966552734375, "rewards/margins": 197.39117431640625, "rewards/real": -12.575355529785156, "step": 8750 }, { "epoch": 2.8, "learning_rate": 3.6446604243214415e-08, "logits/generated": 8.57895565032959, "logits/real": 6.996590614318848, "logps/generated": -3067.794921875, "logps/real": -534.1387939453125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.3258514404297, "rewards/margins": 221.8261260986328, "rewards/real": -9.499747276306152, "step": 8760 }, { "epoch": 2.81, "learning_rate": 3.585397653194263e-08, "logits/generated": 8.202235221862793, "logits/real": 7.388916015625, "logps/generated": -3203.379638671875, "logps/real": -495.0619201660156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -244.82388305664062, "rewards/margins": 233.64047241210938, "rewards/real": -11.183393478393555, "step": 8770 }, { "epoch": 2.81, "learning_rate": 3.526134882067085e-08, "logits/generated": 8.482281684875488, "logits/real": 7.2415924072265625, "logps/generated": -2814.8525390625, "logps/real": -495.26824951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -207.6466522216797, "rewards/margins": 196.92782592773438, "rewards/real": -10.718798637390137, "step": 8780 }, { "epoch": 2.81, "learning_rate": 3.4668721109399074e-08, "logits/generated": 8.95695972442627, "logits/real": 7.588113307952881, "logps/generated": -3075.870361328125, "logps/real": -492.8494567871094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.52987670898438, "rewards/margins": 220.01321411132812, "rewards/real": -11.516661643981934, "step": 8790 }, { "epoch": 2.82, "learning_rate": 3.4076093398127296e-08, "logits/generated": 7.875680446624756, "logits/real": 6.970887660980225, "logps/generated": -2981.309326171875, "logps/real": -468.750244140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -226.5687255859375, "rewards/margins": 217.2181396484375, "rewards/real": -9.350580215454102, "step": 8800 }, { "epoch": 2.82, "learning_rate": 3.348346568685552e-08, "logits/generated": 8.556954383850098, "logits/real": 6.701404571533203, "logps/generated": -3123.025634765625, "logps/real": -488.3585510253906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -239.79296875, "rewards/margins": 231.0467987060547, "rewards/real": -8.746153831481934, "step": 8810 }, { "epoch": 2.82, "learning_rate": 3.2890837975583734e-08, "logits/generated": 8.128173828125, "logits/real": 6.693127632141113, "logps/generated": -3101.912353515625, "logps/real": -493.995361328125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.74655151367188, "rewards/margins": 222.4376220703125, "rewards/real": -9.308927536010742, "step": 8820 }, { "epoch": 2.83, "learning_rate": 3.229821026431196e-08, "logits/generated": 8.637815475463867, "logits/real": 7.720175266265869, "logps/generated": -3125.2490234375, "logps/real": -520.68310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -234.7375946044922, "rewards/margins": 224.14797973632812, "rewards/real": -10.58964729309082, "step": 8830 }, { "epoch": 2.83, "learning_rate": 3.170558255304018e-08, "logits/generated": 8.322412490844727, "logits/real": 7.59633731842041, "logps/generated": -3225.390625, "logps/real": -460.01239013671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -244.38314819335938, "rewards/margins": 235.30978393554688, "rewards/real": -9.073370933532715, "step": 8840 }, { "epoch": 2.83, "learning_rate": 3.11129548417684e-08, "logits/generated": 7.9953203201293945, "logits/real": 6.670275688171387, "logps/generated": -3042.13037109375, "logps/real": -528.563232421875, "loss": 0.0022, "rewards/accuracies": 0.987500011920929, "rewards/generated": -229.0786590576172, "rewards/margins": 218.18905639648438, "rewards/real": -10.889580726623535, "step": 8850 }, { "epoch": 2.84, "learning_rate": 3.052032713049662e-08, "logits/generated": 7.613698482513428, "logits/real": 6.712839603424072, "logps/generated": -3153.233154296875, "logps/real": -505.58203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -238.701904296875, "rewards/margins": 229.136474609375, "rewards/real": -9.565437316894531, "step": 8860 }, { "epoch": 2.84, "learning_rate": 2.9927699419224844e-08, "logits/generated": 8.15238094329834, "logits/real": 6.753890037536621, "logps/generated": -3060.76025390625, "logps/real": -509.9293518066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -234.29736328125, "rewards/margins": 222.47171020507812, "rewards/real": -11.825661659240723, "step": 8870 }, { "epoch": 2.84, "learning_rate": 2.9335071707953063e-08, "logits/generated": 7.850224494934082, "logits/real": 7.407826900482178, "logps/generated": -2979.707763671875, "logps/real": -471.8998107910156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -222.354248046875, "rewards/margins": 211.6306610107422, "rewards/real": -10.72357177734375, "step": 8880 }, { "epoch": 2.84, "learning_rate": 2.8742443996681285e-08, "logits/generated": 8.879858016967773, "logits/real": 7.324443817138672, "logps/generated": -3383.140625, "logps/real": -499.24395751953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -260.9498291015625, "rewards/margins": 252.03866577148438, "rewards/real": -8.911173820495605, "step": 8890 }, { "epoch": 2.85, "learning_rate": 2.8149816285409503e-08, "logits/generated": 8.500874519348145, "logits/real": 7.039736270904541, "logps/generated": -3438.938720703125, "logps/real": -498.4562072753906, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -265.0511779785156, "rewards/margins": 255.5115966796875, "rewards/real": -9.539575576782227, "step": 8900 }, { "epoch": 2.85, "learning_rate": 2.7557188574137725e-08, "logits/generated": 8.226998329162598, "logits/real": 6.9249467849731445, "logps/generated": -3134.25341796875, "logps/real": -469.9586486816406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -233.0384521484375, "rewards/margins": 224.59390258789062, "rewards/real": -8.444602012634277, "step": 8910 }, { "epoch": 2.85, "learning_rate": 2.6964560862865947e-08, "logits/generated": 8.703134536743164, "logits/real": 7.221149444580078, "logps/generated": -2935.45703125, "logps/real": -522.2362060546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.24734497070312, "rewards/margins": 209.77395629882812, "rewards/real": -9.473384857177734, "step": 8920 }, { "epoch": 2.86, "learning_rate": 2.6371933151594166e-08, "logits/generated": 8.7809419631958, "logits/real": 7.861895561218262, "logps/generated": -3071.876953125, "logps/real": -560.5285034179688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -232.33462524414062, "rewards/margins": 218.4232940673828, "rewards/real": -13.911321640014648, "step": 8930 }, { "epoch": 2.86, "learning_rate": 2.5779305440322388e-08, "logits/generated": 8.76445198059082, "logits/real": 8.03178882598877, "logps/generated": -3397.264892578125, "logps/real": -484.3458557128906, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -263.78240966796875, "rewards/margins": 253.8608856201172, "rewards/real": -9.921534538269043, "step": 8940 }, { "epoch": 2.86, "learning_rate": 2.5186677729050607e-08, "logits/generated": 7.6614861488342285, "logits/real": 7.373976230621338, "logps/generated": -3188.702392578125, "logps/real": -494.7622985839844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -242.7777862548828, "rewards/margins": 234.5918426513672, "rewards/real": -8.185911178588867, "step": 8950 }, { "epoch": 2.87, "learning_rate": 2.459405001777883e-08, "logits/generated": 8.235227584838867, "logits/real": 6.7460432052612305, "logps/generated": -2921.992431640625, "logps/real": -498.3289489746094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -215.79931640625, "rewards/margins": 205.0426788330078, "rewards/real": -10.756634712219238, "step": 8960 }, { "epoch": 2.87, "learning_rate": 2.4001422306507054e-08, "logits/generated": 8.557123184204102, "logits/real": 6.847373962402344, "logps/generated": -2717.42431640625, "logps/real": -569.5227661132812, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -197.24667358398438, "rewards/margins": 185.33213806152344, "rewards/real": -11.914536476135254, "step": 8970 }, { "epoch": 2.87, "learning_rate": 2.3408794595235273e-08, "logits/generated": 8.220282554626465, "logits/real": 7.633485317230225, "logps/generated": -3096.020751953125, "logps/real": -514.7177734375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.3781280517578, "rewards/margins": 221.917236328125, "rewards/real": -13.460886001586914, "step": 8980 }, { "epoch": 2.88, "learning_rate": 2.2816166883963495e-08, "logits/generated": 7.869513034820557, "logits/real": 7.054207801818848, "logps/generated": -2968.21044921875, "logps/real": -583.9354248046875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -220.66531372070312, "rewards/margins": 204.53053283691406, "rewards/real": -16.134782791137695, "step": 8990 }, { "epoch": 2.88, "learning_rate": 2.2223539172691714e-08, "logits/generated": 7.808424472808838, "logits/real": 6.341929912567139, "logps/generated": -2995.339599609375, "logps/real": -463.3304138183594, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -227.91983032226562, "rewards/margins": 218.44375610351562, "rewards/real": -9.476068496704102, "step": 9000 }, { "epoch": 2.88, "learning_rate": 2.1630911461419936e-08, "logits/generated": 7.785159111022949, "logits/real": 7.184203147888184, "logps/generated": -2710.5859375, "logps/real": -525.7408447265625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -195.81272888183594, "rewards/margins": 184.5967254638672, "rewards/real": -11.216008186340332, "step": 9010 }, { "epoch": 2.89, "learning_rate": 2.1038283750148154e-08, "logits/generated": 8.97920036315918, "logits/real": 7.6993207931518555, "logps/generated": -3128.960693359375, "logps/real": -509.15692138671875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.43954467773438, "rewards/margins": 220.4319610595703, "rewards/real": -10.007574081420898, "step": 9020 }, { "epoch": 2.89, "learning_rate": 2.0445656038876377e-08, "logits/generated": 8.032505989074707, "logits/real": 7.174866676330566, "logps/generated": -3234.5, "logps/real": -527.7041625976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -242.859619140625, "rewards/margins": 231.9322052001953, "rewards/real": -10.927396774291992, "step": 9030 }, { "epoch": 2.89, "learning_rate": 1.98530283276046e-08, "logits/generated": 6.732813835144043, "logits/real": 7.0517897605896, "logps/generated": -3563.056640625, "logps/real": -462.65399169921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -269.57916259765625, "rewards/margins": 259.9852294921875, "rewards/real": -9.593974113464355, "step": 9040 }, { "epoch": 2.9, "learning_rate": 1.9260400616332817e-08, "logits/generated": 8.678823471069336, "logits/real": 7.099053382873535, "logps/generated": -3064.253662109375, "logps/real": -557.8177490234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -233.07058715820312, "rewards/margins": 220.3520050048828, "rewards/real": -12.718572616577148, "step": 9050 }, { "epoch": 2.9, "learning_rate": 1.866777290506104e-08, "logits/generated": 8.235837936401367, "logits/real": 7.246317386627197, "logps/generated": -2986.19580078125, "logps/real": -531.8699951171875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -218.9315185546875, "rewards/margins": 209.5327911376953, "rewards/real": -9.398733139038086, "step": 9060 }, { "epoch": 2.9, "learning_rate": 1.807514519378926e-08, "logits/generated": 9.1199369430542, "logits/real": 7.043376922607422, "logps/generated": -2817.24951171875, "logps/real": -469.8770446777344, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -211.3979949951172, "rewards/margins": 202.86318969726562, "rewards/real": -8.534795761108398, "step": 9070 }, { "epoch": 2.91, "learning_rate": 1.7482517482517483e-08, "logits/generated": 8.673420906066895, "logits/real": 7.112066745758057, "logps/generated": -3197.769287109375, "logps/real": -527.2247924804688, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -247.66488647460938, "rewards/margins": 237.1915740966797, "rewards/real": -10.47331714630127, "step": 9080 }, { "epoch": 2.91, "learning_rate": 1.6889889771245702e-08, "logits/generated": 8.417943954467773, "logits/real": 6.62189245223999, "logps/generated": -2834.17431640625, "logps/real": -531.4713745117188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -211.2469482421875, "rewards/margins": 199.7887725830078, "rewards/real": -11.45817756652832, "step": 9090 }, { "epoch": 2.91, "learning_rate": 1.6297262059973924e-08, "logits/generated": 8.768732070922852, "logits/real": 7.625872611999512, "logps/generated": -3280.11328125, "logps/real": -484.859130859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -250.0456085205078, "rewards/margins": 242.04934692382812, "rewards/real": -7.996267795562744, "step": 9100 }, { "epoch": 2.92, "learning_rate": 1.5704634348702146e-08, "logits/generated": 8.263387680053711, "logits/real": 7.393287658691406, "logps/generated": -3025.78857421875, "logps/real": -528.0397338867188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.1303253173828, "rewards/margins": 216.8250732421875, "rewards/real": -11.305240631103516, "step": 9110 }, { "epoch": 2.92, "learning_rate": 1.5112006637430365e-08, "logits/generated": 8.31395435333252, "logits/real": 6.723788261413574, "logps/generated": -2949.27880859375, "logps/real": -542.2833251953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -217.7350311279297, "rewards/margins": 208.3937530517578, "rewards/real": -9.341277122497559, "step": 9120 }, { "epoch": 2.92, "learning_rate": 1.4519378926158587e-08, "logits/generated": 7.9235405921936035, "logits/real": 7.7715959548950195, "logps/generated": -3014.697021484375, "logps/real": -497.0755920410156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -226.562255859375, "rewards/margins": 216.12100219726562, "rewards/real": -10.441266059875488, "step": 9130 }, { "epoch": 2.92, "learning_rate": 1.3926751214886807e-08, "logits/generated": 8.845457077026367, "logits/real": 7.088343620300293, "logps/generated": -2780.33154296875, "logps/real": -522.0088500976562, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -209.6867218017578, "rewards/margins": 200.1481475830078, "rewards/real": -9.538576126098633, "step": 9140 }, { "epoch": 2.93, "learning_rate": 1.333412350361503e-08, "logits/generated": 8.59052562713623, "logits/real": 7.341599464416504, "logps/generated": -3211.372314453125, "logps/real": -518.3256225585938, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -240.33511352539062, "rewards/margins": 229.4217071533203, "rewards/real": -10.913371086120605, "step": 9150 }, { "epoch": 2.93, "learning_rate": 1.274149579234325e-08, "logits/generated": 8.772137641906738, "logits/real": 6.745736122131348, "logps/generated": -3158.949462890625, "logps/real": -597.6865234375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -240.2683868408203, "rewards/margins": 226.1527099609375, "rewards/real": -14.115678787231445, "step": 9160 }, { "epoch": 2.93, "learning_rate": 1.214886808107147e-08, "logits/generated": 9.065434455871582, "logits/real": 7.645088195800781, "logps/generated": -2997.620849609375, "logps/real": -529.5694580078125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -221.7777557373047, "rewards/margins": 211.21005249023438, "rewards/real": -10.56767463684082, "step": 9170 }, { "epoch": 2.94, "learning_rate": 1.155624036979969e-08, "logits/generated": 8.795722007751465, "logits/real": 7.217993259429932, "logps/generated": -2855.42333984375, "logps/real": -485.5347595214844, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -211.6577606201172, "rewards/margins": 202.3023681640625, "rewards/real": -9.355379104614258, "step": 9180 }, { "epoch": 2.94, "learning_rate": 1.0963612658527912e-08, "logits/generated": 7.265430450439453, "logits/real": 6.917641639709473, "logps/generated": -2909.56982421875, "logps/real": -490.321533203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -213.5467071533203, "rewards/margins": 202.5613250732422, "rewards/real": -10.98538589477539, "step": 9190 }, { "epoch": 2.94, "learning_rate": 1.0370984947256134e-08, "logits/generated": 7.716809272766113, "logits/real": 7.509675025939941, "logps/generated": -2929.58740234375, "logps/real": -466.08673095703125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -216.98245239257812, "rewards/margins": 207.25155639648438, "rewards/real": -9.730905532836914, "step": 9200 }, { "epoch": 2.95, "learning_rate": 9.778357235984355e-09, "logits/generated": 8.067037582397461, "logits/real": 6.947924613952637, "logps/generated": -2833.19580078125, "logps/real": -544.2594604492188, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -209.1349334716797, "rewards/margins": 197.0256805419922, "rewards/real": -12.109265327453613, "step": 9210 }, { "epoch": 2.95, "learning_rate": 9.185729524712575e-09, "logits/generated": 8.286117553710938, "logits/real": 6.495217323303223, "logps/generated": -3061.060791015625, "logps/real": -565.9337158203125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.41000366210938, "rewards/margins": 216.41397094726562, "rewards/real": -12.99604320526123, "step": 9220 }, { "epoch": 2.95, "learning_rate": 8.593101813440796e-09, "logits/generated": 8.073034286499023, "logits/real": 7.767852783203125, "logps/generated": -3293.905029296875, "logps/real": -503.7654724121094, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -248.76718139648438, "rewards/margins": 235.86087036132812, "rewards/real": -12.906286239624023, "step": 9230 }, { "epoch": 2.96, "learning_rate": 8.000474102169016e-09, "logits/generated": 8.848856925964355, "logits/real": 7.8941779136657715, "logps/generated": -3028.908935546875, "logps/real": -535.1795043945312, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.0379638671875, "rewards/margins": 216.16464233398438, "rewards/real": -11.873281478881836, "step": 9240 }, { "epoch": 2.96, "learning_rate": 7.407846390897238e-09, "logits/generated": 7.967347145080566, "logits/real": 6.756280422210693, "logps/generated": -3154.21728515625, "logps/real": -473.6285095214844, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -239.987060546875, "rewards/margins": 230.71969604492188, "rewards/real": -9.267355918884277, "step": 9250 }, { "epoch": 2.96, "learning_rate": 6.815218679625459e-09, "logits/generated": 8.066893577575684, "logits/real": 6.4289374351501465, "logps/generated": -2897.50244140625, "logps/real": -497.54534912109375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -220.3232421875, "rewards/margins": 211.45858764648438, "rewards/real": -8.864641189575195, "step": 9260 }, { "epoch": 2.97, "learning_rate": 6.22259096835368e-09, "logits/generated": 9.104695320129395, "logits/real": 7.100895881652832, "logps/generated": -2840.745849609375, "logps/real": -568.906494140625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -205.9685821533203, "rewards/margins": 195.31594848632812, "rewards/real": -10.652606010437012, "step": 9270 }, { "epoch": 2.97, "learning_rate": 5.629963257081901e-09, "logits/generated": 8.228031158447266, "logits/real": 7.226017951965332, "logps/generated": -2928.40234375, "logps/real": -469.6527404785156, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -217.2353973388672, "rewards/margins": 205.7250213623047, "rewards/real": -11.510384559631348, "step": 9280 }, { "epoch": 2.97, "learning_rate": 5.037335545810122e-09, "logits/generated": 7.780027866363525, "logits/real": 6.504982948303223, "logps/generated": -3110.919189453125, "logps/real": -503.9840393066406, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.04806518554688, "rewards/margins": 222.05929565429688, "rewards/real": -8.988751411437988, "step": 9290 }, { "epoch": 2.98, "learning_rate": 4.444707834538343e-09, "logits/generated": 8.291112899780273, "logits/real": 7.904989719390869, "logps/generated": -3069.237548828125, "logps/real": -508.61468505859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -228.2886505126953, "rewards/margins": 216.349853515625, "rewards/real": -11.938814163208008, "step": 9300 }, { "epoch": 2.98, "learning_rate": 3.8520801232665634e-09, "logits/generated": 9.209867477416992, "logits/real": 7.487800598144531, "logps/generated": -3485.589111328125, "logps/real": -552.0, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -265.2016906738281, "rewards/margins": 253.6442413330078, "rewards/real": -11.55746841430664, "step": 9310 }, { "epoch": 2.98, "learning_rate": 3.2594524119947846e-09, "logits/generated": 8.013742446899414, "logits/real": 7.19378662109375, "logps/generated": -2807.793212890625, "logps/real": -458.1249084472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -211.60000610351562, "rewards/margins": 201.39096069335938, "rewards/real": -10.209046363830566, "step": 9320 }, { "epoch": 2.99, "learning_rate": 2.6668247007230054e-09, "logits/generated": 8.698243141174316, "logits/real": 7.442419528961182, "logps/generated": -3290.629638671875, "logps/real": -487.89202880859375, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -258.7073974609375, "rewards/margins": 248.32846069335938, "rewards/real": -10.378918647766113, "step": 9330 }, { "epoch": 2.99, "learning_rate": 2.0741969894512266e-09, "logits/generated": 7.758219242095947, "logits/real": 7.231850624084473, "logps/generated": -3058.931884765625, "logps/real": -540.364501953125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -225.93081665039062, "rewards/margins": 212.0258331298828, "rewards/real": -13.904986381530762, "step": 9340 }, { "epoch": 2.99, "learning_rate": 1.4815692781794476e-09, "logits/generated": 6.423818111419678, "logits/real": 6.986824989318848, "logps/generated": -3106.513427734375, "logps/real": -498.1249084472656, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -239.3776397705078, "rewards/margins": 228.1728515625, "rewards/real": -11.204763412475586, "step": 9350 }, { "epoch": 3.0, "learning_rate": 8.889415669076685e-10, "logits/generated": 9.10843276977539, "logits/real": 7.404960632324219, "logps/generated": -3026.97412109375, "logps/real": -551.2117919921875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.6231231689453, "rewards/margins": 217.53732299804688, "rewards/real": -12.085813522338867, "step": 9360 }, { "epoch": 3.0, "learning_rate": 2.963138556358895e-10, "logits/generated": 8.667880058288574, "logits/real": 7.627316951751709, "logps/generated": -3049.637451171875, "logps/real": -548.5264892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.8819580078125, "rewards/margins": 218.9852294921875, "rewards/real": -10.896716117858887, "step": 9370 }, { "epoch": 3.0, "step": 9375, "total_flos": 0.0, "train_loss": 0.010426454302569806, "train_runtime": 77095.0881, "train_samples_per_second": 3.891, "train_steps_per_second": 0.122 } ], "logging_steps": 10, "max_steps": 9375, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }