Safetensors
llama
AALF commited on
Commit
b3b4656
·
verified ·
1 Parent(s): 5b451e5

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -1248
trainer_state.json DELETED
@@ -1,1248 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.8063872255489022,
5
- "eval_steps": 500,
6
- "global_step": 404,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.001996007984031936,
13
- "grad_norm": 19.011175567956343,
14
- "learning_rate": 1.9607843137254902e-08,
15
- "logits/chosen": -0.23683343827724457,
16
- "logits/rejected": -0.2160334289073944,
17
- "logps/chosen": -0.7725335359573364,
18
- "logps/rejected": -0.7464257478713989,
19
- "loss": 0.6931,
20
- "rewards/accuracies": 0.0,
21
- "rewards/chosen": 0.0,
22
- "rewards/margins": 0.0,
23
- "rewards/rejected": 0.0,
24
- "step": 1
25
- },
26
- {
27
- "epoch": 0.00998003992015968,
28
- "grad_norm": 24.85303405730939,
29
- "learning_rate": 9.80392156862745e-08,
30
- "logits/chosen": -0.17754913866519928,
31
- "logits/rejected": -0.1510540395975113,
32
- "logps/chosen": -0.6808110475540161,
33
- "logps/rejected": -0.7670315504074097,
34
- "loss": 0.6946,
35
- "rewards/accuracies": 0.296875,
36
- "rewards/chosen": -0.003806930035352707,
37
- "rewards/margins": -0.0012099393643438816,
38
- "rewards/rejected": -0.0025969906710088253,
39
- "step": 5
40
- },
41
- {
42
- "epoch": 0.01996007984031936,
43
- "grad_norm": 27.331245165405218,
44
- "learning_rate": 1.96078431372549e-07,
45
- "logits/chosen": -0.21193155646324158,
46
- "logits/rejected": -0.14373356103897095,
47
- "logps/chosen": -0.6621003746986389,
48
- "logps/rejected": -0.7422515153884888,
49
- "loss": 0.6942,
50
- "rewards/accuracies": 0.5,
51
- "rewards/chosen": -0.003911865875124931,
52
- "rewards/margins": -0.00975899025797844,
53
- "rewards/rejected": 0.005847124848514795,
54
- "step": 10
55
- },
56
- {
57
- "epoch": 0.029940119760479042,
58
- "grad_norm": 18.13812492001746,
59
- "learning_rate": 2.941176470588235e-07,
60
- "logits/chosen": -0.2625748813152313,
61
- "logits/rejected": -0.21968011558055878,
62
- "logps/chosen": -0.7216169238090515,
63
- "logps/rejected": -0.7573580741882324,
64
- "loss": 0.6955,
65
- "rewards/accuracies": 0.5249999761581421,
66
- "rewards/chosen": -0.0036083627492189407,
67
- "rewards/margins": 0.0002948194742202759,
68
- "rewards/rejected": -0.0039031822234392166,
69
- "step": 15
70
- },
71
- {
72
- "epoch": 0.03992015968063872,
73
- "grad_norm": 29.30189659403405,
74
- "learning_rate": 3.92156862745098e-07,
75
- "logits/chosen": -0.2208031415939331,
76
- "logits/rejected": -0.21727153658866882,
77
- "logps/chosen": -0.806796669960022,
78
- "logps/rejected": -0.7866016626358032,
79
- "loss": 0.6931,
80
- "rewards/accuracies": 0.550000011920929,
81
- "rewards/chosen": 0.011953282169997692,
82
- "rewards/margins": 0.00740851229056716,
83
- "rewards/rejected": 0.0045447684824466705,
84
- "step": 20
85
- },
86
- {
87
- "epoch": 0.0499001996007984,
88
- "grad_norm": 17.484098617193414,
89
- "learning_rate": 4.901960784313725e-07,
90
- "logits/chosen": -0.23456409573554993,
91
- "logits/rejected": -0.20080241560935974,
92
- "logps/chosen": -0.752682089805603,
93
- "logps/rejected": -0.81329745054245,
94
- "loss": 0.6933,
95
- "rewards/accuracies": 0.4625000059604645,
96
- "rewards/chosen": -0.008837291970849037,
97
- "rewards/margins": -0.008301705121994019,
98
- "rewards/rejected": -0.0005355868488550186,
99
- "step": 25
100
- },
101
- {
102
- "epoch": 0.059880239520958084,
103
- "grad_norm": 22.79460314618375,
104
- "learning_rate": 5.88235294117647e-07,
105
- "logits/chosen": -0.19971349835395813,
106
- "logits/rejected": -0.1838277131319046,
107
- "logps/chosen": -0.741012454032898,
108
- "logps/rejected": -0.8006389737129211,
109
- "loss": 0.6905,
110
- "rewards/accuracies": 0.5249999761581421,
111
- "rewards/chosen": -0.007402450777590275,
112
- "rewards/margins": 0.008965044282376766,
113
- "rewards/rejected": -0.01636749505996704,
114
- "step": 30
115
- },
116
- {
117
- "epoch": 0.06986027944111776,
118
- "grad_norm": 19.972022304372274,
119
- "learning_rate": 6.862745098039216e-07,
120
- "logits/chosen": -0.20151765644550323,
121
- "logits/rejected": -0.19096672534942627,
122
- "logps/chosen": -0.7682427167892456,
123
- "logps/rejected": -0.8185433149337769,
124
- "loss": 0.6912,
125
- "rewards/accuracies": 0.4375,
126
- "rewards/chosen": -0.011349962092936039,
127
- "rewards/margins": -0.0024173937272280455,
128
- "rewards/rejected": -0.00893256813287735,
129
- "step": 35
130
- },
131
- {
132
- "epoch": 0.07984031936127745,
133
- "grad_norm": 18.601712274112785,
134
- "learning_rate": 7.84313725490196e-07,
135
- "logits/chosen": -0.24523372948169708,
136
- "logits/rejected": -0.2008388340473175,
137
- "logps/chosen": -0.784850537776947,
138
- "logps/rejected": -0.8351114392280579,
139
- "loss": 0.6888,
140
- "rewards/accuracies": 0.7250000238418579,
141
- "rewards/chosen": -0.019527489319443703,
142
- "rewards/margins": 0.02737308107316494,
143
- "rewards/rejected": -0.04690057039260864,
144
- "step": 40
145
- },
146
- {
147
- "epoch": 0.08982035928143713,
148
- "grad_norm": 22.21021114642223,
149
- "learning_rate": 8.823529411764705e-07,
150
- "logits/chosen": -0.20831866562366486,
151
- "logits/rejected": -0.19121626019477844,
152
- "logps/chosen": -0.7837399244308472,
153
- "logps/rejected": -0.8101400136947632,
154
- "loss": 0.6787,
155
- "rewards/accuracies": 0.6000000238418579,
156
- "rewards/chosen": -0.02101544663310051,
157
- "rewards/margins": 0.036486249417066574,
158
- "rewards/rejected": -0.057501696050167084,
159
- "step": 45
160
- },
161
- {
162
- "epoch": 0.0998003992015968,
163
- "grad_norm": 35.946736239250875,
164
- "learning_rate": 9.80392156862745e-07,
165
- "logits/chosen": -0.2484116554260254,
166
- "logits/rejected": -0.21271376311779022,
167
- "logps/chosen": -0.7204190492630005,
168
- "logps/rejected": -0.8208059072494507,
169
- "loss": 0.6745,
170
- "rewards/accuracies": 0.699999988079071,
171
- "rewards/chosen": -0.018808891996741295,
172
- "rewards/margins": 0.07578931748867035,
173
- "rewards/rejected": -0.0945982038974762,
174
- "step": 50
175
- },
176
- {
177
- "epoch": 0.10978043912175649,
178
- "grad_norm": 17.586104380746775,
179
- "learning_rate": 9.99805057520177e-07,
180
- "logits/chosen": -0.22446580231189728,
181
- "logits/rejected": -0.21255891025066376,
182
- "logps/chosen": -0.768947958946228,
183
- "logps/rejected": -0.7895203828811646,
184
- "loss": 0.6709,
185
- "rewards/accuracies": 0.675000011920929,
186
- "rewards/chosen": -0.03166341781616211,
187
- "rewards/margins": 0.14431020617485046,
188
- "rewards/rejected": -0.17597363889217377,
189
- "step": 55
190
- },
191
- {
192
- "epoch": 0.11976047904191617,
193
- "grad_norm": 16.613461293187417,
194
- "learning_rate": 9.990133642141357e-07,
195
- "logits/chosen": -0.24056999385356903,
196
- "logits/rejected": -0.2318592071533203,
197
- "logps/chosen": -0.7434613108634949,
198
- "logps/rejected": -0.8091262578964233,
199
- "loss": 0.6692,
200
- "rewards/accuracies": 0.7250000238418579,
201
- "rewards/chosen": -0.08080559223890305,
202
- "rewards/margins": 0.17325755953788757,
203
- "rewards/rejected": -0.2540631592273712,
204
- "step": 60
205
- },
206
- {
207
- "epoch": 0.12974051896207583,
208
- "grad_norm": 20.90647324488342,
209
- "learning_rate": 9.976136999909155e-07,
210
- "logits/chosen": -0.18930143117904663,
211
- "logits/rejected": -0.17337587475776672,
212
- "logps/chosen": -0.7673609852790833,
213
- "logps/rejected": -0.7971418499946594,
214
- "loss": 0.6651,
215
- "rewards/accuracies": 0.6875,
216
- "rewards/chosen": -0.06646038591861725,
217
- "rewards/margins": 0.06751471757888794,
218
- "rewards/rejected": -0.133975088596344,
219
- "step": 65
220
- },
221
- {
222
- "epoch": 0.13972055888223553,
223
- "grad_norm": 29.878943207680674,
224
- "learning_rate": 9.956077701257707e-07,
225
- "logits/chosen": -0.2409631907939911,
226
- "logits/rejected": -0.2077023983001709,
227
- "logps/chosen": -0.7360613346099854,
228
- "logps/rejected": -0.7820955514907837,
229
- "loss": 0.6524,
230
- "rewards/accuracies": 0.7124999761581421,
231
- "rewards/chosen": -0.04467972368001938,
232
- "rewards/margins": 0.07468457520008087,
233
- "rewards/rejected": -0.11936430633068085,
234
- "step": 70
235
- },
236
- {
237
- "epoch": 0.1497005988023952,
238
- "grad_norm": 23.171174553493415,
239
- "learning_rate": 9.929980185352525e-07,
240
- "logits/chosen": -0.3143348693847656,
241
- "logits/rejected": -0.31170645356178284,
242
- "logps/chosen": -0.7954690456390381,
243
- "logps/rejected": -0.8250002861022949,
244
- "loss": 0.6568,
245
- "rewards/accuracies": 0.75,
246
- "rewards/chosen": -0.09316639602184296,
247
- "rewards/margins": 0.060890208929777145,
248
- "rewards/rejected": -0.1540566086769104,
249
- "step": 75
250
- },
251
- {
252
- "epoch": 0.1596806387225549,
253
- "grad_norm": 17.553227599037466,
254
- "learning_rate": 9.89787624799672e-07,
255
- "logits/chosen": -0.38358816504478455,
256
- "logits/rejected": -0.32817578315734863,
257
- "logps/chosen": -0.7703452110290527,
258
- "logps/rejected": -0.8151019811630249,
259
- "loss": 0.6486,
260
- "rewards/accuracies": 0.737500011920929,
261
- "rewards/chosen": -0.10643620789051056,
262
- "rewards/margins": 0.10724379867315292,
263
- "rewards/rejected": -0.2136799842119217,
264
- "step": 80
265
- },
266
- {
267
- "epoch": 0.16966067864271456,
268
- "grad_norm": 13.431691131153842,
269
- "learning_rate": 9.859805002892731e-07,
270
- "logits/chosen": -0.3225359618663788,
271
- "logits/rejected": -0.3592807650566101,
272
- "logps/chosen": -0.7858971357345581,
273
- "logps/rejected": -0.8596378564834595,
274
- "loss": 0.6523,
275
- "rewards/accuracies": 0.625,
276
- "rewards/chosen": -0.1400977075099945,
277
- "rewards/margins": 0.16820211708545685,
278
- "rewards/rejected": -0.30829980969429016,
279
- "step": 85
280
- },
281
- {
282
- "epoch": 0.17964071856287425,
283
- "grad_norm": 27.30667277119807,
284
- "learning_rate": 9.81581283398829e-07,
285
- "logits/chosen": -0.34176284074783325,
286
- "logits/rejected": -0.34205198287963867,
287
- "logps/chosen": -0.7768250703811646,
288
- "logps/rejected": -0.820307731628418,
289
- "loss": 0.6451,
290
- "rewards/accuracies": 0.75,
291
- "rewards/chosen": -0.07238658517599106,
292
- "rewards/margins": 0.23312333226203918,
293
- "rewards/rejected": -0.30550986528396606,
294
- "step": 90
295
- },
296
- {
297
- "epoch": 0.18962075848303392,
298
- "grad_norm": 20.047219809230725,
299
- "learning_rate": 9.765953338964734e-07,
300
- "logits/chosen": -0.4065936207771301,
301
- "logits/rejected": -0.37638360261917114,
302
- "logps/chosen": -0.8715718388557434,
303
- "logps/rejected": -0.9510416984558105,
304
- "loss": 0.6336,
305
- "rewards/accuracies": 0.7749999761581421,
306
- "rewards/chosen": -0.1532100886106491,
307
- "rewards/margins": 0.2994120717048645,
308
- "rewards/rejected": -0.4526221752166748,
309
- "step": 95
310
- },
311
- {
312
- "epoch": 0.1996007984031936,
313
- "grad_norm": 36.23944490923282,
314
- "learning_rate": 9.710287263936483e-07,
315
- "logits/chosen": -0.43931150436401367,
316
- "logits/rejected": -0.44534721970558167,
317
- "logps/chosen": -0.8958739042282104,
318
- "logps/rejected": -0.9571765065193176,
319
- "loss": 0.6432,
320
- "rewards/accuracies": 0.75,
321
- "rewards/chosen": -0.27455487847328186,
322
- "rewards/margins": 0.25604015588760376,
323
- "rewards/rejected": -0.5305949449539185,
324
- "step": 100
325
- },
326
- {
327
- "epoch": 0.20958083832335328,
328
- "grad_norm": 25.004187021683798,
329
- "learning_rate": 9.648882429441256e-07,
330
- "logits/chosen": -0.44500723481178284,
331
- "logits/rejected": -0.4199863076210022,
332
- "logps/chosen": -0.7884619832038879,
333
- "logps/rejected": -0.7948769330978394,
334
- "loss": 0.6333,
335
- "rewards/accuracies": 0.699999988079071,
336
- "rewards/chosen": -0.23280362784862518,
337
- "rewards/margins": 0.13038918375968933,
338
- "rewards/rejected": -0.3631927967071533,
339
- "step": 105
340
- },
341
- {
342
- "epoch": 0.21956087824351297,
343
- "grad_norm": 15.650165706867742,
344
- "learning_rate": 9.581813647811197e-07,
345
- "logits/chosen": -0.47238340973854065,
346
- "logits/rejected": -0.46014589071273804,
347
- "logps/chosen": -0.7871894240379333,
348
- "logps/rejected": -0.8054457902908325,
349
- "loss": 0.6277,
350
- "rewards/accuracies": 0.6875,
351
- "rewards/chosen": -0.25777310132980347,
352
- "rewards/margins": 0.21288836002349854,
353
- "rewards/rejected": -0.470661461353302,
354
- "step": 110
355
- },
356
- {
357
- "epoch": 0.22954091816367264,
358
- "grad_norm": 18.380315632232364,
359
- "learning_rate": 9.509162632025569e-07,
360
- "logits/chosen": -0.5215901732444763,
361
- "logits/rejected": -0.5049930214881897,
362
- "logps/chosen": -0.9149976968765259,
363
- "logps/rejected": -0.9515384435653687,
364
- "loss": 0.6358,
365
- "rewards/accuracies": 0.6499999761581421,
366
- "rewards/chosen": -0.3461766839027405,
367
- "rewards/margins": 0.15195634961128235,
368
- "rewards/rejected": -0.49813300371170044,
369
- "step": 115
370
- },
371
- {
372
- "epoch": 0.23952095808383234,
373
- "grad_norm": 21.138881533254413,
374
- "learning_rate": 9.431017896156073e-07,
375
- "logits/chosen": -0.4705902636051178,
376
- "logits/rejected": -0.4593280255794525,
377
- "logps/chosen": -0.8069537281990051,
378
- "logps/rejected": -0.8675839304924011,
379
- "loss": 0.614,
380
- "rewards/accuracies": 0.625,
381
- "rewards/chosen": -0.29863637685775757,
382
- "rewards/margins": 0.31691476702690125,
383
- "rewards/rejected": -0.6155511140823364,
384
- "step": 120
385
- },
386
- {
387
- "epoch": 0.249500998003992,
388
- "grad_norm": 18.813175578776896,
389
- "learning_rate": 9.347474647526095e-07,
390
- "logits/chosen": -0.4284445345401764,
391
- "logits/rejected": -0.3746599555015564,
392
- "logps/chosen": -0.7812046408653259,
393
- "logps/rejected": -0.8142662048339844,
394
- "loss": 0.6011,
395
- "rewards/accuracies": 0.7749999761581421,
396
- "rewards/chosen": -0.3037291169166565,
397
- "rewards/margins": 0.3044063150882721,
398
- "rewards/rejected": -0.6081355214118958,
399
- "step": 125
400
- },
401
- {
402
- "epoch": 0.25948103792415167,
403
- "grad_norm": 17.070591448816156,
404
- "learning_rate": 9.258634670715237e-07,
405
- "logits/chosen": -0.5489827394485474,
406
- "logits/rejected": -0.5030359029769897,
407
- "logps/chosen": -0.7899664044380188,
408
- "logps/rejected": -0.8873406648635864,
409
- "loss": 0.6085,
410
- "rewards/accuracies": 0.762499988079071,
411
- "rewards/chosen": -0.3300507664680481,
412
- "rewards/margins": 0.2699635326862335,
413
- "rewards/rejected": -0.600014328956604,
414
- "step": 130
415
- },
416
- {
417
- "epoch": 0.2694610778443114,
418
- "grad_norm": 18.560449924688594,
419
- "learning_rate": 9.164606203550497e-07,
420
- "logits/chosen": -0.5027534365653992,
421
- "logits/rejected": -0.45569664239883423,
422
- "logps/chosen": -0.7548262476921082,
423
- "logps/rejected": -0.8240019083023071,
424
- "loss": 0.5993,
425
- "rewards/accuracies": 0.7875000238418579,
426
- "rewards/chosen": -0.33843767642974854,
427
- "rewards/margins": 0.27864545583724976,
428
- "rewards/rejected": -0.6170830726623535,
429
- "step": 135
430
- },
431
- {
432
- "epoch": 0.27944111776447106,
433
- "grad_norm": 15.456587593946033,
434
- "learning_rate": 9.065503805235137e-07,
435
- "logits/chosen": -0.5412222146987915,
436
- "logits/rejected": -0.5133959054946899,
437
- "logps/chosen": -0.8278282284736633,
438
- "logps/rejected": -0.8514043092727661,
439
- "loss": 0.6118,
440
- "rewards/accuracies": 0.7250000238418579,
441
- "rewards/chosen": -0.515911340713501,
442
- "rewards/margins": 0.23073478043079376,
443
- "rewards/rejected": -0.7466461658477783,
444
- "step": 140
445
- },
446
- {
447
- "epoch": 0.2894211576846307,
448
- "grad_norm": 13.75653652171432,
449
- "learning_rate": 8.961448216775953e-07,
450
- "logits/chosen": -0.531816303730011,
451
- "logits/rejected": -0.5432392358779907,
452
- "logps/chosen": -0.8441339731216431,
453
- "logps/rejected": -0.9052772521972656,
454
- "loss": 0.6047,
455
- "rewards/accuracies": 0.7250000238418579,
456
- "rewards/chosen": -0.6214492917060852,
457
- "rewards/margins": 0.40321844816207886,
458
- "rewards/rejected": -1.024667739868164,
459
- "step": 145
460
- },
461
- {
462
- "epoch": 0.2994011976047904,
463
- "grad_norm": 14.061323030297434,
464
- "learning_rate": 8.852566213878946e-07,
465
- "logits/chosen": -0.5842114686965942,
466
- "logits/rejected": -0.5797411203384399,
467
- "logps/chosen": -0.7897329926490784,
468
- "logps/rejected": -0.8609731793403625,
469
- "loss": 0.6038,
470
- "rewards/accuracies": 0.75,
471
- "rewards/chosen": -0.49532920122146606,
472
- "rewards/margins": 0.36390385031700134,
473
- "rewards/rejected": -0.859233021736145,
474
- "step": 150
475
- },
476
- {
477
- "epoch": 0.3093812375249501,
478
- "grad_norm": 12.749472652825297,
479
- "learning_rate": 8.73899045249266e-07,
480
- "logits/chosen": -0.6203972697257996,
481
- "logits/rejected": -0.6109431982040405,
482
- "logps/chosen": -0.8315925598144531,
483
- "logps/rejected": -0.9241877794265747,
484
- "loss": 0.5848,
485
- "rewards/accuracies": 0.737500011920929,
486
- "rewards/chosen": -0.6116100549697876,
487
- "rewards/margins": 0.5322220325469971,
488
- "rewards/rejected": -1.1438319683074951,
489
- "step": 155
490
- },
491
- {
492
- "epoch": 0.3193612774451098,
493
- "grad_norm": 18.453987933361336,
494
- "learning_rate": 8.620859307187338e-07,
495
- "logits/chosen": -0.5765129923820496,
496
- "logits/rejected": -0.5761350393295288,
497
- "logps/chosen": -0.8263224363327026,
498
- "logps/rejected": -0.8567667007446289,
499
- "loss": 0.5871,
500
- "rewards/accuracies": 0.6875,
501
- "rewards/chosen": -0.5846480131149292,
502
- "rewards/margins": 0.3195909559726715,
503
- "rewards/rejected": -0.9042388796806335,
504
- "step": 160
505
- },
506
- {
507
- "epoch": 0.32934131736526945,
508
- "grad_norm": 24.93482299590875,
509
- "learning_rate": 8.498316702566826e-07,
510
- "logits/chosen": -0.6133869886398315,
511
- "logits/rejected": -0.5582712292671204,
512
- "logps/chosen": -0.828209400177002,
513
- "logps/rejected": -0.893822193145752,
514
- "loss": 0.5702,
515
- "rewards/accuracies": 0.699999988079071,
516
- "rewards/chosen": -0.8109213709831238,
517
- "rewards/margins": 0.3340582847595215,
518
- "rewards/rejected": -1.14497971534729,
519
- "step": 165
520
- },
521
- {
522
- "epoch": 0.3393213572854291,
523
- "grad_norm": 14.606101127926557,
524
- "learning_rate": 8.371511937918617e-07,
525
- "logits/chosen": -0.6824047565460205,
526
- "logits/rejected": -0.664223313331604,
527
- "logps/chosen": -0.8030799031257629,
528
- "logps/rejected": -0.8865512013435364,
529
- "loss": 0.6006,
530
- "rewards/accuracies": 0.737500011920929,
531
- "rewards/chosen": -0.6996228694915771,
532
- "rewards/margins": 0.5749022960662842,
533
- "rewards/rejected": -1.2745250463485718,
534
- "step": 170
535
- },
536
- {
537
- "epoch": 0.34930139720558884,
538
- "grad_norm": 14.615105697665864,
539
- "learning_rate": 8.240599505315654e-07,
540
- "logits/chosen": -0.6872956156730652,
541
- "logits/rejected": -0.6839295625686646,
542
- "logps/chosen": -0.8706089854240417,
543
- "logps/rejected": -0.9267762899398804,
544
- "loss": 0.5737,
545
- "rewards/accuracies": 0.762499988079071,
546
- "rewards/chosen": -0.7884671688079834,
547
- "rewards/margins": 0.42132559418678284,
548
- "rewards/rejected": -1.2097927331924438,
549
- "step": 175
550
- },
551
- {
552
- "epoch": 0.3592814371257485,
553
- "grad_norm": 17.325663221044394,
554
- "learning_rate": 8.105738901391551e-07,
555
- "logits/chosen": -0.702431857585907,
556
- "logits/rejected": -0.6738135814666748,
557
- "logps/chosen": -0.8363839387893677,
558
- "logps/rejected": -0.8939155340194702,
559
- "loss": 0.5712,
560
- "rewards/accuracies": 0.7124999761581421,
561
- "rewards/chosen": -0.724533200263977,
562
- "rewards/margins": 0.3838126063346863,
563
- "rewards/rejected": -1.108345866203308,
564
- "step": 180
565
- },
566
- {
567
- "epoch": 0.36926147704590817,
568
- "grad_norm": 16.365607087163703,
569
- "learning_rate": 7.967094433018508e-07,
570
- "logits/chosen": -0.7076471447944641,
571
- "logits/rejected": -0.6945314407348633,
572
- "logps/chosen": -0.9004107713699341,
573
- "logps/rejected": -1.046083688735962,
574
- "loss": 0.5742,
575
- "rewards/accuracies": 0.7250000238418579,
576
- "rewards/chosen": -0.9093888401985168,
577
- "rewards/margins": 0.6952089071273804,
578
- "rewards/rejected": -1.6045976877212524,
579
- "step": 185
580
- },
581
- {
582
- "epoch": 0.37924151696606784,
583
- "grad_norm": 43.74908736902799,
584
- "learning_rate": 7.82483501712469e-07,
585
- "logits/chosen": -0.6856539845466614,
586
- "logits/rejected": -0.6615663766860962,
587
- "logps/chosen": -0.9109293222427368,
588
- "logps/rejected": -0.9081963300704956,
589
- "loss": 0.5967,
590
- "rewards/accuracies": 0.6499999761581421,
591
- "rewards/chosen": -0.9922858476638794,
592
- "rewards/margins": 0.4050213396549225,
593
- "rewards/rejected": -1.39730703830719,
594
- "step": 190
595
- },
596
- {
597
- "epoch": 0.38922155688622756,
598
- "grad_norm": 19.60550699714766,
599
- "learning_rate": 7.679133974894982e-07,
600
- "logits/chosen": -0.6713054180145264,
601
- "logits/rejected": -0.6843950748443604,
602
- "logps/chosen": -0.8581186532974243,
603
- "logps/rejected": -0.9895851016044617,
604
- "loss": 0.5531,
605
- "rewards/accuracies": 0.7875000238418579,
606
- "rewards/chosen": -0.9614213705062866,
607
- "rewards/margins": 0.7596980929374695,
608
- "rewards/rejected": -1.7211196422576904,
609
- "step": 195
610
- },
611
- {
612
- "epoch": 0.3992015968063872,
613
- "grad_norm": 15.996993544634163,
614
- "learning_rate": 7.530168820605818e-07,
615
- "logits/chosen": -0.6824347972869873,
616
- "logits/rejected": -0.6842206120491028,
617
- "logps/chosen": -0.9382361173629761,
618
- "logps/rejected": -0.9921613931655884,
619
- "loss": 0.5583,
620
- "rewards/accuracies": 0.762499988079071,
621
- "rewards/chosen": -1.1333320140838623,
622
- "rewards/margins": 0.5768089890480042,
623
- "rewards/rejected": -1.7101409435272217,
624
- "step": 200
625
- },
626
- {
627
- "epoch": 0.4091816367265469,
628
- "grad_norm": 15.558869167043554,
629
- "learning_rate": 7.378121045351377e-07,
630
- "logits/chosen": -0.6672806739807129,
631
- "logits/rejected": -0.6542935371398926,
632
- "logps/chosen": -0.8662320375442505,
633
- "logps/rejected": -0.9581985473632812,
634
- "loss": 0.5465,
635
- "rewards/accuracies": 0.7250000238418579,
636
- "rewards/chosen": -1.0950018167495728,
637
- "rewards/margins": 0.47961243987083435,
638
- "rewards/rejected": -1.57461416721344,
639
- "step": 205
640
- },
641
- {
642
- "epoch": 0.41916167664670656,
643
- "grad_norm": 28.887132861516903,
644
- "learning_rate": 7.223175895924637e-07,
645
- "logits/chosen": -0.690104603767395,
646
- "logits/rejected": -0.6761881709098816,
647
- "logps/chosen": -0.863226592540741,
648
- "logps/rejected": -0.9225906133651733,
649
- "loss": 0.5553,
650
- "rewards/accuracies": 0.824999988079071,
651
- "rewards/chosen": -1.0103793144226074,
652
- "rewards/margins": 0.6174372434616089,
653
- "rewards/rejected": -1.6278165578842163,
654
- "step": 210
655
- },
656
- {
657
- "epoch": 0.4291417165668663,
658
- "grad_norm": 15.081565074387589,
659
- "learning_rate": 7.065522149122709e-07,
660
- "logits/chosen": -0.7095133662223816,
661
- "logits/rejected": -0.6953072547912598,
662
- "logps/chosen": -0.9815180897712708,
663
- "logps/rejected": -1.0388023853302002,
664
- "loss": 0.5393,
665
- "rewards/accuracies": 0.75,
666
- "rewards/chosen": -1.4431049823760986,
667
- "rewards/margins": 0.5409745573997498,
668
- "rewards/rejected": -1.9840797185897827,
669
- "step": 215
670
- },
671
- {
672
- "epoch": 0.43912175648702595,
673
- "grad_norm": 27.859691103168426,
674
- "learning_rate": 6.905351881751371e-07,
675
- "logits/chosen": -0.7326300144195557,
676
- "logits/rejected": -0.7271891832351685,
677
- "logps/chosen": -0.886702835559845,
678
- "logps/rejected": -0.9949976205825806,
679
- "loss": 0.5524,
680
- "rewards/accuracies": 0.699999988079071,
681
- "rewards/chosen": -1.178215742111206,
682
- "rewards/margins": 0.5899935364723206,
683
- "rewards/rejected": -1.7682092189788818,
684
- "step": 220
685
- },
686
- {
687
- "epoch": 0.4491017964071856,
688
- "grad_norm": 20.049309145299137,
689
- "learning_rate": 6.742860236609076e-07,
690
- "logits/chosen": -0.7312344312667847,
691
- "logits/rejected": -0.7114800810813904,
692
- "logps/chosen": -0.9219420552253723,
693
- "logps/rejected": -0.9769749641418457,
694
- "loss": 0.5397,
695
- "rewards/accuracies": 0.737500011920929,
696
- "rewards/chosen": -1.2608001232147217,
697
- "rewards/margins": 0.6559053659439087,
698
- "rewards/rejected": -1.9167054891586304,
699
- "step": 225
700
- },
701
- {
702
- "epoch": 0.4590818363273453,
703
- "grad_norm": 21.600145701212536,
704
- "learning_rate": 6.578245184735512e-07,
705
- "logits/chosen": -0.7300236225128174,
706
- "logits/rejected": -0.7002195119857788,
707
- "logps/chosen": -0.9393006563186646,
708
- "logps/rejected": -0.9583779573440552,
709
- "loss": 0.5285,
710
- "rewards/accuracies": 0.675000011920929,
711
- "rewards/chosen": -1.3502466678619385,
712
- "rewards/margins": 0.5810690522193909,
713
- "rewards/rejected": -1.9313156604766846,
714
- "step": 230
715
- },
716
- {
717
- "epoch": 0.469061876247505,
718
- "grad_norm": 15.102196954163185,
719
- "learning_rate": 6.411707284214383e-07,
720
- "logits/chosen": -0.7769094705581665,
721
- "logits/rejected": -0.784206211566925,
722
- "logps/chosen": -0.937769889831543,
723
- "logps/rejected": -1.0256552696228027,
724
- "loss": 0.5481,
725
- "rewards/accuracies": 0.699999988079071,
726
- "rewards/chosen": -1.3135572671890259,
727
- "rewards/margins": 0.6154013872146606,
728
- "rewards/rejected": -1.928958535194397,
729
- "step": 235
730
- },
731
- {
732
- "epoch": 0.47904191616766467,
733
- "grad_norm": 22.589088897581874,
734
- "learning_rate": 6.243449435824276e-07,
735
- "logits/chosen": -0.7115018367767334,
736
- "logits/rejected": -0.6854550242424011,
737
- "logps/chosen": -0.8389812707901001,
738
- "logps/rejected": -0.9537237882614136,
739
- "loss": 0.5204,
740
- "rewards/accuracies": 0.75,
741
- "rewards/chosen": -1.3218843936920166,
742
- "rewards/margins": 0.7666546106338501,
743
- "rewards/rejected": -2.088538885116577,
744
- "step": 240
745
- },
746
- {
747
- "epoch": 0.48902195608782434,
748
- "grad_norm": 17.05950897502288,
749
- "learning_rate": 6.073676635835316e-07,
750
- "logits/chosen": -0.7151886820793152,
751
- "logits/rejected": -0.7269682884216309,
752
- "logps/chosen": -0.8353130221366882,
753
- "logps/rejected": -1.045952320098877,
754
- "loss": 0.5198,
755
- "rewards/accuracies": 0.7875000238418579,
756
- "rewards/chosen": -1.2784829139709473,
757
- "rewards/margins": 1.028085470199585,
758
- "rewards/rejected": -2.3065686225891113,
759
- "step": 245
760
- },
761
- {
762
- "epoch": 0.499001996007984,
763
- "grad_norm": 22.204919215887884,
764
- "learning_rate": 5.9025957262528e-07,
765
- "logits/chosen": -0.7427772879600525,
766
- "logits/rejected": -0.7114007472991943,
767
- "logps/chosen": -0.8959082365036011,
768
- "logps/rejected": -1.015169382095337,
769
- "loss": 0.5047,
770
- "rewards/accuracies": 0.8500000238418579,
771
- "rewards/chosen": -1.4343379735946655,
772
- "rewards/margins": 0.8260199427604675,
773
- "rewards/rejected": -2.260357618331909,
774
- "step": 250
775
- },
776
- {
777
- "epoch": 0.5089820359281437,
778
- "grad_norm": 29.103918291256367,
779
- "learning_rate": 5.730415142812058e-07,
780
- "logits/chosen": -0.7786175608634949,
781
- "logits/rejected": -0.7618826627731323,
782
- "logps/chosen": -0.9678372144699097,
783
- "logps/rejected": -1.090914011001587,
784
- "loss": 0.5111,
785
- "rewards/accuracies": 0.7749999761581421,
786
- "rewards/chosen": -1.7259149551391602,
787
- "rewards/margins": 0.8811995387077332,
788
- "rewards/rejected": -2.607114315032959,
789
- "step": 255
790
- },
791
- {
792
- "epoch": 0.5189620758483033,
793
- "grad_norm": 17.21881262945149,
794
- "learning_rate": 5.557344661031627e-07,
795
- "logits/chosen": -0.7823535799980164,
796
- "logits/rejected": -0.7699525356292725,
797
- "logps/chosen": -0.9408347010612488,
798
- "logps/rejected": -1.0648287534713745,
799
- "loss": 0.5406,
800
- "rewards/accuracies": 0.75,
801
- "rewards/chosen": -1.5965430736541748,
802
- "rewards/margins": 1.0114134550094604,
803
- "rewards/rejected": -2.6079564094543457,
804
- "step": 260
805
- },
806
- {
807
- "epoch": 0.5289421157684631,
808
- "grad_norm": 15.711105031107639,
809
- "learning_rate": 5.383595140634093e-07,
810
- "logits/chosen": -0.7325602769851685,
811
- "logits/rejected": -0.7186430096626282,
812
- "logps/chosen": -0.943433403968811,
813
- "logps/rejected": -1.0095100402832031,
814
- "loss": 0.5276,
815
- "rewards/accuracies": 0.7749999761581421,
816
- "rewards/chosen": -1.7036699056625366,
817
- "rewards/margins": 0.8646278381347656,
818
- "rewards/rejected": -2.5682976245880127,
819
- "step": 265
820
- },
821
- {
822
- "epoch": 0.5389221556886228,
823
- "grad_norm": 13.20029610671174,
824
- "learning_rate": 5.209378268645997e-07,
825
- "logits/chosen": -0.7176939249038696,
826
- "logits/rejected": -0.7001760601997375,
827
- "logps/chosen": -0.8967610597610474,
828
- "logps/rejected": -1.0068597793579102,
829
- "loss": 0.5582,
830
- "rewards/accuracies": 0.800000011920929,
831
- "rewards/chosen": -1.737532377243042,
832
- "rewards/margins": 0.5873457193374634,
833
- "rewards/rejected": -2.324878215789795,
834
- "step": 270
835
- },
836
- {
837
- "epoch": 0.5489021956087824,
838
- "grad_norm": 20.138444852189,
839
- "learning_rate": 5.034906301489807e-07,
840
- "logits/chosen": -0.7280402779579163,
841
- "logits/rejected": -0.717974066734314,
842
- "logps/chosen": -0.9318618774414062,
843
- "logps/rejected": -1.0344207286834717,
844
- "loss": 0.5238,
845
- "rewards/accuracies": 0.75,
846
- "rewards/chosen": -1.814552903175354,
847
- "rewards/margins": 0.8326984643936157,
848
- "rewards/rejected": -2.647251605987549,
849
- "step": 275
850
- },
851
- {
852
- "epoch": 0.5588822355289421,
853
- "grad_norm": 37.63172145615237,
854
- "learning_rate": 4.860391806382156e-07,
855
- "logits/chosen": -0.8018016815185547,
856
- "logits/rejected": -0.8149466514587402,
857
- "logps/chosen": -0.9740177392959595,
858
- "logps/rejected": -1.010301113128662,
859
- "loss": 0.5217,
860
- "rewards/accuracies": 0.8125,
861
- "rewards/chosen": -1.8418344259262085,
862
- "rewards/margins": 0.7042922973632812,
863
- "rewards/rejected": -2.5461268424987793,
864
- "step": 280
865
- },
866
- {
867
- "epoch": 0.5688622754491018,
868
- "grad_norm": 20.60319293019308,
869
- "learning_rate": 4.686047402353433e-07,
870
- "logits/chosen": -0.7303526997566223,
871
- "logits/rejected": -0.7184230089187622,
872
- "logps/chosen": -0.9589718580245972,
873
- "logps/rejected": -1.045867681503296,
874
- "loss": 0.523,
875
- "rewards/accuracies": 0.8500000238418579,
876
- "rewards/chosen": -1.8164526224136353,
877
- "rewards/margins": 0.7647993564605713,
878
- "rewards/rejected": -2.581251621246338,
879
- "step": 285
880
- },
881
- {
882
- "epoch": 0.5788423153692615,
883
- "grad_norm": 31.323512708787323,
884
- "learning_rate": 4.512085501204253e-07,
885
- "logits/chosen": -0.7896037101745605,
886
- "logits/rejected": -0.7570759057998657,
887
- "logps/chosen": -0.9357205629348755,
888
- "logps/rejected": -1.0159623622894287,
889
- "loss": 0.5636,
890
- "rewards/accuracies": 0.737500011920929,
891
- "rewards/chosen": -1.7307599782943726,
892
- "rewards/margins": 0.7415143251419067,
893
- "rewards/rejected": -2.4722743034362793,
894
- "step": 290
895
- },
896
- {
897
- "epoch": 0.5888223552894212,
898
- "grad_norm": 16.113569930607966,
899
- "learning_rate": 4.338718048714387e-07,
900
- "logits/chosen": -0.7748357057571411,
901
- "logits/rejected": -0.7527580857276917,
902
- "logps/chosen": -0.9312615394592285,
903
- "logps/rejected": -0.9944890141487122,
904
- "loss": 0.5261,
905
- "rewards/accuracies": 0.862500011920929,
906
- "rewards/chosen": -1.7469574213027954,
907
- "rewards/margins": 0.5817708969116211,
908
- "rewards/rejected": -2.328728675842285,
909
- "step": 295
910
- },
911
- {
912
- "epoch": 0.5988023952095808,
913
- "grad_norm": 28.923235923466983,
914
- "learning_rate": 4.166156266419489e-07,
915
- "logits/chosen": -0.7699166536331177,
916
- "logits/rejected": -0.7807640433311462,
917
- "logps/chosen": -0.8693283200263977,
918
- "logps/rejected": -1.0598359107971191,
919
- "loss": 0.5223,
920
- "rewards/accuracies": 0.8125,
921
- "rewards/chosen": -1.6791868209838867,
922
- "rewards/margins": 1.093235969543457,
923
- "rewards/rejected": -2.7724227905273438,
924
- "step": 300
925
- },
926
- {
927
- "epoch": 0.6087824351297405,
928
- "grad_norm": 43.24348090401382,
929
- "learning_rate": 3.9946103942701775e-07,
930
- "logits/chosen": -0.713184118270874,
931
- "logits/rejected": -0.7183653116226196,
932
- "logps/chosen": -0.877181887626648,
933
- "logps/rejected": -1.0504357814788818,
934
- "loss": 0.5006,
935
- "rewards/accuracies": 0.7875000238418579,
936
- "rewards/chosen": -1.6295019388198853,
937
- "rewards/margins": 0.8480024337768555,
938
- "rewards/rejected": -2.477504253387451,
939
- "step": 305
940
- },
941
- {
942
- "epoch": 0.6187624750499002,
943
- "grad_norm": 18.17532898488996,
944
- "learning_rate": 3.8242894344870495e-07,
945
- "logits/chosen": -0.7871264815330505,
946
- "logits/rejected": -0.7895926237106323,
947
- "logps/chosen": -0.9971866607666016,
948
- "logps/rejected": -1.110032558441162,
949
- "loss": 0.5224,
950
- "rewards/accuracies": 0.7250000238418579,
951
- "rewards/chosen": -2.071504592895508,
952
- "rewards/margins": 0.8322666883468628,
953
- "rewards/rejected": -2.90377140045166,
954
- "step": 310
955
- },
956
- {
957
- "epoch": 0.6287425149700598,
958
- "grad_norm": 22.503824878122693,
959
- "learning_rate": 3.6554008969236715e-07,
960
- "logits/chosen": -0.7987596392631531,
961
- "logits/rejected": -0.8034515380859375,
962
- "logps/chosen": -0.9355812072753906,
963
- "logps/rejected": -1.101746916770935,
964
- "loss": 0.4993,
965
- "rewards/accuracies": 0.8374999761581421,
966
- "rewards/chosen": -1.8815546035766602,
967
- "rewards/margins": 1.0128275156021118,
968
- "rewards/rejected": -2.8943822383880615,
969
- "step": 315
970
- },
971
- {
972
- "epoch": 0.6387225548902196,
973
- "grad_norm": 18.4314421544152,
974
- "learning_rate": 3.488150546247778e-07,
975
- "logits/chosen": -0.7646247148513794,
976
- "logits/rejected": -0.7596274018287659,
977
- "logps/chosen": -0.9071539640426636,
978
- "logps/rejected": -0.9974797964096069,
979
- "loss": 0.5067,
980
- "rewards/accuracies": 0.824999988079071,
981
- "rewards/chosen": -1.761649489402771,
982
- "rewards/margins": 0.7561764717102051,
983
- "rewards/rejected": -2.5178260803222656,
984
- "step": 320
985
- },
986
- {
987
- "epoch": 0.6487025948103793,
988
- "grad_norm": 13.46220126387281,
989
- "learning_rate": 3.3227421512487255e-07,
990
- "logits/chosen": -0.7663795948028564,
991
- "logits/rejected": -0.7689803838729858,
992
- "logps/chosen": -0.9501086473464966,
993
- "logps/rejected": -1.0315439701080322,
994
- "loss": 0.5057,
995
- "rewards/accuracies": 0.7749999761581421,
996
- "rewards/chosen": -1.9115231037139893,
997
- "rewards/margins": 0.7186993360519409,
998
- "rewards/rejected": -2.630222797393799,
999
- "step": 325
1000
- },
1001
- {
1002
- "epoch": 0.6586826347305389,
1003
- "grad_norm": 43.00875479745304,
1004
- "learning_rate": 3.15937723657661e-07,
1005
- "logits/chosen": -0.8140354156494141,
1006
- "logits/rejected": -0.8173881769180298,
1007
- "logps/chosen": -0.9712478518486023,
1008
- "logps/rejected": -1.0843861103057861,
1009
- "loss": 0.5873,
1010
- "rewards/accuracies": 0.737500011920929,
1011
- "rewards/chosen": -2.222109794616699,
1012
- "rewards/margins": 0.6900671720504761,
1013
- "rewards/rejected": -2.9121768474578857,
1014
- "step": 330
1015
- },
1016
- {
1017
- "epoch": 0.6686626746506986,
1018
- "grad_norm": 32.97485130184056,
1019
- "learning_rate": 2.9982548372155256e-07,
1020
- "logits/chosen": -0.7841066718101501,
1021
- "logits/rejected": -0.801399827003479,
1022
- "logps/chosen": -0.9092627763748169,
1023
- "logps/rejected": -1.0644382238388062,
1024
- "loss": 0.5121,
1025
- "rewards/accuracies": 0.8374999761581421,
1026
- "rewards/chosen": -1.8617267608642578,
1027
- "rewards/margins": 1.0364662408828735,
1028
- "rewards/rejected": -2.898192882537842,
1029
- "step": 335
1030
- },
1031
- {
1032
- "epoch": 0.6786427145708582,
1033
- "grad_norm": 50.76904692556944,
1034
- "learning_rate": 2.8395712559900874e-07,
1035
- "logits/chosen": -0.8244466781616211,
1036
- "logits/rejected": -0.8209226727485657,
1037
- "logps/chosen": -1.0543348789215088,
1038
- "logps/rejected": -1.1451005935668945,
1039
- "loss": 0.5205,
1040
- "rewards/accuracies": 0.75,
1041
- "rewards/chosen": -2.412365674972534,
1042
- "rewards/margins": 0.8550116419792175,
1043
- "rewards/rejected": -3.2673773765563965,
1044
- "step": 340
1045
- },
1046
- {
1047
- "epoch": 0.688622754491018,
1048
- "grad_norm": 27.915007397999354,
1049
- "learning_rate": 2.683519824400692e-07,
1050
- "logits/chosen": -0.8374012112617493,
1051
- "logits/rejected": -0.8106748461723328,
1052
- "logps/chosen": -0.9777308702468872,
1053
- "logps/rejected": -1.1284379959106445,
1054
- "loss": 0.5234,
1055
- "rewards/accuracies": 0.7124999761581421,
1056
- "rewards/chosen": -2.029723882675171,
1057
- "rewards/margins": 1.0547873973846436,
1058
- "rewards/rejected": -3.0845110416412354,
1059
- "step": 345
1060
- },
1061
- {
1062
- "epoch": 0.6986027944111777,
1063
- "grad_norm": 22.882772557486724,
1064
- "learning_rate": 2.530290667078846e-07,
1065
- "logits/chosen": -0.8036109805107117,
1066
- "logits/rejected": -0.7900259494781494,
1067
- "logps/chosen": -0.9737772941589355,
1068
- "logps/rejected": -1.085715651512146,
1069
- "loss": 0.5015,
1070
- "rewards/accuracies": 0.7124999761581421,
1071
- "rewards/chosen": -2.1590065956115723,
1072
- "rewards/margins": 0.7729440927505493,
1073
- "rewards/rejected": -2.931950807571411,
1074
- "step": 350
1075
- },
1076
- {
1077
- "epoch": 0.7085828343313373,
1078
- "grad_norm": 18.205286801888395,
1079
- "learning_rate": 2.380070470149605e-07,
1080
- "logits/chosen": -0.8169373273849487,
1081
- "logits/rejected": -0.8327714204788208,
1082
- "logps/chosen": -0.9806594848632812,
1083
- "logps/rejected": -1.1701897382736206,
1084
- "loss": 0.496,
1085
- "rewards/accuracies": 0.75,
1086
- "rewards/chosen": -2.151632308959961,
1087
- "rewards/margins": 1.1866142749786377,
1088
- "rewards/rejected": -3.3382461071014404,
1089
- "step": 355
1090
- },
1091
- {
1092
- "epoch": 0.718562874251497,
1093
- "grad_norm": 41.98073419560719,
1094
- "learning_rate": 2.23304225378328e-07,
1095
- "logits/chosen": -0.8116399049758911,
1096
- "logits/rejected": -0.7772837281227112,
1097
- "logps/chosen": -1.0802838802337646,
1098
- "logps/rejected": -1.1937668323516846,
1099
- "loss": 0.5124,
1100
- "rewards/accuracies": 0.800000011920929,
1101
- "rewards/chosen": -2.334536075592041,
1102
- "rewards/margins": 1.168308138847351,
1103
- "rewards/rejected": -3.5028443336486816,
1104
- "step": 360
1105
- },
1106
- {
1107
- "epoch": 0.7285429141716567,
1108
- "grad_norm": 27.12760666687319,
1109
- "learning_rate": 2.0893851492135532e-07,
1110
- "logits/chosen": -0.8288819193840027,
1111
- "logits/rejected": -0.8238687515258789,
1112
- "logps/chosen": -0.9590953588485718,
1113
- "logps/rejected": -1.1089394092559814,
1114
- "loss": 0.5151,
1115
- "rewards/accuracies": 0.762499988079071,
1116
- "rewards/chosen": -1.967963457107544,
1117
- "rewards/margins": 1.0413198471069336,
1118
- "rewards/rejected": -3.0092828273773193,
1119
- "step": 365
1120
- },
1121
- {
1122
- "epoch": 0.7385229540918163,
1123
- "grad_norm": 28.594288392858182,
1124
- "learning_rate": 1.9492741804936618e-07,
1125
- "logits/chosen": -0.8266215324401855,
1126
- "logits/rejected": -0.8546527028083801,
1127
- "logps/chosen": -1.0014851093292236,
1128
- "logps/rejected": -1.0811948776245117,
1129
- "loss": 0.5134,
1130
- "rewards/accuracies": 0.737500011920929,
1131
- "rewards/chosen": -2.126549482345581,
1132
- "rewards/margins": 0.8130427598953247,
1133
- "rewards/rejected": -2.939592123031616,
1134
- "step": 370
1135
- },
1136
- {
1137
- "epoch": 0.7485029940119761,
1138
- "grad_norm": 16.69085619906614,
1139
- "learning_rate": 1.812880051256551e-07,
1140
- "logits/chosen": -0.8046598434448242,
1141
- "logits/rejected": -0.8114801645278931,
1142
- "logps/chosen": -0.9752721786499023,
1143
- "logps/rejected": -1.0586512088775635,
1144
- "loss": 0.4999,
1145
- "rewards/accuracies": 0.6499999761581421,
1146
- "rewards/chosen": -2.1684842109680176,
1147
- "rewards/margins": 0.6114571690559387,
1148
- "rewards/rejected": -2.7799415588378906,
1149
- "step": 375
1150
- },
1151
- {
1152
- "epoch": 0.7584830339321357,
1153
- "grad_norm": 15.627879784878328,
1154
- "learning_rate": 1.6803689367387918e-07,
1155
- "logits/chosen": -0.823495090007782,
1156
- "logits/rejected": -0.7943788170814514,
1157
- "logps/chosen": -0.8508175015449524,
1158
- "logps/rejected": -0.9670391082763672,
1159
- "loss": 0.5187,
1160
- "rewards/accuracies": 0.6625000238418579,
1161
- "rewards/chosen": -1.9837039709091187,
1162
- "rewards/margins": 0.5095661878585815,
1163
- "rewards/rejected": -2.4932703971862793,
1164
- "step": 380
1165
- },
1166
- {
1167
- "epoch": 0.7684630738522954,
1168
- "grad_norm": 14.609956803422161,
1169
- "learning_rate": 1.551902281321651e-07,
1170
- "logits/chosen": -0.8435534238815308,
1171
- "logits/rejected": -0.8091138601303101,
1172
- "logps/chosen": -0.998641848564148,
1173
- "logps/rejected": -1.133357286453247,
1174
- "loss": 0.4954,
1175
- "rewards/accuracies": 0.887499988079071,
1176
- "rewards/chosen": -1.9779011011123657,
1177
- "rewards/margins": 1.3220001459121704,
1178
- "rewards/rejected": -3.2999014854431152,
1179
- "step": 385
1180
- },
1181
- {
1182
- "epoch": 0.7784431137724551,
1183
- "grad_norm": 27.689053757448693,
1184
- "learning_rate": 1.4276366018359842e-07,
1185
- "logits/chosen": -0.8398737907409668,
1186
- "logits/rejected": -0.811150848865509,
1187
- "logps/chosen": -0.9453309178352356,
1188
- "logps/rejected": -1.0934703350067139,
1189
- "loss": 0.5225,
1190
- "rewards/accuracies": 0.762499988079071,
1191
- "rewards/chosen": -2.0561106204986572,
1192
- "rewards/margins": 1.0419596433639526,
1193
- "rewards/rejected": -3.0980706214904785,
1194
- "step": 390
1195
- },
1196
- {
1197
- "epoch": 0.7884231536926147,
1198
- "grad_norm": 37.02718002584944,
1199
- "learning_rate": 1.3077232968705805e-07,
1200
- "logits/chosen": -0.8516527414321899,
1201
- "logits/rejected": -0.8527445793151855,
1202
- "logps/chosen": -0.969285786151886,
1203
- "logps/rejected": -1.0676743984222412,
1204
- "loss": 0.4928,
1205
- "rewards/accuracies": 0.7250000238418579,
1206
- "rewards/chosen": -1.9902063608169556,
1207
- "rewards/margins": 0.7401792407035828,
1208
- "rewards/rejected": -2.7303855419158936,
1209
- "step": 395
1210
- },
1211
- {
1212
- "epoch": 0.7984031936127745,
1213
- "grad_norm": 14.525059669424861,
1214
- "learning_rate": 1.192308462316317e-07,
1215
- "logits/chosen": -0.7803691625595093,
1216
- "logits/rejected": -0.7951369285583496,
1217
- "logps/chosen": -0.9650856852531433,
1218
- "logps/rejected": -1.1078399419784546,
1219
- "loss": 0.4959,
1220
- "rewards/accuracies": 0.824999988079071,
1221
- "rewards/chosen": -2.048185110092163,
1222
- "rewards/margins": 0.8834241628646851,
1223
- "rewards/rejected": -2.9316093921661377,
1224
- "step": 400
1225
- }
1226
- ],
1227
- "logging_steps": 5,
1228
- "max_steps": 501,
1229
- "num_input_tokens_seen": 0,
1230
- "num_train_epochs": 1,
1231
- "save_steps": 101,
1232
- "stateful_callbacks": {
1233
- "TrainerControl": {
1234
- "args": {
1235
- "should_epoch_stop": false,
1236
- "should_evaluate": false,
1237
- "should_log": false,
1238
- "should_save": true,
1239
- "should_training_stop": false
1240
- },
1241
- "attributes": {}
1242
- }
1243
- },
1244
- "total_flos": 0.0,
1245
- "train_batch_size": 4,
1246
- "trial_name": null,
1247
- "trial_params": null
1248
- }