ellabettison commited on
Commit
528973a
·
verified ·
1 Parent(s): 91b7878

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,4 +1,8 @@
1
  {
2
- "eval_accuracy": 0.24349442379182157,
3
- "eval_loss": 0.034249916672706604
 
 
 
 
4
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.44298483904670266,
5
+ "train_runtime": 986.129,
6
+ "train_samples_per_second": 21.843,
7
+ "train_steps_per_second": 0.69
8
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openai/clip-vit-base-patch32",
3
  "architectures": [
4
  "CLIPModel"
5
  ],
 
1
  {
2
+ "_name_or_path": "ellabettison/logo-matching-base",
3
  "architectures": [
4
  "CLIPModel"
5
  ],
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbedbdc865506373a1da560e00b99bc692dfc892a99cfe51f48ef95edc641528
3
  size 605156676
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab4af9aa03da49868df3e4c2c94b297aff45455cfdd13e28107e8a8bc4eec710
3
  size 605156676
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 5.916629591779738e+17,
4
- "train_loss": 0.006165030080468413,
5
- "train_runtime": 454.1808,
6
- "train_samples_per_second": 16.469,
7
- "train_steps_per_second": 1.035
8
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.44298483904670266,
5
+ "train_runtime": 986.129,
6
+ "train_samples_per_second": 21.843,
7
+ "train_steps_per_second": 0.69
8
  }
trainer_state.json CHANGED
@@ -1,446 +1,567 @@
1
  {
2
- "best_metric": 0.04811817407608032,
3
- "best_model_checkpoint": "./logo-matching-base/checkpoint-47",
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 470,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.2127659574468085,
13
- "grad_norm": 0.07344582676887512,
14
- "learning_rate": 0.00019574468085106384,
15
- "loss": 0.0184,
16
- "step": 10
17
- },
18
- {
19
- "epoch": 0.425531914893617,
20
- "grad_norm": 0.07934936881065369,
21
- "learning_rate": 0.00019148936170212768,
22
- "loss": 0.0158,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.6382978723404256,
27
- "grad_norm": 0.06698207557201385,
28
- "learning_rate": 0.0001872340425531915,
29
- "loss": 0.0147,
30
- "step": 30
31
- },
32
- {
33
- "epoch": 0.851063829787234,
34
- "grad_norm": 0.08733490109443665,
35
- "learning_rate": 0.00018297872340425532,
36
- "loss": 0.014,
37
- "step": 40
38
- },
39
  {
40
  "epoch": 1.0,
41
- "eval_accuracy": 0.28807339449541286,
42
- "eval_loss": 0.04811817407608032,
43
- "eval_runtime": 6.3598,
44
- "eval_samples_per_second": 85.695,
45
- "eval_steps_per_second": 10.849,
46
- "step": 47
47
- },
48
- {
49
- "epoch": 1.0638297872340425,
50
- "grad_norm": 0.062165793031454086,
51
- "learning_rate": 0.00017872340425531915,
52
- "loss": 0.0126,
53
- "step": 50
54
- },
55
- {
56
- "epoch": 1.2765957446808511,
57
- "grad_norm": 0.07251156866550446,
58
- "learning_rate": 0.00017446808510638298,
59
- "loss": 0.0128,
60
- "step": 60
61
  },
62
  {
63
- "epoch": 1.4893617021276595,
64
- "grad_norm": 0.0481877438724041,
65
- "learning_rate": 0.00017021276595744682,
66
- "loss": 0.0126,
67
- "step": 70
68
- },
69
- {
70
- "epoch": 1.702127659574468,
71
- "grad_norm": 0.07360873371362686,
72
- "learning_rate": 0.00016595744680851065,
73
- "loss": 0.0121,
74
- "step": 80
75
- },
76
- {
77
- "epoch": 1.9148936170212765,
78
- "grad_norm": 0.058246735483407974,
79
- "learning_rate": 0.00016170212765957446,
80
- "loss": 0.0127,
81
- "step": 90
82
  },
83
  {
84
  "epoch": 2.0,
85
- "eval_accuracy": 0.12844036697247707,
86
- "eval_loss": 0.054430264979600906,
87
- "eval_runtime": 6.5223,
88
- "eval_samples_per_second": 83.559,
89
- "eval_steps_per_second": 10.579,
90
- "step": 94
91
- },
92
- {
93
- "epoch": 2.127659574468085,
94
- "grad_norm": 0.06463072448968887,
95
- "learning_rate": 0.00015744680851063832,
96
- "loss": 0.0113,
97
- "step": 100
98
- },
99
- {
100
- "epoch": 2.3404255319148937,
101
- "grad_norm": 0.055768080055713654,
102
- "learning_rate": 0.00015319148936170213,
103
- "loss": 0.0099,
104
- "step": 110
105
- },
106
- {
107
- "epoch": 2.5531914893617023,
108
- "grad_norm": 0.08255070447921753,
109
- "learning_rate": 0.00014893617021276596,
110
- "loss": 0.0105,
111
- "step": 120
112
  },
113
  {
114
- "epoch": 2.7659574468085104,
115
- "grad_norm": 0.07059154659509659,
116
- "learning_rate": 0.0001446808510638298,
117
- "loss": 0.0092,
118
- "step": 130
119
- },
120
- {
121
- "epoch": 2.978723404255319,
122
- "grad_norm": 0.0670301541686058,
123
- "learning_rate": 0.00014042553191489363,
124
- "loss": 0.0097,
125
- "step": 140
 
 
 
 
 
 
126
  },
127
  {
128
  "epoch": 3.0,
129
- "eval_accuracy": 0.13394495412844037,
130
- "eval_loss": 0.056532666087150574,
131
- "eval_runtime": 6.5935,
132
- "eval_samples_per_second": 82.657,
133
- "eval_steps_per_second": 10.465,
134
- "step": 141
135
- },
136
- {
137
- "epoch": 3.1914893617021276,
138
- "grad_norm": 0.05259150639176369,
139
- "learning_rate": 0.00013617021276595746,
140
- "loss": 0.0094,
141
- "step": 150
142
- },
143
- {
144
- "epoch": 3.404255319148936,
145
- "grad_norm": 0.08173543959856033,
146
- "learning_rate": 0.00013191489361702127,
147
- "loss": 0.0096,
148
- "step": 160
149
- },
150
- {
151
- "epoch": 3.617021276595745,
152
- "grad_norm": 0.04590131714940071,
153
- "learning_rate": 0.00012765957446808513,
154
- "loss": 0.0071,
155
- "step": 170
156
  },
157
  {
158
- "epoch": 3.829787234042553,
159
- "grad_norm": 0.03957865759730339,
160
- "learning_rate": 0.00012340425531914893,
161
- "loss": 0.0062,
162
- "step": 180
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  },
164
  {
165
  "epoch": 4.0,
166
- "eval_accuracy": 0.1669724770642202,
167
- "eval_loss": 0.0624094121158123,
168
- "eval_runtime": 6.6046,
169
- "eval_samples_per_second": 82.518,
170
- "eval_steps_per_second": 10.447,
171
- "step": 188
172
  },
173
  {
174
- "epoch": 4.042553191489362,
175
- "grad_norm": 0.07129650563001633,
176
- "learning_rate": 0.00011914893617021277,
177
- "loss": 0.0056,
178
- "step": 190
179
- },
180
- {
181
- "epoch": 4.25531914893617,
182
- "grad_norm": 0.056660715490579605,
183
- "learning_rate": 0.00011489361702127661,
184
- "loss": 0.0061,
185
- "step": 200
186
- },
187
- {
188
- "epoch": 4.468085106382979,
189
- "grad_norm": 0.05685529112815857,
190
- "learning_rate": 0.00011063829787234043,
191
- "loss": 0.005,
192
- "step": 210
193
- },
194
- {
195
- "epoch": 4.680851063829787,
196
- "grad_norm": 0.03731105104088783,
197
- "learning_rate": 0.00010638297872340425,
198
- "loss": 0.006,
199
- "step": 220
200
- },
201
- {
202
- "epoch": 4.8936170212765955,
203
- "grad_norm": 0.039578877389431,
204
- "learning_rate": 0.00010212765957446809,
205
- "loss": 0.0051,
206
- "step": 230
207
  },
208
  {
209
  "epoch": 5.0,
210
- "eval_accuracy": 0.23669724770642203,
211
- "eval_loss": 0.059408094733953476,
212
- "eval_runtime": 6.822,
213
- "eval_samples_per_second": 79.889,
214
- "eval_steps_per_second": 10.114,
215
- "step": 235
216
- },
217
- {
218
- "epoch": 5.1063829787234045,
219
- "grad_norm": 0.05410230532288551,
220
- "learning_rate": 9.787234042553192e-05,
221
- "loss": 0.0044,
222
- "step": 240
223
- },
224
- {
225
- "epoch": 5.319148936170213,
226
- "grad_norm": 0.03862292692065239,
227
- "learning_rate": 9.361702127659576e-05,
228
- "loss": 0.0037,
229
- "step": 250
230
- },
231
- {
232
- "epoch": 5.531914893617021,
233
- "grad_norm": 0.06473053991794586,
234
- "learning_rate": 8.936170212765958e-05,
235
- "loss": 0.0046,
236
- "step": 260
237
- },
238
- {
239
- "epoch": 5.74468085106383,
240
- "grad_norm": 0.03953048214316368,
241
- "learning_rate": 8.510638297872341e-05,
242
- "loss": 0.0039,
243
- "step": 270
244
  },
245
  {
246
- "epoch": 5.957446808510638,
247
- "grad_norm": 0.050171270966529846,
248
- "learning_rate": 8.085106382978723e-05,
249
- "loss": 0.0037,
250
- "step": 280
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  },
252
  {
253
  "epoch": 6.0,
254
- "eval_accuracy": 0.20917431192660552,
255
- "eval_loss": 0.06461313366889954,
256
- "eval_runtime": 5.4957,
257
- "eval_samples_per_second": 99.169,
258
- "eval_steps_per_second": 12.555,
259
- "step": 282
260
- },
261
- {
262
- "epoch": 6.170212765957447,
263
- "grad_norm": 0.05051916465163231,
264
- "learning_rate": 7.659574468085106e-05,
265
- "loss": 0.0035,
266
- "step": 290
267
- },
268
- {
269
- "epoch": 6.382978723404255,
270
- "grad_norm": 0.03177861496806145,
271
- "learning_rate": 7.23404255319149e-05,
272
- "loss": 0.0029,
273
- "step": 300
274
  },
275
  {
276
- "epoch": 6.595744680851064,
277
- "grad_norm": 0.01716785505414009,
278
- "learning_rate": 6.808510638297873e-05,
279
- "loss": 0.0026,
280
- "step": 310
281
- },
282
- {
283
- "epoch": 6.808510638297872,
284
- "grad_norm": 0.02455182373523712,
285
- "learning_rate": 6.382978723404256e-05,
286
- "loss": 0.0023,
287
- "step": 320
 
 
 
 
 
 
288
  },
289
  {
290
  "epoch": 7.0,
291
- "eval_accuracy": 0.1981651376146789,
292
- "eval_loss": 0.06650757044553757,
293
- "eval_runtime": 6.6722,
294
- "eval_samples_per_second": 81.683,
295
- "eval_steps_per_second": 10.341,
296
- "step": 329
297
- },
298
- {
299
- "epoch": 7.0212765957446805,
300
- "grad_norm": 0.06645756959915161,
301
- "learning_rate": 5.9574468085106384e-05,
302
- "loss": 0.0023,
303
- "step": 330
304
- },
305
- {
306
- "epoch": 7.23404255319149,
307
- "grad_norm": 0.036258358508348465,
308
- "learning_rate": 5.531914893617022e-05,
309
- "loss": 0.0022,
310
- "step": 340
311
- },
312
- {
313
- "epoch": 7.446808510638298,
314
- "grad_norm": 0.055592458695173264,
315
- "learning_rate": 5.1063829787234044e-05,
316
- "loss": 0.0027,
317
- "step": 350
318
- },
319
- {
320
- "epoch": 7.659574468085106,
321
- "grad_norm": 0.02403583563864231,
322
- "learning_rate": 4.680851063829788e-05,
323
- "loss": 0.0015,
324
- "step": 360
325
  },
326
  {
327
- "epoch": 7.872340425531915,
328
- "grad_norm": 0.01888449862599373,
329
- "learning_rate": 4.2553191489361704e-05,
330
- "loss": 0.0015,
331
- "step": 370
 
 
 
 
 
 
 
 
 
 
 
 
 
332
  },
333
  {
334
  "epoch": 8.0,
335
- "eval_accuracy": 0.1596330275229358,
336
- "eval_loss": 0.06884702295064926,
337
- "eval_runtime": 6.3823,
338
- "eval_samples_per_second": 85.393,
339
- "eval_steps_per_second": 10.811,
340
- "step": 376
341
- },
342
- {
343
- "epoch": 8.085106382978724,
344
- "grad_norm": 0.012278878130018711,
345
- "learning_rate": 3.829787234042553e-05,
346
- "loss": 0.0015,
347
- "step": 380
348
- },
349
- {
350
- "epoch": 8.297872340425531,
351
- "grad_norm": 0.029290180653333664,
352
- "learning_rate": 3.4042553191489365e-05,
353
- "loss": 0.0013,
354
- "step": 390
355
  },
356
  {
357
- "epoch": 8.51063829787234,
358
- "grad_norm": 0.01336819026619196,
359
- "learning_rate": 2.9787234042553192e-05,
360
- "loss": 0.0012,
361
- "step": 400
362
- },
363
- {
364
- "epoch": 8.72340425531915,
365
- "grad_norm": 0.01985483057796955,
366
- "learning_rate": 2.5531914893617022e-05,
367
- "loss": 0.0013,
368
- "step": 410
369
- },
370
- {
371
- "epoch": 8.936170212765958,
372
- "grad_norm": 0.026989364996552467,
373
- "learning_rate": 2.1276595744680852e-05,
374
- "loss": 0.0013,
375
- "step": 420
376
  },
377
  {
378
  "epoch": 9.0,
379
- "eval_accuracy": 0.181651376146789,
380
- "eval_loss": 0.0706261619925499,
381
- "eval_runtime": 6.7059,
382
- "eval_samples_per_second": 81.272,
383
- "eval_steps_per_second": 10.29,
384
- "step": 423
385
  },
386
  {
387
- "epoch": 9.148936170212766,
388
- "grad_norm": 0.02496664598584175,
389
- "learning_rate": 1.7021276595744682e-05,
390
- "loss": 0.0014,
391
- "step": 430
392
- },
393
- {
394
- "epoch": 9.361702127659575,
395
- "grad_norm": 0.01364427525550127,
396
- "learning_rate": 1.2765957446808511e-05,
397
- "loss": 0.0009,
398
- "step": 440
399
- },
400
- {
401
- "epoch": 9.574468085106384,
402
- "grad_norm": 0.010134860873222351,
403
- "learning_rate": 8.510638297872341e-06,
404
- "loss": 0.0009,
405
- "step": 450
406
- },
407
- {
408
- "epoch": 9.787234042553191,
409
- "grad_norm": 0.03484776243567467,
410
- "learning_rate": 4.255319148936171e-06,
411
- "loss": 0.0011,
412
- "step": 460
413
  },
414
  {
415
  "epoch": 10.0,
416
- "grad_norm": 0.0072807134129107,
417
- "learning_rate": 0.0,
418
- "loss": 0.0007,
419
- "step": 470
420
  },
421
  {
422
  "epoch": 10.0,
423
- "eval_accuracy": 0.1743119266055046,
424
- "eval_loss": 0.07116351276636124,
425
- "eval_runtime": 6.8862,
426
- "eval_samples_per_second": 79.144,
427
- "eval_steps_per_second": 10.02,
428
- "step": 470
 
 
 
 
 
 
 
 
 
 
 
429
  },
430
  {
431
- "epoch": 10.0,
432
- "step": 470,
433
- "total_flos": 5.916629591779738e+17,
434
- "train_loss": 0.006165030080468413,
435
- "train_runtime": 454.1808,
436
- "train_samples_per_second": 16.469,
437
- "train_steps_per_second": 1.035
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  }
439
  ],
440
- "logging_steps": 10,
441
- "max_steps": 470,
442
  "num_input_tokens_seen": 0,
443
- "num_train_epochs": 10,
444
  "save_steps": 500,
445
  "stateful_callbacks": {
446
  "TrainerControl": {
@@ -454,8 +575,8 @@
454
  "attributes": {}
455
  }
456
  },
457
- "total_flos": 5.916629591779738e+17,
458
- "train_batch_size": 16,
459
  "trial_name": null,
460
  "trial_params": null
461
  }
 
1
  {
2
+ "best_metric": 0.25979954936421096,
3
+ "best_model_checkpoint": "./logo-matching-base/checkpoint-510",
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 72.06623077392578,
14
+ "learning_rate": 9.5e-06,
15
+ "loss": 0.6833,
16
+ "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  },
18
  {
19
+ "epoch": 1.0,
20
+ "eval_adjusted_mutual_info_score": 0.0959962302587181,
21
+ "eval_adjusted_rand_score": 0.06912863690017566,
22
+ "eval_completeness_score": 0.5109803955924982,
23
+ "eval_fowlkes_mallows_score": 0.44395093511191686,
24
+ "eval_homogeneity_score": 0.12370192092796624,
25
+ "eval_loss": 0.06912863690017566,
26
+ "eval_pair_confusion_matrix": [
27
+ [
28
+ 16212,
29
+ 31438
30
+ ],
31
+ [
32
+ 2986,
33
+ 10620
34
+ ]
35
+ ],
36
+ "step": 34
 
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "grad_norm": 0.0,
41
+ "learning_rate": 9e-06,
42
+ "loss": 0.5711,
43
+ "step": 68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  },
45
  {
46
+ "epoch": 2.0,
47
+ "eval_adjusted_mutual_info_score": 0.16050612062870448,
48
+ "eval_adjusted_rand_score": 0.04788887456487486,
49
+ "eval_completeness_score": 0.4801879169228485,
50
+ "eval_fowlkes_mallows_score": 0.326241320471177,
51
+ "eval_homogeneity_score": 0.24777791806632807,
52
+ "eval_loss": 0.04788887456487486,
53
+ "eval_pair_confusion_matrix": [
54
+ [
55
+ 30178,
56
+ 17472
57
+ ],
58
+ [
59
+ 7800,
60
+ 5806
61
+ ]
62
+ ],
63
+ "step": 68
64
  },
65
  {
66
  "epoch": 3.0,
67
+ "grad_norm": 0.0,
68
+ "learning_rate": 8.5e-06,
69
+ "loss": 0.5048,
70
+ "step": 102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  },
72
  {
73
+ "epoch": 3.0,
74
+ "eval_adjusted_mutual_info_score": 0.14453579947468986,
75
+ "eval_adjusted_rand_score": 0.06941336059571244,
76
+ "eval_completeness_score": 0.48534870125531976,
77
+ "eval_fowlkes_mallows_score": 0.35623205660821267,
78
+ "eval_homogeneity_score": 0.20687927372388243,
79
+ "eval_loss": 0.06941336059571244,
80
+ "eval_pair_confusion_matrix": [
81
+ [
82
+ 28728,
83
+ 18922
84
+ ],
85
+ [
86
+ 6962,
87
+ 6644
88
+ ]
89
+ ],
90
+ "step": 102
91
  },
92
  {
93
  "epoch": 4.0,
94
+ "grad_norm": 0.0,
95
+ "learning_rate": 8.000000000000001e-06,
96
+ "loss": 0.4474,
97
+ "step": 136
 
 
98
  },
99
  {
100
+ "epoch": 4.0,
101
+ "eval_adjusted_mutual_info_score": 0.15197952267992879,
102
+ "eval_adjusted_rand_score": 0.06395362023662775,
103
+ "eval_completeness_score": 0.5053597877227305,
104
+ "eval_fowlkes_mallows_score": 0.36009183925771726,
105
+ "eval_homogeneity_score": 0.201135083549786,
106
+ "eval_loss": 0.06395362023662775,
107
+ "eval_pair_confusion_matrix": [
108
+ [
109
+ 27632,
110
+ 20018
111
+ ],
112
+ [
113
+ 6716,
114
+ 6890
115
+ ]
116
+ ],
117
+ "step": 136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  },
119
  {
120
  "epoch": 5.0,
121
+ "grad_norm": 53.12451934814453,
122
+ "learning_rate": 7.500000000000001e-06,
123
+ "loss": 0.4433,
124
+ "step": 170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  },
126
  {
127
+ "epoch": 5.0,
128
+ "eval_adjusted_mutual_info_score": 0.09225927782155577,
129
+ "eval_adjusted_rand_score": 0.04497696614477031,
130
+ "eval_completeness_score": 0.4856685890606832,
131
+ "eval_fowlkes_mallows_score": 0.41794001145778076,
132
+ "eval_homogeneity_score": 0.12954647762487131,
133
+ "eval_loss": 0.04497696614477031,
134
+ "eval_pair_confusion_matrix": [
135
+ [
136
+ 16938,
137
+ 30712
138
+ ],
139
+ [
140
+ 3792,
141
+ 9814
142
+ ]
143
+ ],
144
+ "step": 170
145
  },
146
  {
147
  "epoch": 6.0,
148
+ "grad_norm": 0.0,
149
+ "learning_rate": 7e-06,
150
+ "loss": 0.4582,
151
+ "step": 204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  },
153
  {
154
+ "epoch": 6.0,
155
+ "eval_adjusted_mutual_info_score": 0.1550123095541971,
156
+ "eval_adjusted_rand_score": 0.1235971988422464,
157
+ "eval_completeness_score": 0.7625668447266793,
158
+ "eval_fowlkes_mallows_score": 0.5091255095660694,
159
+ "eval_homogeneity_score": 0.13490749128374505,
160
+ "eval_loss": 0.1235971988422464,
161
+ "eval_pair_confusion_matrix": [
162
+ [
163
+ 13668,
164
+ 33982
165
+ ],
166
+ [
167
+ 754,
168
+ 12852
169
+ ]
170
+ ],
171
+ "step": 204
172
  },
173
  {
174
  "epoch": 7.0,
175
+ "grad_norm": 24.434818267822266,
176
+ "learning_rate": 6.5000000000000004e-06,
177
+ "loss": 0.4384,
178
+ "step": 238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  },
180
  {
181
+ "epoch": 7.0,
182
+ "eval_adjusted_mutual_info_score": 0.17177361413411174,
183
+ "eval_adjusted_rand_score": 0.08374811610033726,
184
+ "eval_completeness_score": 0.5222737901409524,
185
+ "eval_fowlkes_mallows_score": 0.3647858508374196,
186
+ "eval_homogeneity_score": 0.2242118568770657,
187
+ "eval_loss": 0.08374811610033726,
188
+ "eval_pair_confusion_matrix": [
189
+ [
190
+ 29196,
191
+ 18454
192
+ ],
193
+ [
194
+ 6850,
195
+ 6756
196
+ ]
197
+ ],
198
+ "step": 238
199
  },
200
  {
201
  "epoch": 8.0,
202
+ "grad_norm": 0.0,
203
+ "learning_rate": 6e-06,
204
+ "loss": 0.4388,
205
+ "step": 272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  },
207
  {
208
+ "epoch": 8.0,
209
+ "eval_adjusted_mutual_info_score": 0.20449439835224686,
210
+ "eval_adjusted_rand_score": 0.20212814342460073,
211
+ "eval_completeness_score": 0.5850787856442333,
212
+ "eval_fowlkes_mallows_score": 0.4533371718722817,
213
+ "eval_homogeneity_score": 0.22520931487447624,
214
+ "eval_loss": 0.20212814342460073,
215
+ "eval_pair_confusion_matrix": [
216
+ [
217
+ 30796,
218
+ 16854
219
+ ],
220
+ [
221
+ 5202,
222
+ 8404
223
+ ]
224
+ ],
225
+ "step": 272
 
226
  },
227
  {
228
  "epoch": 9.0,
229
+ "grad_norm": 0.0,
230
+ "learning_rate": 5.500000000000001e-06,
231
+ "loss": 0.4136,
232
+ "step": 306
 
 
233
  },
234
  {
235
+ "epoch": 9.0,
236
+ "eval_adjusted_mutual_info_score": 0.11177382718405922,
237
+ "eval_adjusted_rand_score": 0.09640002189335507,
238
+ "eval_completeness_score": 0.6506809526372479,
239
+ "eval_fowlkes_mallows_score": 0.4899932693994939,
240
+ "eval_homogeneity_score": 0.10817341204672741,
241
+ "eval_loss": 0.09640002189335507,
242
+ "eval_pair_confusion_matrix": [
243
+ [
244
+ 12770,
245
+ 34880
246
+ ],
247
+ [
248
+ 1174,
249
+ 12432
250
+ ]
251
+ ],
252
+ "step": 306
 
 
 
 
 
 
 
 
253
  },
254
  {
255
  "epoch": 10.0,
256
+ "grad_norm": 0.0,
257
+ "learning_rate": 5e-06,
258
+ "loss": 0.4148,
259
+ "step": 340
260
  },
261
  {
262
  "epoch": 10.0,
263
+ "eval_adjusted_mutual_info_score": 0.13291287868141516,
264
+ "eval_adjusted_rand_score": 0.0173768729388201,
265
+ "eval_completeness_score": 0.46335582053405955,
266
+ "eval_fowlkes_mallows_score": 0.3147845140860169,
267
+ "eval_homogeneity_score": 0.20737389396161876,
268
+ "eval_loss": 0.0173768729388201,
269
+ "eval_pair_confusion_matrix": [
270
+ [
271
+ 28346,
272
+ 19304
273
+ ],
274
+ [
275
+ 7786,
276
+ 5820
277
+ ]
278
+ ],
279
+ "step": 340
280
  },
281
  {
282
+ "epoch": 11.0,
283
+ "grad_norm": 0.0,
284
+ "learning_rate": 4.5e-06,
285
+ "loss": 0.4146,
286
+ "step": 374
287
+ },
288
+ {
289
+ "epoch": 11.0,
290
+ "eval_adjusted_mutual_info_score": 0.15463237739434937,
291
+ "eval_adjusted_rand_score": 0.05775589302463435,
292
+ "eval_completeness_score": 0.5013447616164763,
293
+ "eval_fowlkes_mallows_score": 0.34999174740596783,
294
+ "eval_homogeneity_score": 0.21319532171325858,
295
+ "eval_loss": 0.05775589302463435,
296
+ "eval_pair_confusion_matrix": [
297
+ [
298
+ 28252,
299
+ 19398
300
+ ],
301
+ [
302
+ 7026,
303
+ 6580
304
+ ]
305
+ ],
306
+ "step": 374
307
+ },
308
+ {
309
+ "epoch": 12.0,
310
+ "grad_norm": 0.0,
311
+ "learning_rate": 4.000000000000001e-06,
312
+ "loss": 0.4096,
313
+ "step": 408
314
+ },
315
+ {
316
+ "epoch": 12.0,
317
+ "eval_adjusted_mutual_info_score": 0.15954128854018185,
318
+ "eval_adjusted_rand_score": 0.08353262118433151,
319
+ "eval_completeness_score": 0.48657807669089076,
320
+ "eval_fowlkes_mallows_score": 0.3488784926098972,
321
+ "eval_homogeneity_score": 0.23781381627812734,
322
+ "eval_loss": 0.08353262118433151,
323
+ "eval_pair_confusion_matrix": [
324
+ [
325
+ 31128,
326
+ 16522
327
+ ],
328
+ [
329
+ 7482,
330
+ 6124
331
+ ]
332
+ ],
333
+ "step": 408
334
+ },
335
+ {
336
+ "epoch": 13.0,
337
+ "grad_norm": 0.0,
338
+ "learning_rate": 3.5e-06,
339
+ "loss": 0.3973,
340
+ "step": 442
341
+ },
342
+ {
343
+ "epoch": 13.0,
344
+ "eval_adjusted_mutual_info_score": 0.19041946370338364,
345
+ "eval_adjusted_rand_score": 0.12553878006056823,
346
+ "eval_completeness_score": 0.5046617692993569,
347
+ "eval_fowlkes_mallows_score": 0.36189400936638344,
348
+ "eval_homogeneity_score": 0.27375639307416655,
349
+ "eval_loss": 0.12553878006056823,
350
+ "eval_pair_confusion_matrix": [
351
+ [
352
+ 33812,
353
+ 13838
354
+ ],
355
+ [
356
+ 7670,
357
+ 5936
358
+ ]
359
+ ],
360
+ "step": 442
361
+ },
362
+ {
363
+ "epoch": 14.0,
364
+ "grad_norm": 0.0,
365
+ "learning_rate": 3e-06,
366
+ "loss": 0.4051,
367
+ "step": 476
368
+ },
369
+ {
370
+ "epoch": 14.0,
371
+ "eval_adjusted_mutual_info_score": 0.18691214822396793,
372
+ "eval_adjusted_rand_score": 0.18158266255584393,
373
+ "eval_completeness_score": 0.5245970271191535,
374
+ "eval_fowlkes_mallows_score": 0.41690193788215973,
375
+ "eval_homogeneity_score": 0.24324643119902978,
376
+ "eval_loss": 0.18158266255584393,
377
+ "eval_pair_confusion_matrix": [
378
+ [
379
+ 33010,
380
+ 14640
381
+ ],
382
+ [
383
+ 6422,
384
+ 7184
385
+ ]
386
+ ],
387
+ "step": 476
388
+ },
389
+ {
390
+ "epoch": 15.0,
391
+ "grad_norm": 20.364652633666992,
392
+ "learning_rate": 2.5e-06,
393
+ "loss": 0.4062,
394
+ "step": 510
395
+ },
396
+ {
397
+ "epoch": 15.0,
398
+ "eval_adjusted_mutual_info_score": 0.21823634710165685,
399
+ "eval_adjusted_rand_score": 0.25979954936421096,
400
+ "eval_completeness_score": 0.5641057686568595,
401
+ "eval_fowlkes_mallows_score": 0.47949730143044716,
402
+ "eval_homogeneity_score": 0.2648407343665406,
403
+ "eval_loss": 0.25979954936421096,
404
+ "eval_pair_confusion_matrix": [
405
+ [
406
+ 33538,
407
+ 14112
408
+ ],
409
+ [
410
+ 5216,
411
+ 8390
412
+ ]
413
+ ],
414
+ "step": 510
415
+ },
416
+ {
417
+ "epoch": 16.0,
418
+ "grad_norm": 0.0,
419
+ "learning_rate": 2.0000000000000003e-06,
420
+ "loss": 0.4025,
421
+ "step": 544
422
+ },
423
+ {
424
+ "epoch": 16.0,
425
+ "eval_adjusted_mutual_info_score": 0.10056939420043334,
426
+ "eval_adjusted_rand_score": 0.0907629141131465,
427
+ "eval_completeness_score": 0.5696556835173626,
428
+ "eval_fowlkes_mallows_score": 0.4759933991482618,
429
+ "eval_homogeneity_score": 0.11380033927304208,
430
+ "eval_loss": 0.0907629141131465,
431
+ "eval_pair_confusion_matrix": [
432
+ [
433
+ 14202,
434
+ 33448
435
+ ],
436
+ [
437
+ 1794,
438
+ 11812
439
+ ]
440
+ ],
441
+ "step": 544
442
+ },
443
+ {
444
+ "epoch": 17.0,
445
+ "grad_norm": 0.0,
446
+ "learning_rate": 1.5e-06,
447
+ "loss": 0.4043,
448
+ "step": 578
449
+ },
450
+ {
451
+ "epoch": 17.0,
452
+ "eval_adjusted_mutual_info_score": 0.11845707888523377,
453
+ "eval_adjusted_rand_score": 0.06151994460143414,
454
+ "eval_completeness_score": 0.5431310507543884,
455
+ "eval_fowlkes_mallows_score": 0.4323980307250435,
456
+ "eval_homogeneity_score": 0.14340786222528185,
457
+ "eval_loss": 0.06151994460143414,
458
+ "eval_pair_confusion_matrix": [
459
+ [
460
+ 16966,
461
+ 30684
462
+ ],
463
+ [
464
+ 3408,
465
+ 10198
466
+ ]
467
+ ],
468
+ "step": 578
469
+ },
470
+ {
471
+ "epoch": 18.0,
472
+ "grad_norm": 0.0,
473
+ "learning_rate": 1.0000000000000002e-06,
474
+ "loss": 0.4013,
475
+ "step": 612
476
+ },
477
+ {
478
+ "epoch": 18.0,
479
+ "eval_adjusted_mutual_info_score": 0.20098591140511965,
480
+ "eval_adjusted_rand_score": 0.24115050477114428,
481
+ "eval_completeness_score": 0.5343711342342489,
482
+ "eval_fowlkes_mallows_score": 0.45788735752549203,
483
+ "eval_homogeneity_score": 0.2597787469313228,
484
+ "eval_loss": 0.24115050477114428,
485
+ "eval_pair_confusion_matrix": [
486
+ [
487
+ 34176,
488
+ 13474
489
+ ],
490
+ [
491
+ 5818,
492
+ 7788
493
+ ]
494
+ ],
495
+ "step": 612
496
+ },
497
+ {
498
+ "epoch": 19.0,
499
+ "grad_norm": 0.0,
500
+ "learning_rate": 5.000000000000001e-07,
501
+ "loss": 0.4006,
502
+ "step": 646
503
+ },
504
+ {
505
+ "epoch": 19.0,
506
+ "eval_adjusted_mutual_info_score": 0.252153502376998,
507
+ "eval_adjusted_rand_score": 0.24023922903374736,
508
+ "eval_completeness_score": 0.5460417717065094,
509
+ "eval_fowlkes_mallows_score": 0.4233209236936553,
510
+ "eval_homogeneity_score": 0.34453853702602516,
511
+ "eval_loss": 0.24023922903374736,
512
+ "eval_pair_confusion_matrix": [
513
+ [
514
+ 37986,
515
+ 9664
516
+ ],
517
+ [
518
+ 7382,
519
+ 6224
520
+ ]
521
+ ],
522
+ "step": 646
523
+ },
524
+ {
525
+ "epoch": 20.0,
526
+ "grad_norm": 0.0,
527
+ "learning_rate": 0.0,
528
+ "loss": 0.4044,
529
+ "step": 680
530
+ },
531
+ {
532
+ "epoch": 20.0,
533
+ "eval_adjusted_mutual_info_score": 0.08179538296319702,
534
+ "eval_adjusted_rand_score": 0.040227652358330604,
535
+ "eval_completeness_score": 0.48262386100058413,
536
+ "eval_fowlkes_mallows_score": 0.423484876527761,
537
+ "eval_homogeneity_score": 0.11562820805581293,
538
+ "eval_loss": 0.040227652358330604,
539
+ "eval_pair_confusion_matrix": [
540
+ [
541
+ 15374,
542
+ 32276
543
+ ],
544
+ [
545
+ 3428,
546
+ 10178
547
+ ]
548
+ ],
549
+ "step": 680
550
+ },
551
+ {
552
+ "epoch": 20.0,
553
+ "step": 680,
554
+ "total_flos": 0.0,
555
+ "train_loss": 0.44298483904670266,
556
+ "train_runtime": 986.129,
557
+ "train_samples_per_second": 21.843,
558
+ "train_steps_per_second": 0.69
559
  }
560
  ],
561
+ "logging_steps": 500,
562
+ "max_steps": 680,
563
  "num_input_tokens_seen": 0,
564
+ "num_train_epochs": 20,
565
  "save_steps": 500,
566
  "stateful_callbacks": {
567
  "TrainerControl": {
 
575
  "attributes": {}
576
  }
577
  },
578
+ "total_flos": 0.0,
579
+ "train_batch_size": 32,
580
  "trial_name": null,
581
  "trial_params": null
582
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5440bc76789849f1270a013a667a26ba0df7e77608e41c4d4889d359c200832f
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00f042ac9e32eaa0803d403d8098c83b0a2076477b858022993c5020ef613021
3
  size 5304