bobox committed on
Commit
c232415
·
verified ·
1 Parent(s): f8f2f99

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -548,6 +548,13 @@ You can finetune this model on your own dataset.
548
  | 0.6000 | 20680 | 0.1918 | 0.1494 | 0.0053 |
549
  | 0.7501 | 25850 | 0.2103 | 0.1488 | 0.0082 |
550
  | 0.9001 | 31020 | 0.2056 | 0.1513 | 0.0039 |
 
 
 
 
 
 
 
551
 
552
 
553
  ### Framework Versions
 
548
  | 0.6000 | 20680 | 0.1918 | 0.1494 | 0.0053 |
549
  | 0.7501 | 25850 | 0.2103 | 0.1488 | 0.0082 |
550
  | 0.9001 | 31020 | 0.2056 | 0.1513 | 0.0039 |
551
+ | 1.0501 | 36190 | 0.2067 | 0.1501 | 0.0079 |
552
+ | 1.2001 | 41360 | 0.1987 | 0.1485 | 0.0084 |
553
+ | 1.3501 | 46530 | 0.1987 | 0.1517 | 0.0056 |
554
+ | 1.5001 | 51700 | 0.2050 | 0.1490 | 0.0062 |
555
+ | 1.6501 | 56870 | 0.1830 | 0.1458 | 0.0061 |
556
+ | 1.8001 | 62040 | 0.1763 | 0.1418 | 0.0080 |
557
+ | 1.9502 | 67210 | 0.1982 | 0.1401 | 0.0066 |
558
 
559
 
560
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aaf87bb13046028734be90752771144aaea10be484bb1bf6bd2a0259e7d15a2
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4d8431b48e856a491158daf2e05d6d19c3cbee19aa226bb35c138ccb02575f
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:948aeb47d3b4ece77853054875d7cd8623a91ad195a23f1375df30e23b0bd550
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f777e57dce9511e38a577e3c3f65017f044e27d6bcadc200f36e0f75dd77f5a2
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2cfc2a4e406b13ab6c68c4666a4523963a93147dc76551672404fbc20c90b68
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ecceb368c1d95b84fe7a285ca12f63a4025e3b262133ec37fd4ee3222b8d0e
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10fdc263b51d98641af52cf837a2f3c4f9e5027460536e6f4b62067f974df7c7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b8b7ee3d4870162babeb8f348c3bae7fa4103003a4bea7169789a91a35a43fe
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 5170,
6
- "global_step": 34464,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -145,6 +145,167 @@
145
  "eval_qnli-contrastive_samples_per_second": 342.998,
146
  "eval_qnli-contrastive_steps_per_second": 21.473,
147
  "step": 31020
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  }
149
  ],
150
  "logging_steps": 5170,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 5170,
6
+ "global_step": 68928,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
145
  "eval_qnli-contrastive_samples_per_second": 342.998,
146
  "eval_qnli-contrastive_steps_per_second": 21.473,
147
  "step": 31020
148
+ },
149
+ {
150
+ "epoch": 1.050081244196843,
151
+ "grad_norm": 3.5489258766174316,
152
+ "learning_rate": 4.989171745750203e-06,
153
+ "loss": 0.2067,
154
+ "step": 36190
155
+ },
156
+ {
157
+ "epoch": 1.050081244196843,
158
+ "eval_nli-pairs_loss": 0.15012863278388977,
159
+ "eval_nli-pairs_runtime": 15.698,
160
+ "eval_nli-pairs_samples_per_second": 433.687,
161
+ "eval_nli-pairs_steps_per_second": 27.137,
162
+ "step": 36190
163
+ },
164
+ {
165
+ "epoch": 1.050081244196843,
166
+ "eval_qnli-contrastive_loss": 0.007893337868154049,
167
+ "eval_qnli-contrastive_runtime": 16.0422,
168
+ "eval_qnli-contrastive_samples_per_second": 340.539,
169
+ "eval_qnli-contrastive_steps_per_second": 21.319,
170
+ "step": 36190
171
+ },
172
+ {
173
+ "epoch": 1.2000928505106778,
174
+ "grad_norm": 1.2927824258804321,
175
+ "learning_rate": 4.867136962447637e-06,
176
+ "loss": 0.1987,
177
+ "step": 41360
178
+ },
179
+ {
180
+ "epoch": 1.2000928505106778,
181
+ "eval_nli-pairs_loss": 0.14854447543621063,
182
+ "eval_nli-pairs_runtime": 15.3871,
183
+ "eval_nli-pairs_samples_per_second": 442.449,
184
+ "eval_nli-pairs_steps_per_second": 27.686,
185
+ "step": 41360
186
+ },
187
+ {
188
+ "epoch": 1.2000928505106778,
189
+ "eval_qnli-contrastive_loss": 0.008381461724638939,
190
+ "eval_qnli-contrastive_runtime": 15.7761,
191
+ "eval_qnli-contrastive_samples_per_second": 346.283,
192
+ "eval_qnli-contrastive_steps_per_second": 21.678,
193
+ "step": 41360
194
+ },
195
+ {
196
+ "epoch": 1.3501044568245124,
197
+ "grad_norm": 12.146841049194336,
198
+ "learning_rate": 4.615724011281596e-06,
199
+ "loss": 0.1987,
200
+ "step": 46530
201
+ },
202
+ {
203
+ "epoch": 1.3501044568245124,
204
+ "eval_nli-pairs_loss": 0.15174470841884613,
205
+ "eval_nli-pairs_runtime": 15.5759,
206
+ "eval_nli-pairs_samples_per_second": 437.085,
207
+ "eval_nli-pairs_steps_per_second": 27.35,
208
+ "step": 46530
209
+ },
210
+ {
211
+ "epoch": 1.3501044568245124,
212
+ "eval_qnli-contrastive_loss": 0.00563395069912076,
213
+ "eval_qnli-contrastive_runtime": 15.9485,
214
+ "eval_qnli-contrastive_samples_per_second": 342.54,
215
+ "eval_qnli-contrastive_steps_per_second": 21.444,
216
+ "step": 46530
217
+ },
218
+ {
219
+ "epoch": 1.5001160631383472,
220
+ "grad_norm": 0.12127237021923065,
221
+ "learning_rate": 4.248583657440329e-06,
222
+ "loss": 0.205,
223
+ "step": 51700
224
+ },
225
+ {
226
+ "epoch": 1.5001160631383472,
227
+ "eval_nli-pairs_loss": 0.1490125209093094,
228
+ "eval_nli-pairs_runtime": 15.4029,
229
+ "eval_nli-pairs_samples_per_second": 441.994,
230
+ "eval_nli-pairs_steps_per_second": 27.657,
231
+ "step": 51700
232
+ },
233
+ {
234
+ "epoch": 1.5001160631383472,
235
+ "eval_qnli-contrastive_loss": 0.0062314593233168125,
236
+ "eval_qnli-contrastive_runtime": 15.801,
237
+ "eval_qnli-contrastive_samples_per_second": 345.738,
238
+ "eval_qnli-contrastive_steps_per_second": 21.644,
239
+ "step": 51700
240
+ },
241
+ {
242
+ "epoch": 1.650127669452182,
243
+ "grad_norm": 9.487723350524902,
244
+ "learning_rate": 3.7857309145658974e-06,
245
+ "loss": 0.183,
246
+ "step": 56870
247
+ },
248
+ {
249
+ "epoch": 1.650127669452182,
250
+ "eval_nli-pairs_loss": 0.14577454328536987,
251
+ "eval_nli-pairs_runtime": 15.5234,
252
+ "eval_nli-pairs_samples_per_second": 438.563,
253
+ "eval_nli-pairs_steps_per_second": 27.442,
254
+ "step": 56870
255
+ },
256
+ {
257
+ "epoch": 1.650127669452182,
258
+ "eval_qnli-contrastive_loss": 0.006071700248867273,
259
+ "eval_qnli-contrastive_runtime": 15.7649,
260
+ "eval_qnli-contrastive_samples_per_second": 346.529,
261
+ "eval_qnli-contrastive_steps_per_second": 21.694,
262
+ "step": 56870
263
+ },
264
+ {
265
+ "epoch": 1.8001392757660168,
266
+ "grad_norm": 3.4426660537719727,
267
+ "learning_rate": 3.252653526527395e-06,
268
+ "loss": 0.1763,
269
+ "step": 62040
270
+ },
271
+ {
272
+ "epoch": 1.8001392757660168,
273
+ "eval_nli-pairs_loss": 0.1418175995349884,
274
+ "eval_nli-pairs_runtime": 15.4065,
275
+ "eval_nli-pairs_samples_per_second": 441.89,
276
+ "eval_nli-pairs_steps_per_second": 27.651,
277
+ "step": 62040
278
+ },
279
+ {
280
+ "epoch": 1.8001392757660168,
281
+ "eval_qnli-contrastive_loss": 0.007999507710337639,
282
+ "eval_qnli-contrastive_runtime": 15.8195,
283
+ "eval_qnli-contrastive_samples_per_second": 345.334,
284
+ "eval_qnli-contrastive_steps_per_second": 21.619,
285
+ "step": 62040
286
+ },
287
+ {
288
+ "epoch": 1.9501508820798514,
289
+ "grad_norm": 0.897132158279419,
290
+ "learning_rate": 2.678533013873007e-06,
291
+ "loss": 0.1982,
292
+ "step": 67210
293
+ },
294
+ {
295
+ "epoch": 1.9501508820798514,
296
+ "eval_nli-pairs_loss": 0.14005425572395325,
297
+ "eval_nli-pairs_runtime": 15.4671,
298
+ "eval_nli-pairs_samples_per_second": 440.16,
299
+ "eval_nli-pairs_steps_per_second": 27.542,
300
+ "step": 67210
301
+ },
302
+ {
303
+ "epoch": 1.9501508820798514,
304
+ "eval_qnli-contrastive_loss": 0.006610157899558544,
305
+ "eval_qnli-contrastive_runtime": 15.8954,
306
+ "eval_qnli-contrastive_samples_per_second": 343.684,
307
+ "eval_qnli-contrastive_steps_per_second": 21.516,
308
+ "step": 67210
309
  }
310
  ],
311
  "logging_steps": 5170,