ngwgsang committed
Commit b3f1503 · verified · 1 Parent(s): df90e5e

Training in progress, epoch 4, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44e654e4d50bd4c08e40f1c40359055b24af92e519f539420a2ae3729b5bff38
+oid sha256:73bbaf13257e8852680c22d11642ea0013f2247fef35d8272beebba796d36512
 size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a152b235056b4fcbaeb415b6d13c581c062ea5ef61192c30dcf14433ed941558
+oid sha256:cf26cb804bb88b36cd19298767e57275ff1af51ef66a60b94073c0dcb74bb3c9
 size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76b3b8471ca0351c811d90c5b574a45dac1c24b25ffcf13ec6b85586685c4c47
+oid sha256:049c6f40a328629846cab1b27e3807d44ea469304a69ff0f3d676cc813cde6b3
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8143cf4c2b0cbae224cb6ee44d414097e32f9582a6067851f3fe7a3ab225aca6
+oid sha256:7668389514d10a3d53f140c85ff46df71dcd9dc34fbc1ed6530f2d1a175df2a0
 size 1064
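
The four binary files above are stored through Git LFS, so the diff only touches their small pointer files: each pointer records the spec version, a `sha256` oid, and the object size, and this commit simply swaps the old checkpoint's oid for the new one (the sizes are unchanged). A minimal sketch of checking a downloaded object against its pointer, assuming you have the raw pointer text and the fetched blob side by side; the paths and helper name are illustrative and not part of this repo:

```python
import hashlib
import os

def lfs_pointer_matches(pointer_path: str, blob_path: str) -> bool:
    """Compare a downloaded LFS object against the oid/size in its pointer file."""
    fields = {}
    with open(pointer_path) as fh:
        for line in fh:
            if " " in line:
                key, value = line.strip().split(" ", 1)
                fields[key] = value

    expected_oid = fields["oid"].split(":", 1)[1]   # "sha256:<hex>" -> "<hex>"
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(blob_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)

    return digest.hexdigest() == expected_oid and os.path.getsize(blob_path) == expected_size

# e.g. lfs_pointer_matches("model.safetensors.pointer", "model.safetensors")
```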
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 6.088820139567058,
-  "best_model_checkpoint": "./results/checkpoint-2748",
-  "epoch": 3.0,
+  "best_metric": 5.853533426920573,
+  "best_model_checkpoint": "./results/checkpoint-3664",
+  "epoch": 4.0,
   "eval_steps": 500,
-  "global_step": 2748,
+  "global_step": 3664,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -232,6 +232,81 @@
       "eval_samples_per_second": 271.974,
       "eval_steps_per_second": 8.5,
       "step": 2748
+    },
+    {
+      "epoch": 3.056768558951965,
+      "grad_norm": 25.13582420349121,
+      "learning_rate": 1.8537117903930135e-05,
+      "loss": 5.5729,
+      "step": 2800
+    },
+    {
+      "epoch": 3.165938864628821,
+      "grad_norm": 26.646804809570312,
+      "learning_rate": 1.8127729257641922e-05,
+      "loss": 5.6211,
+      "step": 2900
+    },
+    {
+      "epoch": 3.2751091703056767,
+      "grad_norm": 28.627378463745117,
+      "learning_rate": 1.7718340611353713e-05,
+      "loss": 5.5775,
+      "step": 3000
+    },
+    {
+      "epoch": 3.3842794759825328,
+      "grad_norm": 28.54901123046875,
+      "learning_rate": 1.7308951965065504e-05,
+      "loss": 5.3086,
+      "step": 3100
+    },
+    {
+      "epoch": 3.493449781659389,
+      "grad_norm": 27.549345016479492,
+      "learning_rate": 1.689956331877729e-05,
+      "loss": 5.5229,
+      "step": 3200
+    },
+    {
+      "epoch": 3.6026200873362444,
+      "grad_norm": 29.306232452392578,
+      "learning_rate": 1.649017467248908e-05,
+      "loss": 5.6348,
+      "step": 3300
+    },
+    {
+      "epoch": 3.7117903930131004,
+      "grad_norm": 29.256425857543945,
+      "learning_rate": 1.6080786026200872e-05,
+      "loss": 5.3936,
+      "step": 3400
+    },
+    {
+      "epoch": 3.8209606986899565,
+      "grad_norm": 31.816057205200195,
+      "learning_rate": 1.5671397379912666e-05,
+      "loss": 5.431,
+      "step": 3500
+    },
+    {
+      "epoch": 3.930131004366812,
+      "grad_norm": 25.876789093017578,
+      "learning_rate": 1.5262008733624454e-05,
+      "loss": 5.4981,
+      "step": 3600
+    },
+    {
+      "epoch": 4.0,
+      "eval_avg_mae": 5.853533426920573,
+      "eval_loss": 5.8535332679748535,
+      "eval_mae_lex": 5.371854305267334,
+      "eval_mae_sem": 3.812947988510132,
+      "eval_mae_syn": 8.375797271728516,
+      "eval_runtime": 26.9386,
+      "eval_samples_per_second": 271.989,
+      "eval_steps_per_second": 8.501,
+      "step": 3664
     }
   ],
   "logging_steps": 100,
@@ -251,7 +326,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5783298700015872.0,
+  "total_flos": 7711064933354496.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null