masatochi committed
Commit 93cd871 · verified · 1 Parent(s): 39ccdbb

Training in progress, step 45, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a165fb02f9a3a4bf5c227dc64bde9957f7d973aad681dae8e8b211d4f5d1400c
+oid sha256:b6be4e281632c63b92fa5d5edd18e7ca954c0c33b78e3176cc998442208e3d84
 size 83945296

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1430b0e939ea6e6eca903aa7902aa26cfd4e78beb03a4834c2deff932e024b1a
+oid sha256:7c7a079a0ab4dd29eec7e8d870e47356f2f52a2851b4dfee5d10a853fec52488
 size 43122580

last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab2ed981c27f8081d81d2039e0485e05b085c6f7d5fed74c55bf1d3c4164f221
+oid sha256:efb9ca876132dfc2200ae668af8d96116d67ae8fe0758644fefda8e5b6341153
 size 14244

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35e2941b1419d36fedcd8eb55488740cb386508ea401393ade4c1f5fd25ff6c8
+oid sha256:8074483b54111bf60815c86124b01e5486a0c97c3c0bcc7642d6609b17e4381b
 size 1064

last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.019561097866617763,
+  "epoch": 0.022006235099944985,
   "eval_steps": 34,
-  "global_step": 40,
+  "global_step": 45,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -303,6 +303,41 @@
       "learning_rate": 0.0001982973099683902,
       "loss": 1.2364,
       "step": 40
+    },
+    {
+      "epoch": 0.020050125313283207,
+      "grad_norm": 1.3894869089126587,
+      "learning_rate": 0.0001979409767601366,
+      "loss": 1.0156,
+      "step": 41
+    },
+    {
+      "epoch": 0.02053915275994865,
+      "grad_norm": 1.2144147157669067,
+      "learning_rate": 0.00019755119679804367,
+      "loss": 0.9838,
+      "step": 42
+    },
+    {
+      "epoch": 0.021028180206614098,
+      "grad_norm": 1.2613141536712646,
+      "learning_rate": 0.0001971281031916114,
+      "loss": 0.9763,
+      "step": 43
+    },
+    {
+      "epoch": 0.02151720765327954,
+      "grad_norm": 1.316279649734497,
+      "learning_rate": 0.00019667184042691875,
+      "loss": 1.0909,
+      "step": 44
+    },
+    {
+      "epoch": 0.022006235099944985,
+      "grad_norm": 1.4056583642959595,
+      "learning_rate": 0.00019618256431728194,
+      "loss": 1.2575,
+      "step": 45
     }
   ],
   "logging_steps": 1,
@@ -322,7 +357,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.7755816782200832e+17,
+  "total_flos": 1.9975293879975936e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null