rakhman-llm commited on
Commit
fc1ffea
·
verified ·
1 Parent(s): c964bc5

Training in progress, step 34500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e4ab360c914af5780dbdab371c96384f68fadc50929f69d6d527fb768d4c4fc
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70be4ea2eb3b605b7bbf19f657f7030b3328a2472cfce6fd98277ee878f3d42d
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efe48f4318badae6c7301c7e1af6b474ccbe7f71331abdb7cdd8a9a8ea72715a
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ea42d9a3f547f1d5475d09c5cf077e380e3710c708660172e4c053a5a76213e
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6d089c8bc4767a68a0971e6301211196ccb288fdbf802f4d0800d2c395bd6e0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd91d8604c3c73ee737187beb59c853ab84bc04334806ba928b483da59e07022
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5881a400174694043dc48b7e6a0f928fe1bcddfe30dc261df5a9e4e69d0e718
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b582570b847f10b59692f51f82170b4e892f7a8342116f22c17cd2ce4cb0c5e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.2666666666666666,
5
  "eval_steps": 500,
6
- "global_step": 34000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2403,6 +2403,41 @@
2403
  "learning_rate": 4.8942222222222225e-06,
2404
  "loss": 0.0533,
2405
  "step": 34000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2406
  }
2407
  ],
2408
  "logging_steps": 100,
@@ -2422,7 +2457,7 @@
2422
  "attributes": {}
2423
  }
2424
  },
2425
- "total_flos": 8.281827311616e+16,
2426
  "train_batch_size": 4,
2427
  "trial_name": null,
2428
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.3,
5
  "eval_steps": 500,
6
+ "global_step": 34500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2403
  "learning_rate": 4.8942222222222225e-06,
2404
  "loss": 0.0533,
2405
  "step": 34000
2406
+ },
2407
+ {
2408
+ "epoch": 2.2733333333333334,
2409
+ "grad_norm": 0.2103540301322937,
2410
+ "learning_rate": 4.8497777777777786e-06,
2411
+ "loss": 0.0544,
2412
+ "step": 34100
2413
+ },
2414
+ {
2415
+ "epoch": 2.2800000000000002,
2416
+ "grad_norm": 0.3394009470939636,
2417
+ "learning_rate": 4.805333333333334e-06,
2418
+ "loss": 0.0512,
2419
+ "step": 34200
2420
+ },
2421
+ {
2422
+ "epoch": 2.2866666666666666,
2423
+ "grad_norm": 0.18866145610809326,
2424
+ "learning_rate": 4.760888888888889e-06,
2425
+ "loss": 0.0505,
2426
+ "step": 34300
2427
+ },
2428
+ {
2429
+ "epoch": 2.2933333333333334,
2430
+ "grad_norm": 0.21299579739570618,
2431
+ "learning_rate": 4.716888888888889e-06,
2432
+ "loss": 0.0466,
2433
+ "step": 34400
2434
+ },
2435
+ {
2436
+ "epoch": 2.3,
2437
+ "grad_norm": 0.12839581072330475,
2438
+ "learning_rate": 4.672444444444445e-06,
2439
+ "loss": 0.0484,
2440
+ "step": 34500
2441
  }
2442
  ],
2443
  "logging_steps": 100,
 
2457
  "attributes": {}
2458
  }
2459
  },
2460
+ "total_flos": 8.403618889728e+16,
2461
  "train_batch_size": 4,
2462
  "trial_name": null,
2463
  "trial_params": null