rakhman-llm commited on
Commit
e81f04b
·
verified ·
1 Parent(s): dd37262

Training in progress, step 21500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b28df1971539671747e3a778afbdb5f38bcc6ab00d7766a5153b19f29b78c8d
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9498cb3e3306e8357a032524622b5d26748d8db258d513acaef4c7e389559f2c
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7061f248e70b39002a88a3f1e8979a8a5203e1f6c7433a05e47b1416a56cf81d
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4fcf3c27385ce93621769bdc2dbf10817c3a0a9703e9571bb7778f1cc3f2198
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57271955ed47e1769456cbc5629f14409b6b61904f3f76f733b63102e083bba0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6135b553c57e576683ea34e2125a95123075dc2da6ac1f60418ada8ca13529ab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd63ac70e4657463d9ad6dfb25e72c667be965cdfce9d42626f1f00c9c329b89
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3693f6d21626ad0956a90ced9d517a1b2895382ded929533dfbb38ad2b207f63
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4,
5
  "eval_steps": 500,
6
- "global_step": 21000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1485,6 +1485,41 @@
1485
  "learning_rate": 1.066977777777778e-05,
1486
  "loss": 0.0567,
1487
  "step": 21000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1488
  }
1489
  ],
1490
  "logging_steps": 100,
@@ -1504,7 +1539,7 @@
1504
  "attributes": {}
1505
  }
1506
  },
1507
- "total_flos": 5.115246280704e+16,
1508
  "train_batch_size": 4,
1509
  "trial_name": null,
1510
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4333333333333333,
5
  "eval_steps": 500,
6
+ "global_step": 21500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1485
  "learning_rate": 1.066977777777778e-05,
1486
  "loss": 0.0567,
1487
  "step": 21000
1488
+ },
1489
+ {
1490
+ "epoch": 1.4066666666666667,
1491
+ "grad_norm": 0.4028960168361664,
1492
+ "learning_rate": 1.0625333333333335e-05,
1493
+ "loss": 0.0558,
1494
+ "step": 21100
1495
+ },
1496
+ {
1497
+ "epoch": 1.4133333333333333,
1498
+ "grad_norm": 0.30364444851875305,
1499
+ "learning_rate": 1.058088888888889e-05,
1500
+ "loss": 0.0588,
1501
+ "step": 21200
1502
+ },
1503
+ {
1504
+ "epoch": 1.42,
1505
+ "grad_norm": 0.34276890754699707,
1506
+ "learning_rate": 1.0536444444444445e-05,
1507
+ "loss": 0.0537,
1508
+ "step": 21300
1509
+ },
1510
+ {
1511
+ "epoch": 1.4266666666666667,
1512
+ "grad_norm": 0.1914118230342865,
1513
+ "learning_rate": 1.0492e-05,
1514
+ "loss": 0.058,
1515
+ "step": 21400
1516
+ },
1517
+ {
1518
+ "epoch": 1.4333333333333333,
1519
+ "grad_norm": 0.3174405097961426,
1520
+ "learning_rate": 1.0447555555555557e-05,
1521
+ "loss": 0.0576,
1522
+ "step": 21500
1523
  }
1524
  ],
1525
  "logging_steps": 100,
 
1539
  "attributes": {}
1540
  }
1541
  },
1542
+ "total_flos": 5.237037858816e+16,
1543
  "train_batch_size": 4,
1544
  "trial_name": null,
1545
  "trial_params": null