rakhman-llm commited on
Commit
05d772f
·
verified ·
1 Parent(s): d5ca595

Training in progress, step 22000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9498cb3e3306e8357a032524622b5d26748d8db258d513acaef4c7e389559f2c
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cf00b77a940691d2a87a25214434a0d48f9864c65fc3b0b19e53bccdbef0226
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4fcf3c27385ce93621769bdc2dbf10817c3a0a9703e9571bb7778f1cc3f2198
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2faa9161687d4c30fe8c5c851dcaf7629e9d07b563b4b1ca8c6d9fe9490b63c8
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6135b553c57e576683ea34e2125a95123075dc2da6ac1f60418ada8ca13529ab
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92097f0b98245def4fd493d6f93f0c40c3a4e7e4f11939d1bbd3057b8f076619
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3693f6d21626ad0956a90ced9d517a1b2895382ded929533dfbb38ad2b207f63
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a00533389a4678d5ecaa8f25d54446914041611d7541f6bf05750bdc13a462b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4333333333333333,
5
  "eval_steps": 500,
6
- "global_step": 21500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1520,6 +1520,41 @@
1520
  "learning_rate": 1.0447555555555557e-05,
1521
  "loss": 0.0576,
1522
  "step": 21500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1523
  }
1524
  ],
1525
  "logging_steps": 100,
@@ -1539,7 +1574,7 @@
1539
  "attributes": {}
1540
  }
1541
  },
1542
- "total_flos": 5.237037858816e+16,
1543
  "train_batch_size": 4,
1544
  "trial_name": null,
1545
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4666666666666668,
5
  "eval_steps": 500,
6
+ "global_step": 22000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1520
  "learning_rate": 1.0447555555555557e-05,
1521
  "loss": 0.0576,
1522
  "step": 21500
1523
+ },
1524
+ {
1525
+ "epoch": 1.44,
1526
+ "grad_norm": 0.1996719092130661,
1527
+ "learning_rate": 1.0403111111111113e-05,
1528
+ "loss": 0.0569,
1529
+ "step": 21600
1530
+ },
1531
+ {
1532
+ "epoch": 1.4466666666666668,
1533
+ "grad_norm": 0.2424512505531311,
1534
+ "learning_rate": 1.0358666666666668e-05,
1535
+ "loss": 0.0592,
1536
+ "step": 21700
1537
+ },
1538
+ {
1539
+ "epoch": 1.4533333333333334,
1540
+ "grad_norm": 0.139984592795372,
1541
+ "learning_rate": 1.0314222222222223e-05,
1542
+ "loss": 0.0541,
1543
+ "step": 21800
1544
+ },
1545
+ {
1546
+ "epoch": 1.46,
1547
+ "grad_norm": 0.21083594858646393,
1548
+ "learning_rate": 1.0269777777777778e-05,
1549
+ "loss": 0.0509,
1550
+ "step": 21900
1551
+ },
1552
+ {
1553
+ "epoch": 1.4666666666666668,
1554
+ "grad_norm": 0.2649674713611603,
1555
+ "learning_rate": 1.0225333333333334e-05,
1556
+ "loss": 0.0624,
1557
+ "step": 22000
1558
  }
1559
  ],
1560
  "logging_steps": 100,
 
1574
  "attributes": {}
1575
  }
1576
  },
1577
+ "total_flos": 5.358829436928e+16,
1578
  "train_batch_size": 4,
1579
  "trial_name": null,
1580
  "trial_params": null