rakhman-llm committed on
Commit
d869ecc
·
verified ·
1 Parent(s): f7cfc1b

Training in progress, step 22500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cf00b77a940691d2a87a25214434a0d48f9864c65fc3b0b19e53bccdbef0226
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0f4dd5c7f06f434e4d8cbac5754e2c65f64bd46ac822a7e90462688dcf30e65
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2faa9161687d4c30fe8c5c851dcaf7629e9d07b563b4b1ca8c6d9fe9490b63c8
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a565ab0875e5f8e974ebb52445ef3770a07896c54aaad2beab4684116687c64d
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92097f0b98245def4fd493d6f93f0c40c3a4e7e4f11939d1bbd3057b8f076619
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4371c359f3f61a0b206a73cb497201135751765261ece06b66d5005fa4a3d9e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a00533389a4678d5ecaa8f25d54446914041611d7541f6bf05750bdc13a462b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6c8e99b80f348613f137bd980faaac34ec4fa21cbf485269f4beb839740a21
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4666666666666668,
5
  "eval_steps": 500,
6
- "global_step": 22000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1555,6 +1555,41 @@
1555
  "learning_rate": 1.0225333333333334e-05,
1556
  "loss": 0.0624,
1557
  "step": 22000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1558
  }
1559
  ],
1560
  "logging_steps": 100,
@@ -1574,7 +1609,7 @@
1574
  "attributes": {}
1575
  }
1576
  },
1577
- "total_flos": 5.358829436928e+16,
1578
  "train_batch_size": 4,
1579
  "trial_name": null,
1580
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.5,
5
  "eval_steps": 500,
6
+ "global_step": 22500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1555
  "learning_rate": 1.0225333333333334e-05,
1556
  "loss": 0.0624,
1557
  "step": 22000
1558
+ },
1559
+ {
1560
+ "epoch": 1.4733333333333334,
1561
+ "grad_norm": 0.2268645465373993,
1562
+ "learning_rate": 1.0180888888888889e-05,
1563
+ "loss": 0.0504,
1564
+ "step": 22100
1565
+ },
1566
+ {
1567
+ "epoch": 1.48,
1568
+ "grad_norm": 0.1687782257795334,
1569
+ "learning_rate": 1.0136444444444444e-05,
1570
+ "loss": 0.05,
1571
+ "step": 22200
1572
+ },
1573
+ {
1574
+ "epoch": 1.4866666666666668,
1575
+ "grad_norm": 0.3606027364730835,
1576
+ "learning_rate": 1.0092000000000001e-05,
1577
+ "loss": 0.0558,
1578
+ "step": 22300
1579
+ },
1580
+ {
1581
+ "epoch": 1.4933333333333334,
1582
+ "grad_norm": 0.18675172328948975,
1583
+ "learning_rate": 1.0047555555555558e-05,
1584
+ "loss": 0.058,
1585
+ "step": 22400
1586
+ },
1587
+ {
1588
+ "epoch": 1.5,
1589
+ "grad_norm": 0.29337164759635925,
1590
+ "learning_rate": 1.0003111111111113e-05,
1591
+ "loss": 0.0564,
1592
+ "step": 22500
1593
  }
1594
  ],
1595
  "logging_steps": 100,
 
1609
  "attributes": {}
1610
  }
1611
  },
1612
+ "total_flos": 5.48062101504e+16,
1613
  "train_batch_size": 4,
1614
  "trial_name": null,
1615
  "trial_params": null