rakhman-llm commited on
Commit
f94482a
·
verified ·
1 Parent(s): b11002c

Training in progress, step 21000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c86189a77f037cbdd789ce6756a983f35d8424562557bec70f1d6f9478782956
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b28df1971539671747e3a778afbdb5f38bcc6ab00d7766a5153b19f29b78c8d
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f3b9b0a558aef18a5c43688fb84bceaf30915a8e0f42a94e913404b25ec6988
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7061f248e70b39002a88a3f1e8979a8a5203e1f6c7433a05e47b1416a56cf81d
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1cb24b28fd79d1b2e08e97a1214134b31144058db4370cb477a455701ee578a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57271955ed47e1769456cbc5629f14409b6b61904f3f76f733b63102e083bba0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02d6f63efc0f911cd45ec87b983b2cf047da0e86188af05e21eda256bea09b7e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd63ac70e4657463d9ad6dfb25e72c667be965cdfce9d42626f1f00c9c329b89
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3666666666666667,
5
  "eval_steps": 500,
6
- "global_step": 20500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1450,6 +1450,41 @@
1450
  "learning_rate": 1.0891555555555557e-05,
1451
  "loss": 0.0609,
1452
  "step": 20500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1453
  }
1454
  ],
1455
  "logging_steps": 100,
@@ -1469,7 +1504,7 @@
1469
  "attributes": {}
1470
  }
1471
  },
1472
- "total_flos": 4.993454702592e+16,
1473
  "train_batch_size": 4,
1474
  "trial_name": null,
1475
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4,
5
  "eval_steps": 500,
6
+ "global_step": 21000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1450
  "learning_rate": 1.0891555555555557e-05,
1451
  "loss": 0.0609,
1452
  "step": 20500
1453
+ },
1454
+ {
1455
+ "epoch": 1.3733333333333333,
1456
+ "grad_norm": 0.21939997375011444,
1457
+ "learning_rate": 1.0847111111111112e-05,
1458
+ "loss": 0.0612,
1459
+ "step": 20600
1460
+ },
1461
+ {
1462
+ "epoch": 1.38,
1463
+ "grad_norm": 0.23522862792015076,
1464
+ "learning_rate": 1.0802666666666667e-05,
1465
+ "loss": 0.0555,
1466
+ "step": 20700
1467
+ },
1468
+ {
1469
+ "epoch": 1.3866666666666667,
1470
+ "grad_norm": 0.1318577080965042,
1471
+ "learning_rate": 1.0758222222222223e-05,
1472
+ "loss": 0.0608,
1473
+ "step": 20800
1474
+ },
1475
+ {
1476
+ "epoch": 1.3933333333333333,
1477
+ "grad_norm": 0.11281125992536545,
1478
+ "learning_rate": 1.0714222222222224e-05,
1479
+ "loss": 0.0579,
1480
+ "step": 20900
1481
+ },
1482
+ {
1483
+ "epoch": 1.4,
1484
+ "grad_norm": 0.15145792067050934,
1485
+ "learning_rate": 1.066977777777778e-05,
1486
+ "loss": 0.0567,
1487
+ "step": 21000
1488
  }
1489
  ],
1490
  "logging_steps": 100,
 
1504
  "attributes": {}
1505
  }
1506
  },
1507
+ "total_flos": 5.115246280704e+16,
1508
  "train_batch_size": 4,
1509
  "trial_name": null,
1510
  "trial_params": null