rakhman-llm committed (verified)
Commit 074103b · 1 Parent(s): ee389fe

Training in progress, step 17000, checkpoint

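For context: this commit updates the `last-checkpoint/` directory that the 🤗 transformers Trainer writes periodically during training, i.e. the model weights plus the optimizer, scheduler, and RNG state, and the `trainer_state.json` log shown in the diff below. The snippet below is only an illustrative sketch: the local paths assume a clone of this repo with LFS objects pulled, `safetensors` is an assumed dependency, and the 15000 steps-per-epoch figure is inferred from the logged step/epoch ratios rather than taken from any file in the repo.

```python
# Minimal sketch: inspect a locally cloned copy of this checkpoint.
# Assumes the repo is cloned with LFS objects pulled and `safetensors` installed.
import json
from safetensors import safe_open

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)
print(state["global_step"], state["epoch"])  # 17000 1.1333333333333333

# The logged epoch/step ratio implies roughly 15000 optimizer steps per epoch
# (16500 / 1.1 == 17000 / 1.1333... == 15000); this is inferred, not stated anywhere.
print(state["global_step"] / 15000)  # 1.1333333333333333

# model.safetensors holds the weights referenced by the LFS pointer in the diff below.
with safe_open("last-checkpoint/model.safetensors", framework="pt", device="cpu") as f:
    print(f"{len(f.keys())} tensors in the checkpoint")
```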
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:175884cade4e067c9fa3009c5873e488ef4e170548016dcad81294d3815c5e3d
+ oid sha256:688c3b1d28f6b863bee92004557adc364c4b2e5ee4097d484ed3aaba40d50c14
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4e366c1ad2c23dec12c48d6136cf496f7476337ed3fe80251ddb60c2daadec8e
+ oid sha256:9a993357ca1b551b4a094ee2409999ca1a4657c3428fde811fa9954bc1b8b645
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3f95e1dd2cd7704ca98aa1bf80c979e7e594652b8372e360c80de457ba4b96d7
+ oid sha256:b8fa477eb7529f96adee96884288f62ffdf70dbb1416c416ce42fdc1eba5e518
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:edb1f065dac584748e029b7253ed918a3ede6790f2209ff8eedc0176fafb2e83
+ oid sha256:57233dc32b29f0dc0010f99ddb975758e3cef5e747558908f115a1d09e80f707
  size 1064
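All four files above are tracked with Git LFS, so each diff only swaps the pointer's `oid` (the SHA-256 of the file contents) while the recorded size is unchanged. A minimal sketch for checking a locally downloaded file against its pointer, assuming a clone of this repo with LFS objects pulled:

```python
# Minimal sketch: verify a downloaded LFS file against the oid in its pointer.
# The path assumes a local clone of this repo with LFS objects pulled.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# New oid from the last-checkpoint/model.safetensors pointer above.
expected = "688c3b1d28f6b863bee92004557adc364c4b2e5ee4097d484ed3aaba40d50c14"
print(sha256_of("last-checkpoint/model.safetensors") == expected)
```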
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 1.1,
+ "epoch": 1.1333333333333333,
  "eval_steps": 500,
- "global_step": 16500,
+ "global_step": 17000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1170,6 +1170,41 @@
  "learning_rate": 1.2668444444444446e-05,
  "loss": 0.0544,
  "step": 16500
+ },
+ {
+ "epoch": 1.1066666666666667,
+ "grad_norm": 0.17457172274589539,
+ "learning_rate": 1.2624444444444446e-05,
+ "loss": 0.0587,
+ "step": 16600
+ },
+ {
+ "epoch": 1.1133333333333333,
+ "grad_norm": 0.1655428558588028,
+ "learning_rate": 1.2580000000000002e-05,
+ "loss": 0.0553,
+ "step": 16700
+ },
+ {
+ "epoch": 1.12,
+ "grad_norm": 0.23929822444915771,
+ "learning_rate": 1.2535555555555557e-05,
+ "loss": 0.0595,
+ "step": 16800
+ },
+ {
+ "epoch": 1.1266666666666667,
+ "grad_norm": 0.1591082215309143,
+ "learning_rate": 1.2491111111111112e-05,
+ "loss": 0.0589,
+ "step": 16900
+ },
+ {
+ "epoch": 1.1333333333333333,
+ "grad_norm": 0.14291773736476898,
+ "learning_rate": 1.2446666666666667e-05,
+ "loss": 0.058,
+ "step": 17000
  }
  ],
  "logging_steps": 100,
@@ -1189,7 +1224,7 @@
  "attributes": {}
  }
  },
- "total_flos": 4.019122077696e+16,
+ "total_flos": 4.140913655808e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null