rakhman-llm commited on
Commit
18e40ac
·
verified ·
1 Parent(s): 567f7f1

Training in progress, step 19000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfa8b32014bbb1116102bb096f351b477c375f02a76f56941cd43b6b2b8c9ae0
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:935e2f81b626e71b298466c51f498096f378414ac276b4d4ea19f4650105a4cf
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24c06d2ce08030ea9353adcf0c618c2748ba4afafcd7e2ef1fa5088554e20156
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b16c83dc7260ee2348e5de945bd725aa12ab974c28241b500cd256e88062c5e
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40162c830f5df6b739cc1f24fd1ff7e4f55f1a27766e6deeff7911f41b300f3c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46737d1e5768cf0de571a9bd47793403b2513f0c1ee54de55bfe17b1b1fdc49a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b23d12d924a2c5ac71a2463338c282c759dd6e8f289f7165dd510cc1f6cd61fa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db8de303a11187ec0581f91507bd8428f995b03d53d2e944a4543899602e0467
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2333333333333334,
5
  "eval_steps": 500,
6
- "global_step": 18500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1310,6 +1310,41 @@
1310
  "learning_rate": 1.178e-05,
1311
  "loss": 0.0657,
1312
  "step": 18500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1313
  }
1314
  ],
1315
  "logging_steps": 100,
@@ -1329,7 +1364,7 @@
1329
  "attributes": {}
1330
  }
1331
  },
1332
- "total_flos": 4.506288390144e+16,
1333
  "train_batch_size": 4,
1334
  "trial_name": null,
1335
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2666666666666666,
5
  "eval_steps": 500,
6
+ "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1310
  "learning_rate": 1.178e-05,
1311
  "loss": 0.0657,
1312
  "step": 18500
1313
+ },
1314
+ {
1315
+ "epoch": 1.24,
1316
+ "grad_norm": 0.21437957882881165,
1317
+ "learning_rate": 1.1735555555555556e-05,
1318
+ "loss": 0.0631,
1319
+ "step": 18600
1320
+ },
1321
+ {
1322
+ "epoch": 1.2466666666666666,
1323
+ "grad_norm": 0.09303513169288635,
1324
+ "learning_rate": 1.1691555555555556e-05,
1325
+ "loss": 0.0594,
1326
+ "step": 18700
1327
+ },
1328
+ {
1329
+ "epoch": 1.2533333333333334,
1330
+ "grad_norm": 0.13789591193199158,
1331
+ "learning_rate": 1.1647111111111111e-05,
1332
+ "loss": 0.0644,
1333
+ "step": 18800
1334
+ },
1335
+ {
1336
+ "epoch": 1.26,
1337
+ "grad_norm": 0.19540788233280182,
1338
+ "learning_rate": 1.1602666666666666e-05,
1339
+ "loss": 0.0564,
1340
+ "step": 18900
1341
+ },
1342
+ {
1343
+ "epoch": 1.2666666666666666,
1344
+ "grad_norm": 0.18746204674243927,
1345
+ "learning_rate": 1.1558222222222223e-05,
1346
+ "loss": 0.0581,
1347
+ "step": 19000
1348
  }
1349
  ],
1350
  "logging_steps": 100,
 
1364
  "attributes": {}
1365
  }
1366
  },
1367
+ "total_flos": 4.628079968256e+16,
1368
  "train_batch_size": 4,
1369
  "trial_name": null,
1370
  "trial_params": null