rootxhacker committed
Commit d2b62b1 · verified · 1 Parent(s): f0dfec2

Training in progress, step 25000, checkpoint

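This commit updates an in-progress checkpoint at step 25000. As a reference only, below is a minimal sketch of how such a checkpoint is typically resumed with the Hugging Face transformers Trainer; the checkpoint directory name is an assumption inferred from the best_model_checkpoint path in trainer_state.json, and the model/dataset construction is whatever the original training script used.

```python
# Hedged sketch, not the author's script: resuming a transformers Trainer run
# from a saved checkpoint directory. Resuming restores the adapter weights,
# optimizer.pt, scheduler.pt, scaler.pt and rng_state.pth updated in this commit.
from transformers import Trainer

def resume_training(trainer: Trainer,
                    checkpoint_dir: str = "./ar-diffusion-checkpoints/checkpoint-25000"):
    # `trainer` must already be built with the same model, args and datasets
    # as the original run; the directory path here is an assumption.
    return trainer.train(resume_from_checkpoint=checkpoint_dir)
```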
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bc54f61ae85f9bb8331e1cdd5923e3fd960989060b059e73b0d0e134dae9e8f
+oid sha256:6635748f5efd9f5844c8d4b5d5db81b7d2ecb035e0b0b74854654c135879fa40
 size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35291a9f57ededb6486e5a5291f2f75b65ad3ee7378c172fe3cff039858844f4
+oid sha256:ad541eebf67a51f73b18019a22769780ac860d88ba5ea0fa90f19a9d6cea9ec7
 size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6abd220bb5c699b08784d9e5bd7e4f3c387ae6cf3a2fc509bcb49366bfaee15
-size 14244
+oid sha256:f4a89f558d7b243194fee72d6e3b515995ba03b6bdbb2a1ac92b9b326a12ba3e
+size 14308
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ead8696fbf0049adb3c84fc53d3c6dc113682fab1d3e945183397e530adbbee
+oid sha256:6a37acbb5f6130f7db2eedbc963d0b24fea22de7a0acc314ee6d2189d860dec7
 size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d220408f294f0cc03e84f4b2538adfda082b3a1de023a6d0f995deed07fa75a2
+oid sha256:32c12577cf6cc4241331592a413ca96d437bc677a535a9ee4eb64f282b42fe2f
 size 1064
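Each of the binary files above is tracked with Git LFS, so the repository stores only a small pointer file (a `version` line, an `oid sha256:` line, and a `size` line); the diffs show those pointers changing as the checkpoint is overwritten. As an illustration only, here is a minimal sketch of parsing such a pointer and verifying a locally downloaded object against it; the helper names are hypothetical, not part of any library.

```python
# Minimal sketch (hypothetical helpers): parse a Git LFS pointer file and
# check that a local copy of the object matches the recorded sha256 and size.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse 'key value' lines such as 'oid sha256:<hex>' and 'size <bytes>'."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_object(pointer_path: str, object_path: str) -> bool:
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]   # strip the 'sha256:' prefix
    expected_size = int(fields["size"])
    data = Path(object_path).read_bytes()
    return (len(data) == expected_size
            and hashlib.sha256(data).hexdigest() == expected_oid)

# Example (paths are illustrative):
# verify_object("last-checkpoint/optimizer.pt",  # pointer file as stored in git
#               "/tmp/optimizer.pt")             # resolved LFS object on disk
```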
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": 24500,
   "best_metric": 1.4431298971176147,
   "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
-  "epoch": 1.884470425351896,
+  "epoch": 1.9229290054611183,
   "eval_steps": 250,
-  "global_step": 24500,
+  "global_step": 25000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4222,6 +4222,92 @@
       "eval_samples_per_second": 55.613,
       "eval_steps_per_second": 13.903,
       "step": 24500
+    },
+    {
+      "epoch": 1.8883162833628182,
+      "grad_norm": 1.8027464151382446,
+      "learning_rate": 1.4220800705856402e-05,
+      "loss": 1.4855,
+      "step": 24550
+    },
+    {
+      "epoch": 1.8921621413737406,
+      "grad_norm": 1.139756679534912,
+      "learning_rate": 1.3963457225837285e-05,
+      "loss": 1.3773,
+      "step": 24600
+    },
+    {
+      "epoch": 1.8960079993846626,
+      "grad_norm": 1.377536654472351,
+      "learning_rate": 1.3706113745818166e-05,
+      "loss": 1.4274,
+      "step": 24650
+    },
+    {
+      "epoch": 1.899853857395585,
+      "grad_norm": 1.2132219076156616,
+      "learning_rate": 1.3448770265799051e-05,
+      "loss": 1.3772,
+      "step": 24700
+    },
+    {
+      "epoch": 1.9036997154065072,
+      "grad_norm": 1.7106857299804688,
+      "learning_rate": 1.3191426785779932e-05,
+      "loss": 1.41,
+      "step": 24750
+    },
+    {
+      "epoch": 1.9036997154065072,
+      "eval_loss": 1.472328782081604,
+      "eval_runtime": 18.0789,
+      "eval_samples_per_second": 55.313,
+      "eval_steps_per_second": 13.828,
+      "step": 24750
+    },
+    {
+      "epoch": 1.9075455734174294,
+      "grad_norm": 0.9809736013412476,
+      "learning_rate": 1.2939230175361197e-05,
+      "loss": 1.4547,
+      "step": 24800
+    },
+    {
+      "epoch": 1.9113914314283518,
+      "grad_norm": 1.476722240447998,
+      "learning_rate": 1.2681886695342082e-05,
+      "loss": 1.4546,
+      "step": 24850
+    },
+    {
+      "epoch": 1.9152372894392737,
+      "grad_norm": 2.078511953353882,
+      "learning_rate": 1.2424543215322965e-05,
+      "loss": 1.4971,
+      "step": 24900
+    },
+    {
+      "epoch": 1.9190831474501961,
+      "grad_norm": 0.7233028411865234,
+      "learning_rate": 1.2167199735303847e-05,
+      "loss": 1.3622,
+      "step": 24950
+    },
+    {
+      "epoch": 1.9229290054611183,
+      "grad_norm": 1.3686310052871704,
+      "learning_rate": 1.1909856255284731e-05,
+      "loss": 1.5232,
+      "step": 25000
+    },
+    {
+      "epoch": 1.9229290054611183,
+      "eval_loss": 1.461082935333252,
+      "eval_runtime": 18.2695,
+      "eval_samples_per_second": 54.736,
+      "eval_steps_per_second": 13.684,
+      "step": 25000
     }
   ],
   "logging_steps": 50,