rootxhacker committed
Commit 2082e3a · verified · 1 Parent(s): 117d525

Training in progress, step 26000, checkpoint

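For anyone pulling this checkpoint, the training state recorded in last-checkpoint/trainer_state.json can be inspected locally. A minimal sketch, assuming a local clone of the repo with the script run from the repo root; the keys it reads are the standard ones the Hugging Face Trainer writes and all appear in the diff below:

# Minimal sketch: inspect the trainer state updated by this commit.
# Assumes a local clone with last-checkpoint/ present; standard library only.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])            # 26000 after this commit
print(state["epoch"])                  # ~1.9998, just short of 2 full epochs
print(state["best_metric"])            # 1.4431... (best eval loss so far)
print(state["best_model_checkpoint"])  # ./ar-diffusion-checkpoints/checkpoint-24500

# The tail of log_history holds the newest train/eval records (steps 25550-26000).
for record in state["log_history"][-3:]:
    print(record)

Resuming the run from this directory would go through the usual trainer.train(resume_from_checkpoint="last-checkpoint") path, which is what picks up optimizer.pt, scheduler.pt, scaler.pt, and rng_state.pth alongside the adapter weights changed below.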
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b2e333303611bdcfe53850b8382bc5c6096b913ec43665a75beb67494ee5bcb0
+ oid sha256:7380e911b47a593b6e6e294e7827a8de2e4e3f7b1b83607bf62e1583c2d7f7a3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:66d78210b05e1c87c136a3c08a8840809f76d0b17ce40128a18c0fba7403a0f6
+ oid sha256:7af7e30906067165a3669470320cb653b4ffd33b1d8a610f3716b94a2f716fce
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dc69f04c57ac233f68440607df3025c3f527f698c64e56350e0ea45b99be0781
+ oid sha256:35c260873e53de579d314af7f23d76fd9a02c023129e1ca7c8f1d3d5f4fae1d0
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:92ee59d15723c28ba4726ae8f5244e001c8225a696c80d61ce6d100eb1539722
+ oid sha256:60e9205691ce5148ac9b5c101f7716ac13075c3c3735582239bfb45d9a860e75
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cfd4ecc27bf3b1377a8048f15a651b111fa15d35d5db24035804ad1d2f0064c3
+ oid sha256:4e14edeb9123c5eec7e26cdb8d77f3aaa83054aadc39c2057a183c66b1f00314
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": 24500,
  "best_metric": 1.4431298971176147,
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
- "epoch": 1.9613875855703409,
+ "epoch": 1.9998461656795632,
  "eval_steps": 250,
- "global_step": 25500,
+ "global_step": 26000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4394,6 +4394,92 @@
  "eval_samples_per_second": 55.388,
  "eval_steps_per_second": 13.847,
  "step": 25500
+ },
+ {
+ "epoch": 1.9652334435812628,
+ "grad_norm": 1.8318700790405273,
+ "learning_rate": 9.079077975074445e-06,
+ "loss": 1.42,
+ "step": 25550
+ },
+ {
+ "epoch": 1.9690793015921852,
+ "grad_norm": 1.7966841459274292,
+ "learning_rate": 8.821734495055328e-06,
+ "loss": 1.3236,
+ "step": 25600
+ },
+ {
+ "epoch": 1.9729251596031074,
+ "grad_norm": 0.7579635977745056,
+ "learning_rate": 8.564391015036211e-06,
+ "loss": 1.3957,
+ "step": 25650
+ },
+ {
+ "epoch": 1.9767710176140296,
+ "grad_norm": 1.4515990018844604,
+ "learning_rate": 8.307047535017094e-06,
+ "loss": 1.3347,
+ "step": 25700
+ },
+ {
+ "epoch": 1.980616875624952,
+ "grad_norm": 1.5671380758285522,
+ "learning_rate": 8.049704054997977e-06,
+ "loss": 1.4624,
+ "step": 25750
+ },
+ {
+ "epoch": 1.980616875624952,
+ "eval_loss": 1.450337290763855,
+ "eval_runtime": 17.9548,
+ "eval_samples_per_second": 55.695,
+ "eval_steps_per_second": 13.924,
+ "step": 25750
+ },
+ {
+ "epoch": 1.984462733635874,
+ "grad_norm": 1.7020714282989502,
+ "learning_rate": 7.79236057497886e-06,
+ "loss": 1.3822,
+ "step": 25800
+ },
+ {
+ "epoch": 1.9883085916467964,
+ "grad_norm": 1.297658920288086,
+ "learning_rate": 7.535017094959743e-06,
+ "loss": 1.4008,
+ "step": 25850
+ },
+ {
+ "epoch": 1.9921544496577186,
+ "grad_norm": 1.8151623010635376,
+ "learning_rate": 7.277673614940627e-06,
+ "loss": 1.4408,
+ "step": 25900
+ },
+ {
+ "epoch": 1.9960003076686408,
+ "grad_norm": 0.8869682550430298,
+ "learning_rate": 7.02033013492151e-06,
+ "loss": 1.4767,
+ "step": 25950
+ },
+ {
+ "epoch": 1.9998461656795632,
+ "grad_norm": 1.898775339126587,
+ "learning_rate": 6.762986654902392e-06,
+ "loss": 1.5032,
+ "step": 26000
+ },
+ {
+ "epoch": 1.9998461656795632,
+ "eval_loss": 1.4542045593261719,
+ "eval_runtime": 18.0059,
+ "eval_samples_per_second": 55.537,
+ "eval_steps_per_second": 13.884,
+ "step": 26000
  }
  ],
  "logging_steps": 50,