rootxhacker commited on
Commit
d0c331f
·
verified ·
1 Parent(s): ee09753

Training in progress, step 25500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6635748f5efd9f5844c8d4b5d5db81b7d2ecb035e0b0b74854654c135879fa40
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e333303611bdcfe53850b8382bc5c6096b913ec43665a75beb67494ee5bcb0
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad541eebf67a51f73b18019a22769780ac860d88ba5ea0fa90f19a9d6cea9ec7
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66d78210b05e1c87c136a3c08a8840809f76d0b17ce40128a18c0fba7403a0f6
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4a89f558d7b243194fee72d6e3b515995ba03b6bdbb2a1ac92b9b326a12ba3e
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc69f04c57ac233f68440607df3025c3f527f698c64e56350e0ea45b99be0781
3
+ size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a37acbb5f6130f7db2eedbc963d0b24fea22de7a0acc314ee6d2189d860dec7
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ee59d15723c28ba4726ae8f5244e001c8225a696c80d61ce6d100eb1539722
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32c12577cf6cc4241331592a413ca96d437bc677a535a9ee4eb64f282b42fe2f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfd4ecc27bf3b1377a8048f15a651b111fa15d35d5db24035804ad1d2f0064c3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 24500,
3
  "best_metric": 1.4431298971176147,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
5
- "epoch": 1.9229290054611183,
6
  "eval_steps": 250,
7
- "global_step": 25000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4308,6 +4308,92 @@
4308
  "eval_samples_per_second": 54.736,
4309
  "eval_steps_per_second": 13.684,
4310
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4311
  }
4312
  ],
4313
  "logging_steps": 50,
 
2
  "best_global_step": 24500,
3
  "best_metric": 1.4431298971176147,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
5
+ "epoch": 1.9613875855703409,
6
  "eval_steps": 250,
7
+ "global_step": 25500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4308
  "eval_samples_per_second": 54.736,
4309
  "eval_steps_per_second": 13.684,
4310
  "step": 25000
4311
+ },
4312
+ {
4313
+ "epoch": 1.9267748634720405,
4314
+ "grad_norm": 1.1179672479629517,
4315
+ "learning_rate": 1.1652512775265614e-05,
4316
+ "loss": 1.5076,
4317
+ "step": 25050
4318
+ },
4319
+ {
4320
+ "epoch": 1.930620721482963,
4321
+ "grad_norm": 0.9407248497009277,
4322
+ "learning_rate": 1.1395169295246496e-05,
4323
+ "loss": 1.468,
4324
+ "step": 25100
4325
+ },
4326
+ {
4327
+ "epoch": 1.9344665794938851,
4328
+ "grad_norm": 1.498488426208496,
4329
+ "learning_rate": 1.113782581522738e-05,
4330
+ "loss": 1.4566,
4331
+ "step": 25150
4332
+ },
4333
+ {
4334
+ "epoch": 1.9383124375048073,
4335
+ "grad_norm": 0.6983101963996887,
4336
+ "learning_rate": 1.0880482335208264e-05,
4337
+ "loss": 1.4621,
4338
+ "step": 25200
4339
+ },
4340
+ {
4341
+ "epoch": 1.9421582955157297,
4342
+ "grad_norm": 1.954953908920288,
4343
+ "learning_rate": 1.0623138855189145e-05,
4344
+ "loss": 1.417,
4345
+ "step": 25250
4346
+ },
4347
+ {
4348
+ "epoch": 1.9421582955157297,
4349
+ "eval_loss": 1.4591727256774902,
4350
+ "eval_runtime": 18.0732,
4351
+ "eval_samples_per_second": 55.331,
4352
+ "eval_steps_per_second": 13.833,
4353
+ "step": 25250
4354
+ },
4355
+ {
4356
+ "epoch": 1.9460041535266517,
4357
+ "grad_norm": 1.6467170715332031,
4358
+ "learning_rate": 1.036579537517003e-05,
4359
+ "loss": 1.4942,
4360
+ "step": 25300
4361
+ },
4362
+ {
4363
+ "epoch": 1.949850011537574,
4364
+ "grad_norm": 1.4509849548339844,
4365
+ "learning_rate": 1.0108451895150913e-05,
4366
+ "loss": 1.4539,
4367
+ "step": 25350
4368
+ },
4369
+ {
4370
+ "epoch": 1.9536958695484963,
4371
+ "grad_norm": 1.6131352186203003,
4372
+ "learning_rate": 9.851108415131796e-06,
4373
+ "loss": 1.3993,
4374
+ "step": 25400
4375
+ },
4376
+ {
4377
+ "epoch": 1.9575417275594185,
4378
+ "grad_norm": 1.880043387413025,
4379
+ "learning_rate": 9.593764935112679e-06,
4380
+ "loss": 1.4449,
4381
+ "step": 25450
4382
+ },
4383
+ {
4384
+ "epoch": 1.9613875855703409,
4385
+ "grad_norm": 1.3041406869888306,
4386
+ "learning_rate": 9.336421455093562e-06,
4387
+ "loss": 1.4918,
4388
+ "step": 25500
4389
+ },
4390
+ {
4391
+ "epoch": 1.9613875855703409,
4392
+ "eval_loss": 1.4548134803771973,
4393
+ "eval_runtime": 18.0544,
4394
+ "eval_samples_per_second": 55.388,
4395
+ "eval_steps_per_second": 13.847,
4396
+ "step": 25500
4397
  }
4398
  ],
4399
  "logging_steps": 50,