Training in progress, step 26500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 36730224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2e93e89fb68bb8962ff13343b1f03461f74663e88695cc877535d81fccd21cd
|
3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73588346
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06161b85e01debd263697b27f956188143b84ef8f31f2d7a79af45d05330fb3b
|
3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b56501523df118c1a33e60d970ee258e92691efddadd68cb368e352ca4fb0c1
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76592baa9e3b0e3d15e021e247d3cfa4915cd052c2c669b30b628ff835c5a245
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d9b1af634bbca91339a4e0183f53f86b46f1f5a7d978b27638787d68fcb88bd
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": 24500,
|
3 |
"best_metric": 1.4431298971176147,
|
4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
|
5 |
-
"epoch":
|
6 |
"eval_steps": 250,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -4480,6 +4480,92 @@
|
|
4480 |
"eval_samples_per_second": 55.537,
|
4481 |
"eval_steps_per_second": 13.884,
|
4482 |
"step": 26000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4483 |
}
|
4484 |
],
|
4485 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": 24500,
|
3 |
"best_metric": 1.4431298971176147,
|
4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
|
5 |
+
"epoch": 2.0383047457887855,
|
6 |
"eval_steps": 250,
|
7 |
+
"global_step": 26500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
4480 |
"eval_samples_per_second": 55.537,
|
4481 |
"eval_steps_per_second": 13.884,
|
4482 |
"step": 26000
|
4483 |
+
},
|
4484 |
+
{
|
4485 |
+
"epoch": 2.003692023690485,
|
4486 |
+
"grad_norm": 1.7356750965118408,
|
4487 |
+
"learning_rate": 6.505643174883276e-06,
|
4488 |
+
"loss": 1.3839,
|
4489 |
+
"step": 26050
|
4490 |
+
},
|
4491 |
+
{
|
4492 |
+
"epoch": 2.0075378817014076,
|
4493 |
+
"grad_norm": 2.3067352771759033,
|
4494 |
+
"learning_rate": 6.248299694864159e-06,
|
4495 |
+
"loss": 1.4348,
|
4496 |
+
"step": 26100
|
4497 |
+
},
|
4498 |
+
{
|
4499 |
+
"epoch": 2.01138373971233,
|
4500 |
+
"grad_norm": 1.343248724937439,
|
4501 |
+
"learning_rate": 5.990956214845041e-06,
|
4502 |
+
"loss": 1.3703,
|
4503 |
+
"step": 26150
|
4504 |
+
},
|
4505 |
+
{
|
4506 |
+
"epoch": 2.015229597723252,
|
4507 |
+
"grad_norm": 1.9424471855163574,
|
4508 |
+
"learning_rate": 5.733612734825925e-06,
|
4509 |
+
"loss": 1.4304,
|
4510 |
+
"step": 26200
|
4511 |
+
},
|
4512 |
+
{
|
4513 |
+
"epoch": 2.0190754557341744,
|
4514 |
+
"grad_norm": 1.5383673906326294,
|
4515 |
+
"learning_rate": 5.476269254806808e-06,
|
4516 |
+
"loss": 1.4118,
|
4517 |
+
"step": 26250
|
4518 |
+
},
|
4519 |
+
{
|
4520 |
+
"epoch": 2.0190754557341744,
|
4521 |
+
"eval_loss": 1.474881649017334,
|
4522 |
+
"eval_runtime": 18.1751,
|
4523 |
+
"eval_samples_per_second": 55.02,
|
4524 |
+
"eval_steps_per_second": 13.755,
|
4525 |
+
"step": 26250
|
4526 |
+
},
|
4527 |
+
{
|
4528 |
+
"epoch": 2.0229213137450963,
|
4529 |
+
"grad_norm": 1.803488850593567,
|
4530 |
+
"learning_rate": 5.2189257747876905e-06,
|
4531 |
+
"loss": 1.4537,
|
4532 |
+
"step": 26300
|
4533 |
+
},
|
4534 |
+
{
|
4535 |
+
"epoch": 2.0267671717560187,
|
4536 |
+
"grad_norm": 1.8623336553573608,
|
4537 |
+
"learning_rate": 4.961582294768574e-06,
|
4538 |
+
"loss": 1.3659,
|
4539 |
+
"step": 26350
|
4540 |
+
},
|
4541 |
+
{
|
4542 |
+
"epoch": 2.030613029766941,
|
4543 |
+
"grad_norm": 1.1901572942733765,
|
4544 |
+
"learning_rate": 4.7042388147494575e-06,
|
4545 |
+
"loss": 1.4175,
|
4546 |
+
"step": 26400
|
4547 |
+
},
|
4548 |
+
{
|
4549 |
+
"epoch": 2.034458887777863,
|
4550 |
+
"grad_norm": 1.2967520952224731,
|
4551 |
+
"learning_rate": 4.4468953347303406e-06,
|
4552 |
+
"loss": 1.458,
|
4553 |
+
"step": 26450
|
4554 |
+
},
|
4555 |
+
{
|
4556 |
+
"epoch": 2.0383047457887855,
|
4557 |
+
"grad_norm": 1.2987436056137085,
|
4558 |
+
"learning_rate": 4.189551854711224e-06,
|
4559 |
+
"loss": 1.3965,
|
4560 |
+
"step": 26500
|
4561 |
+
},
|
4562 |
+
{
|
4563 |
+
"epoch": 2.0383047457887855,
|
4564 |
+
"eval_loss": 1.4528058767318726,
|
4565 |
+
"eval_runtime": 18.2495,
|
4566 |
+
"eval_samples_per_second": 54.796,
|
4567 |
+
"eval_steps_per_second": 13.699,
|
4568 |
+
"step": 26500
|
4569 |
}
|
4570 |
],
|
4571 |
"logging_steps": 50,
|