Training in progress, step 25500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 36730224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2e333303611bdcfe53850b8382bc5c6096b913ec43665a75beb67494ee5bcb0
|
3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73588346
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66d78210b05e1c87c136a3c08a8840809f76d0b17ce40128a18c0fba7403a0f6
|
3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc69f04c57ac233f68440607df3025c3f527f698c64e56350e0ea45b99be0781
|
3 |
+
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:92ee59d15723c28ba4726ae8f5244e001c8225a696c80d61ce6d100eb1539722
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfd4ecc27bf3b1377a8048f15a651b111fa15d35d5db24035804ad1d2f0064c3
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": 24500,
|
3 |
"best_metric": 1.4431298971176147,
|
4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
|
5 |
-
"epoch": 1.
|
6 |
"eval_steps": 250,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -4308,6 +4308,92 @@
|
|
4308 |
"eval_samples_per_second": 54.736,
|
4309 |
"eval_steps_per_second": 13.684,
|
4310 |
"step": 25000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4311 |
}
|
4312 |
],
|
4313 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": 24500,
|
3 |
"best_metric": 1.4431298971176147,
|
4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
|
5 |
+
"epoch": 1.9613875855703409,
|
6 |
"eval_steps": 250,
|
7 |
+
"global_step": 25500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
4308 |
"eval_samples_per_second": 54.736,
|
4309 |
"eval_steps_per_second": 13.684,
|
4310 |
"step": 25000
|
4311 |
+
},
|
4312 |
+
{
|
4313 |
+
"epoch": 1.9267748634720405,
|
4314 |
+
"grad_norm": 1.1179672479629517,
|
4315 |
+
"learning_rate": 1.1652512775265614e-05,
|
4316 |
+
"loss": 1.5076,
|
4317 |
+
"step": 25050
|
4318 |
+
},
|
4319 |
+
{
|
4320 |
+
"epoch": 1.930620721482963,
|
4321 |
+
"grad_norm": 0.9407248497009277,
|
4322 |
+
"learning_rate": 1.1395169295246496e-05,
|
4323 |
+
"loss": 1.468,
|
4324 |
+
"step": 25100
|
4325 |
+
},
|
4326 |
+
{
|
4327 |
+
"epoch": 1.9344665794938851,
|
4328 |
+
"grad_norm": 1.498488426208496,
|
4329 |
+
"learning_rate": 1.113782581522738e-05,
|
4330 |
+
"loss": 1.4566,
|
4331 |
+
"step": 25150
|
4332 |
+
},
|
4333 |
+
{
|
4334 |
+
"epoch": 1.9383124375048073,
|
4335 |
+
"grad_norm": 0.6983101963996887,
|
4336 |
+
"learning_rate": 1.0880482335208264e-05,
|
4337 |
+
"loss": 1.4621,
|
4338 |
+
"step": 25200
|
4339 |
+
},
|
4340 |
+
{
|
4341 |
+
"epoch": 1.9421582955157297,
|
4342 |
+
"grad_norm": 1.954953908920288,
|
4343 |
+
"learning_rate": 1.0623138855189145e-05,
|
4344 |
+
"loss": 1.417,
|
4345 |
+
"step": 25250
|
4346 |
+
},
|
4347 |
+
{
|
4348 |
+
"epoch": 1.9421582955157297,
|
4349 |
+
"eval_loss": 1.4591727256774902,
|
4350 |
+
"eval_runtime": 18.0732,
|
4351 |
+
"eval_samples_per_second": 55.331,
|
4352 |
+
"eval_steps_per_second": 13.833,
|
4353 |
+
"step": 25250
|
4354 |
+
},
|
4355 |
+
{
|
4356 |
+
"epoch": 1.9460041535266517,
|
4357 |
+
"grad_norm": 1.6467170715332031,
|
4358 |
+
"learning_rate": 1.036579537517003e-05,
|
4359 |
+
"loss": 1.4942,
|
4360 |
+
"step": 25300
|
4361 |
+
},
|
4362 |
+
{
|
4363 |
+
"epoch": 1.949850011537574,
|
4364 |
+
"grad_norm": 1.4509849548339844,
|
4365 |
+
"learning_rate": 1.0108451895150913e-05,
|
4366 |
+
"loss": 1.4539,
|
4367 |
+
"step": 25350
|
4368 |
+
},
|
4369 |
+
{
|
4370 |
+
"epoch": 1.9536958695484963,
|
4371 |
+
"grad_norm": 1.6131352186203003,
|
4372 |
+
"learning_rate": 9.851108415131796e-06,
|
4373 |
+
"loss": 1.3993,
|
4374 |
+
"step": 25400
|
4375 |
+
},
|
4376 |
+
{
|
4377 |
+
"epoch": 1.9575417275594185,
|
4378 |
+
"grad_norm": 1.880043387413025,
|
4379 |
+
"learning_rate": 9.593764935112679e-06,
|
4380 |
+
"loss": 1.4449,
|
4381 |
+
"step": 25450
|
4382 |
+
},
|
4383 |
+
{
|
4384 |
+
"epoch": 1.9613875855703409,
|
4385 |
+
"grad_norm": 1.3041406869888306,
|
4386 |
+
"learning_rate": 9.336421455093562e-06,
|
4387 |
+
"loss": 1.4918,
|
4388 |
+
"step": 25500
|
4389 |
+
},
|
4390 |
+
{
|
4391 |
+
"epoch": 1.9613875855703409,
|
4392 |
+
"eval_loss": 1.4548134803771973,
|
4393 |
+
"eval_runtime": 18.0544,
|
4394 |
+
"eval_samples_per_second": 55.388,
|
4395 |
+
"eval_steps_per_second": 13.847,
|
4396 |
+
"step": 25500
|
4397 |
}
|
4398 |
],
|
4399 |
"logging_steps": 50,
|