Training in progress, step 37500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 36730224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23dcc181cec0e10d847e208901d6626875ec6b62c6c0f26d433995f3783e85d0
|
3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7432e9c2d93150abfd716acce42331362afc030b8c095403869cbf1291225746
|
3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73588346
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b8434a82bb738c0cecb4adf19f6cc16b50060235313456c478572cc44126b33
|
3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c5a317d14f0c4fea42d2b4ca5f020a67142d647545709c1c9006f254a7d7349
|
3 |
+
size 14308
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4d13c8e281c1623531d4d497478b975ea082917bb2c61878d98cb5fb27c6af9
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23ce717bb1ff7d19ce8b39673c5e006d14b3fec124190d834c88a63ab05da6d0
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"best_global_step": 36750,
|
3 |
"best_metric": 0.9847651720046997,
|
4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-34000",
|
5 |
-
"epoch": 2.
|
6 |
"eval_steps": 250,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -6372,6 +6372,92 @@
|
|
6372 |
"eval_samples_per_second": 57.868,
|
6373 |
"eval_steps_per_second": 14.467,
|
6374 |
"step": 37000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6375 |
}
|
6376 |
],
|
6377 |
"logging_steps": 50,
|
|
|
2 |
"best_global_step": 36750,
|
3 |
"best_metric": 0.9847651720046997,
|
4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-34000",
|
5 |
+
"epoch": 2.8843935081916774,
|
6 |
"eval_steps": 250,
|
7 |
+
"global_step": 37500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
6372 |
"eval_samples_per_second": 57.868,
|
6373 |
"eval_steps_per_second": 14.467,
|
6374 |
"step": 37000
|
6375 |
+
},
|
6376 |
+
{
|
6377 |
+
"epoch": 2.8497807860933775,
|
6378 |
+
"grad_norm": 1.3013421297073364,
|
6379 |
+
"learning_rate": 1.0227774459133055e-05,
|
6380 |
+
"loss": 0.9717,
|
6381 |
+
"step": 37050
|
6382 |
+
},
|
6383 |
+
{
|
6384 |
+
"epoch": 2.8536266441043,
|
6385 |
+
"grad_norm": 1.263071060180664,
|
6386 |
+
"learning_rate": 9.968054437316573e-06,
|
6387 |
+
"loss": 1.0259,
|
6388 |
+
"step": 37100
|
6389 |
+
},
|
6390 |
+
{
|
6391 |
+
"epoch": 2.857472502115222,
|
6392 |
+
"grad_norm": 1.1513851881027222,
|
6393 |
+
"learning_rate": 9.70833441550009e-06,
|
6394 |
+
"loss": 1.0015,
|
6395 |
+
"step": 37150
|
6396 |
+
},
|
6397 |
+
{
|
6398 |
+
"epoch": 2.8613183601261443,
|
6399 |
+
"grad_norm": 0.7431422472000122,
|
6400 |
+
"learning_rate": 9.448614393683609e-06,
|
6401 |
+
"loss": 0.999,
|
6402 |
+
"step": 37200
|
6403 |
+
},
|
6404 |
+
{
|
6405 |
+
"epoch": 2.8651642181370662,
|
6406 |
+
"grad_norm": 0.6744217872619629,
|
6407 |
+
"learning_rate": 9.188894371867128e-06,
|
6408 |
+
"loss": 0.9285,
|
6409 |
+
"step": 37250
|
6410 |
+
},
|
6411 |
+
{
|
6412 |
+
"epoch": 2.8651642181370662,
|
6413 |
+
"eval_loss": 0.994976282119751,
|
6414 |
+
"eval_runtime": 17.3294,
|
6415 |
+
"eval_samples_per_second": 57.705,
|
6416 |
+
"eval_steps_per_second": 14.426,
|
6417 |
+
"step": 37250
|
6418 |
+
},
|
6419 |
+
{
|
6420 |
+
"epoch": 2.8690100761479886,
|
6421 |
+
"grad_norm": 1.2962367534637451,
|
6422 |
+
"learning_rate": 8.929174350050646e-06,
|
6423 |
+
"loss": 0.9433,
|
6424 |
+
"step": 37300
|
6425 |
+
},
|
6426 |
+
{
|
6427 |
+
"epoch": 2.8728559341589106,
|
6428 |
+
"grad_norm": 0.9955423474311829,
|
6429 |
+
"learning_rate": 8.669454328234164e-06,
|
6430 |
+
"loss": 1.0447,
|
6431 |
+
"step": 37350
|
6432 |
+
},
|
6433 |
+
{
|
6434 |
+
"epoch": 2.876701792169833,
|
6435 |
+
"grad_norm": 0.5840064287185669,
|
6436 |
+
"learning_rate": 8.409734306417682e-06,
|
6437 |
+
"loss": 0.9504,
|
6438 |
+
"step": 37400
|
6439 |
+
},
|
6440 |
+
{
|
6441 |
+
"epoch": 2.8805476501807554,
|
6442 |
+
"grad_norm": 1.0777620077133179,
|
6443 |
+
"learning_rate": 8.1500142846012e-06,
|
6444 |
+
"loss": 0.9635,
|
6445 |
+
"step": 37450
|
6446 |
+
},
|
6447 |
+
{
|
6448 |
+
"epoch": 2.8843935081916774,
|
6449 |
+
"grad_norm": 0.9312844276428223,
|
6450 |
+
"learning_rate": 7.890294262784718e-06,
|
6451 |
+
"loss": 1.005,
|
6452 |
+
"step": 37500
|
6453 |
+
},
|
6454 |
+
{
|
6455 |
+
"epoch": 2.8843935081916774,
|
6456 |
+
"eval_loss": 0.9864250421524048,
|
6457 |
+
"eval_runtime": 17.2238,
|
6458 |
+
"eval_samples_per_second": 58.059,
|
6459 |
+
"eval_steps_per_second": 14.515,
|
6460 |
+
"step": 37500
|
6461 |
}
|
6462 |
],
|
6463 |
"logging_steps": 50,
|