Training in progress, step 24500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 36730224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bc54f61ae85f9bb8331e1cdd5923e3fd960989060b059e73b0d0e134dae9e8f
|
3 |
size 36730224
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 73588346
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35291a9f57ededb6486e5a5291f2f75b65ad3ee7378c172fe3cff039858844f4
|
3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6abd220bb5c699b08784d9e5bd7e4f3c387ae6cf3a2fc509bcb49366bfaee15
|
3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 988
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ead8696fbf0049adb3c84fc53d3c6dc113682fab1d3e945183397e530adbbee
|
3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d220408f294f0cc03e84f4b2538adfda082b3a1de023a6d0f995deed07fa75a2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"best_global_step":
|
3 |
-
"best_metric": 1.
|
4 |
-
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-
|
5 |
-
"epoch": 1.
|
6 |
"eval_steps": 250,
|
7 |
-
"global_step":
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
@@ -4136,6 +4136,92 @@
|
|
4136 |
"eval_samples_per_second": 55.354,
|
4137 |
"eval_steps_per_second": 13.839,
|
4138 |
"step": 24000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4139 |
}
|
4140 |
],
|
4141 |
"logging_steps": 50,
|
|
|
1 |
{
|
2 |
+
"best_global_step": 24500,
|
3 |
+
"best_metric": 1.4431298971176147,
|
4 |
+
"best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-24500",
|
5 |
+
"epoch": 1.884470425351896,
|
6 |
"eval_steps": 250,
|
7 |
+
"global_step": 24500,
|
8 |
"is_hyper_param_search": false,
|
9 |
"is_local_process_zero": true,
|
10 |
"is_world_process_zero": true,
|
|
|
4136 |
"eval_samples_per_second": 55.354,
|
4137 |
"eval_steps_per_second": 13.839,
|
4138 |
"step": 24000
|
4139 |
+
},
|
4140 |
+
{
|
4141 |
+
"epoch": 1.8498577032535959,
|
4142 |
+
"grad_norm": 1.3933135271072388,
|
4143 |
+
"learning_rate": 1.679423550604757e-05,
|
4144 |
+
"loss": 1.45,
|
4145 |
+
"step": 24050
|
4146 |
+
},
|
4147 |
+
{
|
4148 |
+
"epoch": 1.853703561264518,
|
4149 |
+
"grad_norm": 1.1157580614089966,
|
4150 |
+
"learning_rate": 1.6536892026028454e-05,
|
4151 |
+
"loss": 1.4916,
|
4152 |
+
"step": 24100
|
4153 |
+
},
|
4154 |
+
{
|
4155 |
+
"epoch": 1.8575494192754403,
|
4156 |
+
"grad_norm": 1.7401970624923706,
|
4157 |
+
"learning_rate": 1.6279548546009337e-05,
|
4158 |
+
"loss": 1.4563,
|
4159 |
+
"step": 24150
|
4160 |
+
},
|
4161 |
+
{
|
4162 |
+
"epoch": 1.8613952772863627,
|
4163 |
+
"grad_norm": 1.4699925184249878,
|
4164 |
+
"learning_rate": 1.602220506599022e-05,
|
4165 |
+
"loss": 1.4211,
|
4166 |
+
"step": 24200
|
4167 |
+
},
|
4168 |
+
{
|
4169 |
+
"epoch": 1.8652411352972849,
|
4170 |
+
"grad_norm": 1.1760289669036865,
|
4171 |
+
"learning_rate": 1.5764861585971103e-05,
|
4172 |
+
"loss": 1.4212,
|
4173 |
+
"step": 24250
|
4174 |
+
},
|
4175 |
+
{
|
4176 |
+
"epoch": 1.8652411352972849,
|
4177 |
+
"eval_loss": 1.460072636604309,
|
4178 |
+
"eval_runtime": 17.8176,
|
4179 |
+
"eval_samples_per_second": 56.124,
|
4180 |
+
"eval_steps_per_second": 14.031,
|
4181 |
+
"step": 24250
|
4182 |
+
},
|
4183 |
+
{
|
4184 |
+
"epoch": 1.869086993308207,
|
4185 |
+
"grad_norm": 1.8243287801742554,
|
4186 |
+
"learning_rate": 1.5507518105951986e-05,
|
4187 |
+
"loss": 1.4594,
|
4188 |
+
"step": 24300
|
4189 |
+
},
|
4190 |
+
{
|
4191 |
+
"epoch": 1.8729328513191295,
|
4192 |
+
"grad_norm": 0.8821312785148621,
|
4193 |
+
"learning_rate": 1.5250174625932868e-05,
|
4194 |
+
"loss": 1.3837,
|
4195 |
+
"step": 24350
|
4196 |
+
},
|
4197 |
+
{
|
4198 |
+
"epoch": 1.8767787093300514,
|
4199 |
+
"grad_norm": 1.673240065574646,
|
4200 |
+
"learning_rate": 1.4992831145913753e-05,
|
4201 |
+
"loss": 1.395,
|
4202 |
+
"step": 24400
|
4203 |
+
},
|
4204 |
+
{
|
4205 |
+
"epoch": 1.8806245673409738,
|
4206 |
+
"grad_norm": 1.4853135347366333,
|
4207 |
+
"learning_rate": 1.4735487665894636e-05,
|
4208 |
+
"loss": 1.5031,
|
4209 |
+
"step": 24450
|
4210 |
+
},
|
4211 |
+
{
|
4212 |
+
"epoch": 1.884470425351896,
|
4213 |
+
"grad_norm": 2.507054567337036,
|
4214 |
+
"learning_rate": 1.4478144185875517e-05,
|
4215 |
+
"loss": 1.3909,
|
4216 |
+
"step": 24500
|
4217 |
+
},
|
4218 |
+
{
|
4219 |
+
"epoch": 1.884470425351896,
|
4220 |
+
"eval_loss": 1.4431298971176147,
|
4221 |
+
"eval_runtime": 17.9815,
|
4222 |
+
"eval_samples_per_second": 55.613,
|
4223 |
+
"eval_steps_per_second": 13.903,
|
4224 |
+
"step": 24500
|
4225 |
}
|
4226 |
],
|
4227 |
"logging_steps": 50,
|