Training in progress, step 1582, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a3cefa9cc9931df991990225dda8db2ab2b296ecec5376d3adbbd57dd025f251
|
3 |
size 27024
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 64038
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af39469519bf0540869d03be9be233cf1ac2272ce3a2dc64c22a57eb235270b7
|
3 |
size 64038
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9844928e8ff1f83b94d21d8e4a0ac1d7395933b78df97b0f4b735d807fff0be7
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07d0239ab7b0008047305900da779b181c954fd4cb175600ded5cc5061c12424
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 396,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8355,6 +8355,2764 @@
|
|
8355 |
"eval_samples_per_second": 260.743,
|
8356 |
"eval_steps_per_second": 130.382,
|
8357 |
"step": 1188
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8358 |
}
|
8359 |
],
|
8360 |
"logging_steps": 1,
|
@@ -8369,12 +11127,12 @@
|
|
8369 |
"should_evaluate": false,
|
8370 |
"should_log": false,
|
8371 |
"should_save": true,
|
8372 |
-
"should_training_stop":
|
8373 |
},
|
8374 |
"attributes": {}
|
8375 |
}
|
8376 |
},
|
8377 |
-
"total_flos":
|
8378 |
"train_batch_size": 2,
|
8379 |
"trial_name": null,
|
8380 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.05191608758788734,
|
5 |
"eval_steps": 396,
|
6 |
+
"global_step": 1582,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8355 |
"eval_samples_per_second": 260.743,
|
8356 |
"eval_steps_per_second": 130.382,
|
8357 |
"step": 1188
|
8358 |
+
},
|
8359 |
+
{
|
8360 |
+
"epoch": 0.039019107548671335,
|
8361 |
+
"grad_norm": 0.058529648929834366,
|
8362 |
+
"learning_rate": 2.9289321881345254e-05,
|
8363 |
+
"loss": 11.729,
|
8364 |
+
"step": 1189
|
8365 |
+
},
|
8366 |
+
{
|
8367 |
+
"epoch": 0.03905192429177366,
|
8368 |
+
"grad_norm": 0.05605306103825569,
|
8369 |
+
"learning_rate": 2.9148150109678417e-05,
|
8370 |
+
"loss": 11.7411,
|
8371 |
+
"step": 1190
|
8372 |
+
},
|
8373 |
+
{
|
8374 |
+
"epoch": 0.039084741034876,
|
8375 |
+
"grad_norm": 0.09039860218763351,
|
8376 |
+
"learning_rate": 2.9007261311475596e-05,
|
8377 |
+
"loss": 11.7141,
|
8378 |
+
"step": 1191
|
8379 |
+
},
|
8380 |
+
{
|
8381 |
+
"epoch": 0.039117557777978325,
|
8382 |
+
"grad_norm": 0.04979782924056053,
|
8383 |
+
"learning_rate": 2.8866656049429162e-05,
|
8384 |
+
"loss": 11.7393,
|
8385 |
+
"step": 1192
|
8386 |
+
},
|
8387 |
+
{
|
8388 |
+
"epoch": 0.03915037452108065,
|
8389 |
+
"grad_norm": 0.04439384490251541,
|
8390 |
+
"learning_rate": 2.8726334885098983e-05,
|
8391 |
+
"loss": 11.7431,
|
8392 |
+
"step": 1193
|
8393 |
+
},
|
8394 |
+
{
|
8395 |
+
"epoch": 0.03918319126418299,
|
8396 |
+
"grad_norm": 0.07974643260240555,
|
8397 |
+
"learning_rate": 2.858629837891029e-05,
|
8398 |
+
"loss": 11.7416,
|
8399 |
+
"step": 1194
|
8400 |
+
},
|
8401 |
+
{
|
8402 |
+
"epoch": 0.039216008007285315,
|
8403 |
+
"grad_norm": 0.07264185696840286,
|
8404 |
+
"learning_rate": 2.84465470901515e-05,
|
8405 |
+
"loss": 11.7248,
|
8406 |
+
"step": 1195
|
8407 |
+
},
|
8408 |
+
{
|
8409 |
+
"epoch": 0.03924882475038765,
|
8410 |
+
"grad_norm": 0.08348803222179413,
|
8411 |
+
"learning_rate": 2.8307081576971806e-05,
|
8412 |
+
"loss": 11.7171,
|
8413 |
+
"step": 1196
|
8414 |
+
},
|
8415 |
+
{
|
8416 |
+
"epoch": 0.03928164149348998,
|
8417 |
+
"grad_norm": 0.10170138627290726,
|
8418 |
+
"learning_rate": 2.816790239637914e-05,
|
8419 |
+
"loss": 11.7241,
|
8420 |
+
"step": 1197
|
8421 |
+
},
|
8422 |
+
{
|
8423 |
+
"epoch": 0.03931445823659231,
|
8424 |
+
"grad_norm": 0.0884389728307724,
|
8425 |
+
"learning_rate": 2.8029010104237785e-05,
|
8426 |
+
"loss": 11.7193,
|
8427 |
+
"step": 1198
|
8428 |
+
},
|
8429 |
+
{
|
8430 |
+
"epoch": 0.03934727497969464,
|
8431 |
+
"grad_norm": 0.055237989872694016,
|
8432 |
+
"learning_rate": 2.7890405255266284e-05,
|
8433 |
+
"loss": 11.734,
|
8434 |
+
"step": 1199
|
8435 |
+
},
|
8436 |
+
{
|
8437 |
+
"epoch": 0.039380091722796974,
|
8438 |
+
"grad_norm": 0.056070491671562195,
|
8439 |
+
"learning_rate": 2.7752088403035127e-05,
|
8440 |
+
"loss": 11.7383,
|
8441 |
+
"step": 1200
|
8442 |
+
},
|
8443 |
+
{
|
8444 |
+
"epoch": 0.0394129084658993,
|
8445 |
+
"grad_norm": 0.04988478496670723,
|
8446 |
+
"learning_rate": 2.7614060099964566e-05,
|
8447 |
+
"loss": 11.7352,
|
8448 |
+
"step": 1201
|
8449 |
+
},
|
8450 |
+
{
|
8451 |
+
"epoch": 0.03944572520900163,
|
8452 |
+
"grad_norm": 0.0998385101556778,
|
8453 |
+
"learning_rate": 2.7476320897322504e-05,
|
8454 |
+
"loss": 11.7177,
|
8455 |
+
"step": 1202
|
8456 |
+
},
|
8457 |
+
{
|
8458 |
+
"epoch": 0.039478541952103964,
|
8459 |
+
"grad_norm": 0.05293150618672371,
|
8460 |
+
"learning_rate": 2.733887134522214e-05,
|
8461 |
+
"loss": 11.7361,
|
8462 |
+
"step": 1203
|
8463 |
+
},
|
8464 |
+
{
|
8465 |
+
"epoch": 0.03951135869520629,
|
8466 |
+
"grad_norm": 0.05001327767968178,
|
8467 |
+
"learning_rate": 2.720171199261987e-05,
|
8468 |
+
"loss": 11.7417,
|
8469 |
+
"step": 1204
|
8470 |
+
},
|
8471 |
+
{
|
8472 |
+
"epoch": 0.039544175438308626,
|
8473 |
+
"grad_norm": 0.08260013163089752,
|
8474 |
+
"learning_rate": 2.706484338731302e-05,
|
8475 |
+
"loss": 11.7286,
|
8476 |
+
"step": 1205
|
8477 |
+
},
|
8478 |
+
{
|
8479 |
+
"epoch": 0.039576992181410954,
|
8480 |
+
"grad_norm": 0.06903617084026337,
|
8481 |
+
"learning_rate": 2.6928266075937823e-05,
|
8482 |
+
"loss": 11.7297,
|
8483 |
+
"step": 1206
|
8484 |
+
},
|
8485 |
+
{
|
8486 |
+
"epoch": 0.03960980892451329,
|
8487 |
+
"grad_norm": 0.0735381469130516,
|
8488 |
+
"learning_rate": 2.6791980603967016e-05,
|
8489 |
+
"loss": 11.7206,
|
8490 |
+
"step": 1207
|
8491 |
+
},
|
8492 |
+
{
|
8493 |
+
"epoch": 0.039642625667615616,
|
8494 |
+
"grad_norm": 0.06369069963693619,
|
8495 |
+
"learning_rate": 2.665598751570777e-05,
|
8496 |
+
"loss": 11.7154,
|
8497 |
+
"step": 1208
|
8498 |
+
},
|
8499 |
+
{
|
8500 |
+
"epoch": 0.03967544241071795,
|
8501 |
+
"grad_norm": 0.05109398439526558,
|
8502 |
+
"learning_rate": 2.652028735429961e-05,
|
8503 |
+
"loss": 11.7406,
|
8504 |
+
"step": 1209
|
8505 |
+
},
|
8506 |
+
{
|
8507 |
+
"epoch": 0.03970825915382028,
|
8508 |
+
"grad_norm": 0.0450345017015934,
|
8509 |
+
"learning_rate": 2.638488066171201e-05,
|
8510 |
+
"loss": 11.7352,
|
8511 |
+
"step": 1210
|
8512 |
+
},
|
8513 |
+
{
|
8514 |
+
"epoch": 0.03974107589692261,
|
8515 |
+
"grad_norm": 0.03986643627285957,
|
8516 |
+
"learning_rate": 2.6249767978742502e-05,
|
8517 |
+
"loss": 11.7437,
|
8518 |
+
"step": 1211
|
8519 |
+
},
|
8520 |
+
{
|
8521 |
+
"epoch": 0.03977389264002494,
|
8522 |
+
"grad_norm": 0.08292041718959808,
|
8523 |
+
"learning_rate": 2.6114949845014248e-05,
|
8524 |
+
"loss": 11.7309,
|
8525 |
+
"step": 1212
|
8526 |
+
},
|
8527 |
+
{
|
8528 |
+
"epoch": 0.03980670938312727,
|
8529 |
+
"grad_norm": 0.07779522985219955,
|
8530 |
+
"learning_rate": 2.5980426798974155e-05,
|
8531 |
+
"loss": 11.7294,
|
8532 |
+
"step": 1213
|
8533 |
+
},
|
8534 |
+
{
|
8535 |
+
"epoch": 0.0398395261262296,
|
8536 |
+
"grad_norm": 0.1359282284975052,
|
8537 |
+
"learning_rate": 2.584619937789047e-05,
|
8538 |
+
"loss": 11.7031,
|
8539 |
+
"step": 1214
|
8540 |
+
},
|
8541 |
+
{
|
8542 |
+
"epoch": 0.03987234286933193,
|
8543 |
+
"grad_norm": 0.09008144587278366,
|
8544 |
+
"learning_rate": 2.5712268117850767e-05,
|
8545 |
+
"loss": 11.7197,
|
8546 |
+
"step": 1215
|
8547 |
+
},
|
8548 |
+
{
|
8549 |
+
"epoch": 0.039905159612434266,
|
8550 |
+
"grad_norm": 0.1032906025648117,
|
8551 |
+
"learning_rate": 2.5578633553759878e-05,
|
8552 |
+
"loss": 11.7126,
|
8553 |
+
"step": 1216
|
8554 |
+
},
|
8555 |
+
{
|
8556 |
+
"epoch": 0.03993797635553659,
|
8557 |
+
"grad_norm": 0.11131299287080765,
|
8558 |
+
"learning_rate": 2.544529621933759e-05,
|
8559 |
+
"loss": 11.7138,
|
8560 |
+
"step": 1217
|
8561 |
+
},
|
8562 |
+
{
|
8563 |
+
"epoch": 0.03997079309863893,
|
8564 |
+
"grad_norm": 0.1221027597784996,
|
8565 |
+
"learning_rate": 2.531225664711657e-05,
|
8566 |
+
"loss": 11.7228,
|
8567 |
+
"step": 1218
|
8568 |
+
},
|
8569 |
+
{
|
8570 |
+
"epoch": 0.040003609841741256,
|
8571 |
+
"grad_norm": 0.09028609842061996,
|
8572 |
+
"learning_rate": 2.5179515368440365e-05,
|
8573 |
+
"loss": 11.7201,
|
8574 |
+
"step": 1219
|
8575 |
+
},
|
8576 |
+
{
|
8577 |
+
"epoch": 0.04003642658484359,
|
8578 |
+
"grad_norm": 0.05342724919319153,
|
8579 |
+
"learning_rate": 2.504707291346107e-05,
|
8580 |
+
"loss": 11.7375,
|
8581 |
+
"step": 1220
|
8582 |
+
},
|
8583 |
+
{
|
8584 |
+
"epoch": 0.04006924332794592,
|
8585 |
+
"grad_norm": 0.05592009052634239,
|
8586 |
+
"learning_rate": 2.4914929811137334e-05,
|
8587 |
+
"loss": 11.7331,
|
8588 |
+
"step": 1221
|
8589 |
+
},
|
8590 |
+
{
|
8591 |
+
"epoch": 0.040102060071048246,
|
8592 |
+
"grad_norm": 0.1136632189154625,
|
8593 |
+
"learning_rate": 2.4783086589232295e-05,
|
8594 |
+
"loss": 11.7182,
|
8595 |
+
"step": 1222
|
8596 |
+
},
|
8597 |
+
{
|
8598 |
+
"epoch": 0.04013487681415058,
|
8599 |
+
"grad_norm": 0.1705104261636734,
|
8600 |
+
"learning_rate": 2.4651543774311327e-05,
|
8601 |
+
"loss": 11.6987,
|
8602 |
+
"step": 1223
|
8603 |
+
},
|
8604 |
+
{
|
8605 |
+
"epoch": 0.04016769355725291,
|
8606 |
+
"grad_norm": 0.06176282465457916,
|
8607 |
+
"learning_rate": 2.4520301891740084e-05,
|
8608 |
+
"loss": 11.7449,
|
8609 |
+
"step": 1224
|
8610 |
+
},
|
8611 |
+
{
|
8612 |
+
"epoch": 0.04020051030035524,
|
8613 |
+
"grad_norm": 0.17717483639717102,
|
8614 |
+
"learning_rate": 2.438936146568225e-05,
|
8615 |
+
"loss": 11.7011,
|
8616 |
+
"step": 1225
|
8617 |
+
},
|
8618 |
+
{
|
8619 |
+
"epoch": 0.04023332704345757,
|
8620 |
+
"grad_norm": 0.12815691530704498,
|
8621 |
+
"learning_rate": 2.425872301909764e-05,
|
8622 |
+
"loss": 11.705,
|
8623 |
+
"step": 1226
|
8624 |
+
},
|
8625 |
+
{
|
8626 |
+
"epoch": 0.040266143786559905,
|
8627 |
+
"grad_norm": 0.0928877592086792,
|
8628 |
+
"learning_rate": 2.4128387073739912e-05,
|
8629 |
+
"loss": 11.7332,
|
8630 |
+
"step": 1227
|
8631 |
+
},
|
8632 |
+
{
|
8633 |
+
"epoch": 0.04029896052966223,
|
8634 |
+
"grad_norm": 0.05088043212890625,
|
8635 |
+
"learning_rate": 2.3998354150154555e-05,
|
8636 |
+
"loss": 11.7367,
|
8637 |
+
"step": 1228
|
8638 |
+
},
|
8639 |
+
{
|
8640 |
+
"epoch": 0.04033177727276457,
|
8641 |
+
"grad_norm": 0.05271236598491669,
|
8642 |
+
"learning_rate": 2.386862476767694e-05,
|
8643 |
+
"loss": 11.7369,
|
8644 |
+
"step": 1229
|
8645 |
+
},
|
8646 |
+
{
|
8647 |
+
"epoch": 0.040364594015866895,
|
8648 |
+
"grad_norm": 0.04992619529366493,
|
8649 |
+
"learning_rate": 2.3739199444430027e-05,
|
8650 |
+
"loss": 11.7413,
|
8651 |
+
"step": 1230
|
8652 |
+
},
|
8653 |
+
{
|
8654 |
+
"epoch": 0.04039741075896923,
|
8655 |
+
"grad_norm": 0.05252360180020332,
|
8656 |
+
"learning_rate": 2.3610078697322424e-05,
|
8657 |
+
"loss": 11.7381,
|
8658 |
+
"step": 1231
|
8659 |
+
},
|
8660 |
+
{
|
8661 |
+
"epoch": 0.04043022750207156,
|
8662 |
+
"grad_norm": 0.048930808901786804,
|
8663 |
+
"learning_rate": 2.3481263042046353e-05,
|
8664 |
+
"loss": 11.7371,
|
8665 |
+
"step": 1232
|
8666 |
+
},
|
8667 |
+
{
|
8668 |
+
"epoch": 0.040463044245173885,
|
8669 |
+
"grad_norm": 0.08468115329742432,
|
8670 |
+
"learning_rate": 2.3352752993075478e-05,
|
8671 |
+
"loss": 11.7384,
|
8672 |
+
"step": 1233
|
8673 |
+
},
|
8674 |
+
{
|
8675 |
+
"epoch": 0.04049586098827622,
|
8676 |
+
"grad_norm": 0.0556829497218132,
|
8677 |
+
"learning_rate": 2.3224549063662927e-05,
|
8678 |
+
"loss": 11.7416,
|
8679 |
+
"step": 1234
|
8680 |
+
},
|
8681 |
+
{
|
8682 |
+
"epoch": 0.04052867773137855,
|
8683 |
+
"grad_norm": 0.049658242613077164,
|
8684 |
+
"learning_rate": 2.3096651765839206e-05,
|
8685 |
+
"loss": 11.7362,
|
8686 |
+
"step": 1235
|
8687 |
+
},
|
8688 |
+
{
|
8689 |
+
"epoch": 0.04056149447448088,
|
8690 |
+
"grad_norm": 0.08207622915506363,
|
8691 |
+
"learning_rate": 2.2969061610410224e-05,
|
8692 |
+
"loss": 11.7377,
|
8693 |
+
"step": 1236
|
8694 |
+
},
|
8695 |
+
{
|
8696 |
+
"epoch": 0.04059431121758321,
|
8697 |
+
"grad_norm": 0.058620791882276535,
|
8698 |
+
"learning_rate": 2.2841779106955153e-05,
|
8699 |
+
"loss": 11.7386,
|
8700 |
+
"step": 1237
|
8701 |
+
},
|
8702 |
+
{
|
8703 |
+
"epoch": 0.040627127960685544,
|
8704 |
+
"grad_norm": 0.050690505653619766,
|
8705 |
+
"learning_rate": 2.2714804763824448e-05,
|
8706 |
+
"loss": 11.7405,
|
8707 |
+
"step": 1238
|
8708 |
+
},
|
8709 |
+
{
|
8710 |
+
"epoch": 0.04065994470378787,
|
8711 |
+
"grad_norm": 0.06698761135339737,
|
8712 |
+
"learning_rate": 2.2588139088137883e-05,
|
8713 |
+
"loss": 11.7384,
|
8714 |
+
"step": 1239
|
8715 |
+
},
|
8716 |
+
{
|
8717 |
+
"epoch": 0.040692761446890206,
|
8718 |
+
"grad_norm": 0.04978778958320618,
|
8719 |
+
"learning_rate": 2.246178258578234e-05,
|
8720 |
+
"loss": 11.7408,
|
8721 |
+
"step": 1240
|
8722 |
+
},
|
8723 |
+
{
|
8724 |
+
"epoch": 0.040725578189992534,
|
8725 |
+
"grad_norm": 0.039015837013721466,
|
8726 |
+
"learning_rate": 2.233573576140996e-05,
|
8727 |
+
"loss": 11.7424,
|
8728 |
+
"step": 1241
|
8729 |
+
},
|
8730 |
+
{
|
8731 |
+
"epoch": 0.04075839493309486,
|
8732 |
+
"grad_norm": 0.06051992252469063,
|
8733 |
+
"learning_rate": 2.2209999118436043e-05,
|
8734 |
+
"loss": 11.7326,
|
8735 |
+
"step": 1242
|
8736 |
+
},
|
8737 |
+
{
|
8738 |
+
"epoch": 0.040791211676197196,
|
8739 |
+
"grad_norm": 0.054549023509025574,
|
8740 |
+
"learning_rate": 2.2084573159037115e-05,
|
8741 |
+
"loss": 11.7397,
|
8742 |
+
"step": 1243
|
8743 |
+
},
|
8744 |
+
{
|
8745 |
+
"epoch": 0.040824028419299524,
|
8746 |
+
"grad_norm": 0.05742061510682106,
|
8747 |
+
"learning_rate": 2.1959458384148822e-05,
|
8748 |
+
"loss": 11.7387,
|
8749 |
+
"step": 1244
|
8750 |
+
},
|
8751 |
+
{
|
8752 |
+
"epoch": 0.04085684516240186,
|
8753 |
+
"grad_norm": 0.12012781947851181,
|
8754 |
+
"learning_rate": 2.1834655293463956e-05,
|
8755 |
+
"loss": 11.7169,
|
8756 |
+
"step": 1245
|
8757 |
+
},
|
8758 |
+
{
|
8759 |
+
"epoch": 0.040889661905504186,
|
8760 |
+
"grad_norm": 0.07369308173656464,
|
8761 |
+
"learning_rate": 2.171016438543059e-05,
|
8762 |
+
"loss": 11.7307,
|
8763 |
+
"step": 1246
|
8764 |
+
},
|
8765 |
+
{
|
8766 |
+
"epoch": 0.04092247864860652,
|
8767 |
+
"grad_norm": 0.08169244229793549,
|
8768 |
+
"learning_rate": 2.158598615724986e-05,
|
8769 |
+
"loss": 11.7227,
|
8770 |
+
"step": 1247
|
8771 |
+
},
|
8772 |
+
{
|
8773 |
+
"epoch": 0.04095529539170885,
|
8774 |
+
"grad_norm": 0.08290760964155197,
|
8775 |
+
"learning_rate": 2.1462121104874124e-05,
|
8776 |
+
"loss": 11.7336,
|
8777 |
+
"step": 1248
|
8778 |
+
},
|
8779 |
+
{
|
8780 |
+
"epoch": 0.04098811213481118,
|
8781 |
+
"grad_norm": 0.0537097342312336,
|
8782 |
+
"learning_rate": 2.133856972300503e-05,
|
8783 |
+
"loss": 11.7372,
|
8784 |
+
"step": 1249
|
8785 |
+
},
|
8786 |
+
{
|
8787 |
+
"epoch": 0.04102092887791351,
|
8788 |
+
"grad_norm": 0.047034166753292084,
|
8789 |
+
"learning_rate": 2.1215332505091345e-05,
|
8790 |
+
"loss": 11.7358,
|
8791 |
+
"step": 1250
|
8792 |
+
},
|
8793 |
+
{
|
8794 |
+
"epoch": 0.041053745621015846,
|
8795 |
+
"grad_norm": 0.05744253471493721,
|
8796 |
+
"learning_rate": 2.109240994332724e-05,
|
8797 |
+
"loss": 11.7403,
|
8798 |
+
"step": 1251
|
8799 |
+
},
|
8800 |
+
{
|
8801 |
+
"epoch": 0.04108656236411817,
|
8802 |
+
"grad_norm": 0.07626025378704071,
|
8803 |
+
"learning_rate": 2.096980252865005e-05,
|
8804 |
+
"loss": 11.7423,
|
8805 |
+
"step": 1252
|
8806 |
+
},
|
8807 |
+
{
|
8808 |
+
"epoch": 0.0411193791072205,
|
8809 |
+
"grad_norm": 0.056532811373472214,
|
8810 |
+
"learning_rate": 2.0847510750738563e-05,
|
8811 |
+
"loss": 11.7316,
|
8812 |
+
"step": 1253
|
8813 |
+
},
|
8814 |
+
{
|
8815 |
+
"epoch": 0.041152195850322836,
|
8816 |
+
"grad_norm": 0.041129954159259796,
|
8817 |
+
"learning_rate": 2.0725535098010906e-05,
|
8818 |
+
"loss": 11.7427,
|
8819 |
+
"step": 1254
|
8820 |
+
},
|
8821 |
+
{
|
8822 |
+
"epoch": 0.04118501259342516,
|
8823 |
+
"grad_norm": 0.08624063432216644,
|
8824 |
+
"learning_rate": 2.0603876057622607e-05,
|
8825 |
+
"loss": 11.7224,
|
8826 |
+
"step": 1255
|
8827 |
+
},
|
8828 |
+
{
|
8829 |
+
"epoch": 0.0412178293365275,
|
8830 |
+
"grad_norm": 0.059852100908756256,
|
8831 |
+
"learning_rate": 2.048253411546477e-05,
|
8832 |
+
"loss": 11.7336,
|
8833 |
+
"step": 1256
|
8834 |
+
},
|
8835 |
+
{
|
8836 |
+
"epoch": 0.041250646079629825,
|
8837 |
+
"grad_norm": 0.06759022921323776,
|
8838 |
+
"learning_rate": 2.0361509756161978e-05,
|
8839 |
+
"loss": 11.7367,
|
8840 |
+
"step": 1257
|
8841 |
+
},
|
8842 |
+
{
|
8843 |
+
"epoch": 0.04128346282273216,
|
8844 |
+
"grad_norm": 0.04637046158313751,
|
8845 |
+
"learning_rate": 2.0240803463070425e-05,
|
8846 |
+
"loss": 11.734,
|
8847 |
+
"step": 1258
|
8848 |
+
},
|
8849 |
+
{
|
8850 |
+
"epoch": 0.04131627956583449,
|
8851 |
+
"grad_norm": 0.04505132883787155,
|
8852 |
+
"learning_rate": 2.0120415718276054e-05,
|
8853 |
+
"loss": 11.7408,
|
8854 |
+
"step": 1259
|
8855 |
+
},
|
8856 |
+
{
|
8857 |
+
"epoch": 0.04134909630893682,
|
8858 |
+
"grad_norm": 0.07667926698923111,
|
8859 |
+
"learning_rate": 2.00003470025925e-05,
|
8860 |
+
"loss": 11.7356,
|
8861 |
+
"step": 1260
|
8862 |
+
},
|
8863 |
+
{
|
8864 |
+
"epoch": 0.04138191305203915,
|
8865 |
+
"grad_norm": 0.073739193379879,
|
8866 |
+
"learning_rate": 1.9880597795559232e-05,
|
8867 |
+
"loss": 11.7263,
|
8868 |
+
"step": 1261
|
8869 |
+
},
|
8870 |
+
{
|
8871 |
+
"epoch": 0.04141472979514148,
|
8872 |
+
"grad_norm": 0.0515030212700367,
|
8873 |
+
"learning_rate": 1.9761168575439736e-05,
|
8874 |
+
"loss": 11.7363,
|
8875 |
+
"step": 1262
|
8876 |
+
},
|
8877 |
+
{
|
8878 |
+
"epoch": 0.04144754653824381,
|
8879 |
+
"grad_norm": 0.04036729410290718,
|
8880 |
+
"learning_rate": 1.9642059819219405e-05,
|
8881 |
+
"loss": 11.738,
|
8882 |
+
"step": 1263
|
8883 |
+
},
|
8884 |
+
{
|
8885 |
+
"epoch": 0.04148036328134614,
|
8886 |
+
"grad_norm": 0.053808197379112244,
|
8887 |
+
"learning_rate": 1.9523272002603742e-05,
|
8888 |
+
"loss": 11.7373,
|
8889 |
+
"step": 1264
|
8890 |
+
},
|
8891 |
+
{
|
8892 |
+
"epoch": 0.041513180024448475,
|
8893 |
+
"grad_norm": 0.09468836337327957,
|
8894 |
+
"learning_rate": 1.9404805600016528e-05,
|
8895 |
+
"loss": 11.7351,
|
8896 |
+
"step": 1265
|
8897 |
+
},
|
8898 |
+
{
|
8899 |
+
"epoch": 0.0415459967675508,
|
8900 |
+
"grad_norm": 0.05191861838102341,
|
8901 |
+
"learning_rate": 1.9286661084597856e-05,
|
8902 |
+
"loss": 11.7408,
|
8903 |
+
"step": 1266
|
8904 |
+
},
|
8905 |
+
{
|
8906 |
+
"epoch": 0.04157881351065314,
|
8907 |
+
"grad_norm": 0.1117781326174736,
|
8908 |
+
"learning_rate": 1.916883892820216e-05,
|
8909 |
+
"loss": 11.728,
|
8910 |
+
"step": 1267
|
8911 |
+
},
|
8912 |
+
{
|
8913 |
+
"epoch": 0.041611630253755465,
|
8914 |
+
"grad_norm": 0.0627184510231018,
|
8915 |
+
"learning_rate": 1.9051339601396467e-05,
|
8916 |
+
"loss": 11.7349,
|
8917 |
+
"step": 1268
|
8918 |
+
},
|
8919 |
+
{
|
8920 |
+
"epoch": 0.0416444469968578,
|
8921 |
+
"grad_norm": 0.08758054673671722,
|
8922 |
+
"learning_rate": 1.893416357345843e-05,
|
8923 |
+
"loss": 11.7256,
|
8924 |
+
"step": 1269
|
8925 |
+
},
|
8926 |
+
{
|
8927 |
+
"epoch": 0.04167726373996013,
|
8928 |
+
"grad_norm": 0.07504897564649582,
|
8929 |
+
"learning_rate": 1.8817311312374564e-05,
|
8930 |
+
"loss": 11.7284,
|
8931 |
+
"step": 1270
|
8932 |
+
},
|
8933 |
+
{
|
8934 |
+
"epoch": 0.04171008048306246,
|
8935 |
+
"grad_norm": 0.07263998687267303,
|
8936 |
+
"learning_rate": 1.870078328483821e-05,
|
8937 |
+
"loss": 11.7208,
|
8938 |
+
"step": 1271
|
8939 |
+
},
|
8940 |
+
{
|
8941 |
+
"epoch": 0.04174289722616479,
|
8942 |
+
"grad_norm": 0.05032264068722725,
|
8943 |
+
"learning_rate": 1.8584579956247784e-05,
|
8944 |
+
"loss": 11.7421,
|
8945 |
+
"step": 1272
|
8946 |
+
},
|
8947 |
+
{
|
8948 |
+
"epoch": 0.04177571396926712,
|
8949 |
+
"grad_norm": 0.06325464695692062,
|
8950 |
+
"learning_rate": 1.8468701790704956e-05,
|
8951 |
+
"loss": 11.7355,
|
8952 |
+
"step": 1273
|
8953 |
+
},
|
8954 |
+
{
|
8955 |
+
"epoch": 0.04180853071236945,
|
8956 |
+
"grad_norm": 0.04435846954584122,
|
8957 |
+
"learning_rate": 1.8353149251012647e-05,
|
8958 |
+
"loss": 11.7407,
|
8959 |
+
"step": 1274
|
8960 |
+
},
|
8961 |
+
{
|
8962 |
+
"epoch": 0.04184134745547178,
|
8963 |
+
"grad_norm": 0.10255692899227142,
|
8964 |
+
"learning_rate": 1.8237922798673302e-05,
|
8965 |
+
"loss": 11.726,
|
8966 |
+
"step": 1275
|
8967 |
+
},
|
8968 |
+
{
|
8969 |
+
"epoch": 0.041874164198574114,
|
8970 |
+
"grad_norm": 0.04318875074386597,
|
8971 |
+
"learning_rate": 1.8123022893887065e-05,
|
8972 |
+
"loss": 11.7376,
|
8973 |
+
"step": 1276
|
8974 |
+
},
|
8975 |
+
{
|
8976 |
+
"epoch": 0.04190698094167644,
|
8977 |
+
"grad_norm": 0.06178098917007446,
|
8978 |
+
"learning_rate": 1.8008449995549813e-05,
|
8979 |
+
"loss": 11.7365,
|
8980 |
+
"step": 1277
|
8981 |
+
},
|
8982 |
+
{
|
8983 |
+
"epoch": 0.041939797684778776,
|
8984 |
+
"grad_norm": 0.053335610777139664,
|
8985 |
+
"learning_rate": 1.7894204561251417e-05,
|
8986 |
+
"loss": 11.7318,
|
8987 |
+
"step": 1278
|
8988 |
+
},
|
8989 |
+
{
|
8990 |
+
"epoch": 0.041972614427881104,
|
8991 |
+
"grad_norm": 0.07568799704313278,
|
8992 |
+
"learning_rate": 1.7780287047273936e-05,
|
8993 |
+
"loss": 11.7325,
|
8994 |
+
"step": 1279
|
8995 |
+
},
|
8996 |
+
{
|
8997 |
+
"epoch": 0.04200543117098344,
|
8998 |
+
"grad_norm": 0.08910274505615234,
|
8999 |
+
"learning_rate": 1.7666697908589745e-05,
|
9000 |
+
"loss": 11.7218,
|
9001 |
+
"step": 1280
|
9002 |
+
},
|
9003 |
+
{
|
9004 |
+
"epoch": 0.042038247914085766,
|
9005 |
+
"grad_norm": 0.057899799197912216,
|
9006 |
+
"learning_rate": 1.7553437598859712e-05,
|
9007 |
+
"loss": 11.7343,
|
9008 |
+
"step": 1281
|
9009 |
+
},
|
9010 |
+
{
|
9011 |
+
"epoch": 0.042071064657188094,
|
9012 |
+
"grad_norm": 0.05727062746882439,
|
9013 |
+
"learning_rate": 1.744050657043137e-05,
|
9014 |
+
"loss": 11.7355,
|
9015 |
+
"step": 1282
|
9016 |
+
},
|
9017 |
+
{
|
9018 |
+
"epoch": 0.04210388140029043,
|
9019 |
+
"grad_norm": 0.059841498732566833,
|
9020 |
+
"learning_rate": 1.7327905274337232e-05,
|
9021 |
+
"loss": 11.7392,
|
9022 |
+
"step": 1283
|
9023 |
+
},
|
9024 |
+
{
|
9025 |
+
"epoch": 0.042136698143392756,
|
9026 |
+
"grad_norm": 0.07449740171432495,
|
9027 |
+
"learning_rate": 1.7215634160292825e-05,
|
9028 |
+
"loss": 11.7362,
|
9029 |
+
"step": 1284
|
9030 |
+
},
|
9031 |
+
{
|
9032 |
+
"epoch": 0.04216951488649509,
|
9033 |
+
"grad_norm": 0.07426772266626358,
|
9034 |
+
"learning_rate": 1.7103693676694975e-05,
|
9035 |
+
"loss": 11.7089,
|
9036 |
+
"step": 1285
|
9037 |
+
},
|
9038 |
+
{
|
9039 |
+
"epoch": 0.04220233162959742,
|
9040 |
+
"grad_norm": 0.05877260863780975,
|
9041 |
+
"learning_rate": 1.699208427062008e-05,
|
9042 |
+
"loss": 11.7424,
|
9043 |
+
"step": 1286
|
9044 |
+
},
|
9045 |
+
{
|
9046 |
+
"epoch": 0.04223514837269975,
|
9047 |
+
"grad_norm": 0.05815764144062996,
|
9048 |
+
"learning_rate": 1.688080638782218e-05,
|
9049 |
+
"loss": 11.7347,
|
9050 |
+
"step": 1287
|
9051 |
+
},
|
9052 |
+
{
|
9053 |
+
"epoch": 0.04226796511580208,
|
9054 |
+
"grad_norm": 0.05138293653726578,
|
9055 |
+
"learning_rate": 1.6769860472731257e-05,
|
9056 |
+
"loss": 11.7325,
|
9057 |
+
"step": 1288
|
9058 |
+
},
|
9059 |
+
{
|
9060 |
+
"epoch": 0.042300781858904415,
|
9061 |
+
"grad_norm": 0.07835102081298828,
|
9062 |
+
"learning_rate": 1.6659246968451526e-05,
|
9063 |
+
"loss": 11.7427,
|
9064 |
+
"step": 1289
|
9065 |
+
},
|
9066 |
+
{
|
9067 |
+
"epoch": 0.04233359860200674,
|
9068 |
+
"grad_norm": 0.04566236585378647,
|
9069 |
+
"learning_rate": 1.6548966316759518e-05,
|
9070 |
+
"loss": 11.7357,
|
9071 |
+
"step": 1290
|
9072 |
+
},
|
9073 |
+
{
|
9074 |
+
"epoch": 0.04236641534510908,
|
9075 |
+
"grad_norm": 0.04487951472401619,
|
9076 |
+
"learning_rate": 1.6439018958102405e-05,
|
9077 |
+
"loss": 11.7335,
|
9078 |
+
"step": 1291
|
9079 |
+
},
|
9080 |
+
{
|
9081 |
+
"epoch": 0.042399232088211405,
|
9082 |
+
"grad_norm": 0.07058252394199371,
|
9083 |
+
"learning_rate": 1.632940533159626e-05,
|
9084 |
+
"loss": 11.7387,
|
9085 |
+
"step": 1292
|
9086 |
+
},
|
9087 |
+
{
|
9088 |
+
"epoch": 0.04243204883131373,
|
9089 |
+
"grad_norm": 0.11475636065006256,
|
9090 |
+
"learning_rate": 1.6220125875024295e-05,
|
9091 |
+
"loss": 11.7246,
|
9092 |
+
"step": 1293
|
9093 |
+
},
|
9094 |
+
{
|
9095 |
+
"epoch": 0.04246486557441607,
|
9096 |
+
"grad_norm": 0.06470225006341934,
|
9097 |
+
"learning_rate": 1.6111181024835e-05,
|
9098 |
+
"loss": 11.7437,
|
9099 |
+
"step": 1294
|
9100 |
+
},
|
9101 |
+
{
|
9102 |
+
"epoch": 0.042497682317518395,
|
9103 |
+
"grad_norm": 0.056174106895923615,
|
9104 |
+
"learning_rate": 1.6002571216140527e-05,
|
9105 |
+
"loss": 11.7388,
|
9106 |
+
"step": 1295
|
9107 |
+
},
|
9108 |
+
{
|
9109 |
+
"epoch": 0.04253049906062073,
|
9110 |
+
"grad_norm": 0.07417520880699158,
|
9111 |
+
"learning_rate": 1.5894296882714976e-05,
|
9112 |
+
"loss": 11.7234,
|
9113 |
+
"step": 1296
|
9114 |
+
},
|
9115 |
+
{
|
9116 |
+
"epoch": 0.04256331580372306,
|
9117 |
+
"grad_norm": 0.05535079911351204,
|
9118 |
+
"learning_rate": 1.5786358456992524e-05,
|
9119 |
+
"loss": 11.7346,
|
9120 |
+
"step": 1297
|
9121 |
+
},
|
9122 |
+
{
|
9123 |
+
"epoch": 0.04259613254682539,
|
9124 |
+
"grad_norm": 0.08101984858512878,
|
9125 |
+
"learning_rate": 1.5678756370065794e-05,
|
9126 |
+
"loss": 11.7255,
|
9127 |
+
"step": 1298
|
9128 |
+
},
|
9129 |
+
{
|
9130 |
+
"epoch": 0.04262894928992772,
|
9131 |
+
"grad_norm": 0.043854404240846634,
|
9132 |
+
"learning_rate": 1.5571491051684108e-05,
|
9133 |
+
"loss": 11.7406,
|
9134 |
+
"step": 1299
|
9135 |
+
},
|
9136 |
+
{
|
9137 |
+
"epoch": 0.042661766033030055,
|
9138 |
+
"grad_norm": 0.06458664685487747,
|
9139 |
+
"learning_rate": 1.5464562930251814e-05,
|
9140 |
+
"loss": 11.734,
|
9141 |
+
"step": 1300
|
9142 |
+
},
|
9143 |
+
{
|
9144 |
+
"epoch": 0.04269458277613238,
|
9145 |
+
"grad_norm": 0.05817029997706413,
|
9146 |
+
"learning_rate": 1.5357972432826505e-05,
|
9147 |
+
"loss": 11.742,
|
9148 |
+
"step": 1301
|
9149 |
+
},
|
9150 |
+
{
|
9151 |
+
"epoch": 0.04272739951923472,
|
9152 |
+
"grad_norm": 0.10505834966897964,
|
9153 |
+
"learning_rate": 1.525171998511733e-05,
|
9154 |
+
"loss": 11.7191,
|
9155 |
+
"step": 1302
|
9156 |
+
},
|
9157 |
+
{
|
9158 |
+
"epoch": 0.042760216262337045,
|
9159 |
+
"grad_norm": 0.08963669836521149,
|
9160 |
+
"learning_rate": 1.5145806011483376e-05,
|
9161 |
+
"loss": 11.7124,
|
9162 |
+
"step": 1303
|
9163 |
+
},
|
9164 |
+
{
|
9165 |
+
"epoch": 0.04279303300543937,
|
9166 |
+
"grad_norm": 0.0702606588602066,
|
9167 |
+
"learning_rate": 1.5040230934931832e-05,
|
9168 |
+
"loss": 11.7317,
|
9169 |
+
"step": 1304
|
9170 |
+
},
|
9171 |
+
{
|
9172 |
+
"epoch": 0.04282584974854171,
|
9173 |
+
"grad_norm": 0.04868291690945625,
|
9174 |
+
"learning_rate": 1.4934995177116385e-05,
|
9175 |
+
"loss": 11.736,
|
9176 |
+
"step": 1305
|
9177 |
+
},
|
9178 |
+
{
|
9179 |
+
"epoch": 0.042858666491644035,
|
9180 |
+
"grad_norm": 0.12275619059801102,
|
9181 |
+
"learning_rate": 1.4830099158335563e-05,
|
9182 |
+
"loss": 11.7185,
|
9183 |
+
"step": 1306
|
9184 |
+
},
|
9185 |
+
{
|
9186 |
+
"epoch": 0.04289148323474637,
|
9187 |
+
"grad_norm": 0.05797203257679939,
|
9188 |
+
"learning_rate": 1.472554329753103e-05,
|
9189 |
+
"loss": 11.7413,
|
9190 |
+
"step": 1307
|
9191 |
+
},
|
9192 |
+
{
|
9193 |
+
"epoch": 0.0429242999778487,
|
9194 |
+
"grad_norm": 0.07233511656522751,
|
9195 |
+
"learning_rate": 1.462132801228585e-05,
|
9196 |
+
"loss": 11.7216,
|
9197 |
+
"step": 1308
|
9198 |
+
},
|
9199 |
+
{
|
9200 |
+
"epoch": 0.04295711672095103,
|
9201 |
+
"grad_norm": 0.047736991196870804,
|
9202 |
+
"learning_rate": 1.4517453718822872e-05,
|
9203 |
+
"loss": 11.7428,
|
9204 |
+
"step": 1309
|
9205 |
+
},
|
9206 |
+
{
|
9207 |
+
"epoch": 0.04298993346405336,
|
9208 |
+
"grad_norm": 0.036908071488142014,
|
9209 |
+
"learning_rate": 1.4413920832003124e-05,
|
9210 |
+
"loss": 11.7444,
|
9211 |
+
"step": 1310
|
9212 |
+
},
|
9213 |
+
{
|
9214 |
+
"epoch": 0.043022750207155694,
|
9215 |
+
"grad_norm": 0.06554009020328522,
|
9216 |
+
"learning_rate": 1.4310729765324039e-05,
|
9217 |
+
"loss": 11.7402,
|
9218 |
+
"step": 1311
|
9219 |
+
},
|
9220 |
+
{
|
9221 |
+
"epoch": 0.04305556695025802,
|
9222 |
+
"grad_norm": 0.11155742406845093,
|
9223 |
+
"learning_rate": 1.4207880930917871e-05,
|
9224 |
+
"loss": 11.7047,
|
9225 |
+
"step": 1312
|
9226 |
+
},
|
9227 |
+
{
|
9228 |
+
"epoch": 0.04308838369336035,
|
9229 |
+
"grad_norm": 0.08036734163761139,
|
9230 |
+
"learning_rate": 1.4105374739550092e-05,
|
9231 |
+
"loss": 11.7244,
|
9232 |
+
"step": 1313
|
9233 |
+
},
|
9234 |
+
{
|
9235 |
+
"epoch": 0.043121200436462684,
|
9236 |
+
"grad_norm": 0.05277916043996811,
|
9237 |
+
"learning_rate": 1.400321160061765e-05,
|
9238 |
+
"loss": 11.7287,
|
9239 |
+
"step": 1314
|
9240 |
+
},
|
9241 |
+
{
|
9242 |
+
"epoch": 0.04315401717956501,
|
9243 |
+
"grad_norm": 0.08592399209737778,
|
9244 |
+
"learning_rate": 1.3901391922147367e-05,
|
9245 |
+
"loss": 11.7372,
|
9246 |
+
"step": 1315
|
9247 |
+
},
|
9248 |
+
{
|
9249 |
+
"epoch": 0.043186833922667346,
|
9250 |
+
"grad_norm": 0.04584338515996933,
|
9251 |
+
"learning_rate": 1.3799916110794398e-05,
|
9252 |
+
"loss": 11.7403,
|
9253 |
+
"step": 1316
|
9254 |
+
},
|
9255 |
+
{
|
9256 |
+
"epoch": 0.043219650665769674,
|
9257 |
+
"grad_norm": 0.04567974805831909,
|
9258 |
+
"learning_rate": 1.3698784571840484e-05,
|
9259 |
+
"loss": 11.7509,
|
9260 |
+
"step": 1317
|
9261 |
+
},
|
9262 |
+
{
|
9263 |
+
"epoch": 0.04325246740887201,
|
9264 |
+
"grad_norm": 0.05535854771733284,
|
9265 |
+
"learning_rate": 1.3597997709192378e-05,
|
9266 |
+
"loss": 11.7335,
|
9267 |
+
"step": 1318
|
9268 |
+
},
|
9269 |
+
{
|
9270 |
+
"epoch": 0.043285284151974336,
|
9271 |
+
"grad_norm": 0.06601449102163315,
|
9272 |
+
"learning_rate": 1.3497555925380257e-05,
|
9273 |
+
"loss": 11.7385,
|
9274 |
+
"step": 1319
|
9275 |
+
},
|
9276 |
+
{
|
9277 |
+
"epoch": 0.04331810089507667,
|
9278 |
+
"grad_norm": 0.1070541962981224,
|
9279 |
+
"learning_rate": 1.339745962155613e-05,
|
9280 |
+
"loss": 11.7174,
|
9281 |
+
"step": 1320
|
9282 |
+
},
|
9283 |
+
{
|
9284 |
+
"epoch": 0.043350917638179,
|
9285 |
+
"grad_norm": 0.10094062238931656,
|
9286 |
+
"learning_rate": 1.3297709197492158e-05,
|
9287 |
+
"loss": 11.7397,
|
9288 |
+
"step": 1321
|
9289 |
+
},
|
9290 |
+
{
|
9291 |
+
"epoch": 0.04338373438128133,
|
9292 |
+
"grad_norm": 0.04681110009551048,
|
9293 |
+
"learning_rate": 1.3198305051579062e-05,
|
9294 |
+
"loss": 11.7308,
|
9295 |
+
"step": 1322
|
9296 |
+
},
|
9297 |
+
{
|
9298 |
+
"epoch": 0.04341655112438366,
|
9299 |
+
"grad_norm": 0.06381148099899292,
|
9300 |
+
"learning_rate": 1.3099247580824692e-05,
|
9301 |
+
"loss": 11.7374,
|
9302 |
+
"step": 1323
|
9303 |
+
},
|
9304 |
+
{
|
9305 |
+
"epoch": 0.04344936786748599,
|
9306 |
+
"grad_norm": 0.05641457810997963,
|
9307 |
+
"learning_rate": 1.3000537180852212e-05,
|
9308 |
+
"loss": 11.7294,
|
9309 |
+
"step": 1324
|
9310 |
+
},
|
9311 |
+
{
|
9312 |
+
"epoch": 0.04348218461058832,
|
9313 |
+
"grad_norm": 0.0542159304022789,
|
9314 |
+
"learning_rate": 1.2902174245898668e-05,
|
9315 |
+
"loss": 11.733,
|
9316 |
+
"step": 1325
|
9317 |
+
},
|
9318 |
+
{
|
9319 |
+
"epoch": 0.04351500135369065,
|
9320 |
+
"grad_norm": 0.059829600155353546,
|
9321 |
+
"learning_rate": 1.2804159168813346e-05,
|
9322 |
+
"loss": 11.7385,
|
9323 |
+
"step": 1326
|
9324 |
+
},
|
9325 |
+
{
|
9326 |
+
"epoch": 0.043547818096792985,
|
9327 |
+
"grad_norm": 0.06166405975818634,
|
9328 |
+
"learning_rate": 1.2706492341056297e-05,
|
9329 |
+
"loss": 11.7387,
|
9330 |
+
"step": 1327
|
9331 |
+
},
|
9332 |
+
{
|
9333 |
+
"epoch": 0.04358063483989531,
|
9334 |
+
"grad_norm": 0.06158718839287758,
|
9335 |
+
"learning_rate": 1.2609174152696657e-05,
|
9336 |
+
"loss": 11.7299,
|
9337 |
+
"step": 1328
|
9338 |
+
},
|
9339 |
+
{
|
9340 |
+
"epoch": 0.04361345158299765,
|
9341 |
+
"grad_norm": 0.045416850596666336,
|
9342 |
+
"learning_rate": 1.2512204992411126e-05,
|
9343 |
+
"loss": 11.7317,
|
9344 |
+
"step": 1329
|
9345 |
+
},
|
9346 |
+
{
|
9347 |
+
"epoch": 0.043646268326099975,
|
9348 |
+
"grad_norm": 0.048914626240730286,
|
9349 |
+
"learning_rate": 1.2415585247482498e-05,
|
9350 |
+
"loss": 11.7408,
|
9351 |
+
"step": 1330
|
9352 |
+
},
|
9353 |
+
{
|
9354 |
+
"epoch": 0.04367908506920231,
|
9355 |
+
"grad_norm": 0.08317140489816666,
|
9356 |
+
"learning_rate": 1.2319315303797962e-05,
|
9357 |
+
"loss": 11.7279,
|
9358 |
+
"step": 1331
|
9359 |
+
},
|
9360 |
+
{
|
9361 |
+
"epoch": 0.04371190181230464,
|
9362 |
+
"grad_norm": 0.06030471622943878,
|
9363 |
+
"learning_rate": 1.222339554584767e-05,
|
9364 |
+
"loss": 11.7413,
|
9365 |
+
"step": 1332
|
9366 |
+
},
|
9367 |
+
{
|
9368 |
+
"epoch": 0.043744718555406965,
|
9369 |
+
"grad_norm": 0.0738886371254921,
|
9370 |
+
"learning_rate": 1.2127826356723227e-05,
|
9371 |
+
"loss": 11.7314,
|
9372 |
+
"step": 1333
|
9373 |
+
},
|
9374 |
+
{
|
9375 |
+
"epoch": 0.0437775352985093,
|
9376 |
+
"grad_norm": 0.05801105499267578,
|
9377 |
+
"learning_rate": 1.2032608118116062e-05,
|
9378 |
+
"loss": 11.7374,
|
9379 |
+
"step": 1334
|
9380 |
+
},
|
9381 |
+
{
|
9382 |
+
"epoch": 0.04381035204161163,
|
9383 |
+
"grad_norm": 0.06434651464223862,
|
9384 |
+
"learning_rate": 1.1937741210315966e-05,
|
9385 |
+
"loss": 11.7391,
|
9386 |
+
"step": 1335
|
9387 |
+
},
|
9388 |
+
{
|
9389 |
+
"epoch": 0.04384316878471396,
|
9390 |
+
"grad_norm": 0.06541498005390167,
|
9391 |
+
"learning_rate": 1.1843226012209529e-05,
|
9392 |
+
"loss": 11.7333,
|
9393 |
+
"step": 1336
|
9394 |
+
},
|
9395 |
+
{
|
9396 |
+
"epoch": 0.04387598552781629,
|
9397 |
+
"grad_norm": 0.10305175930261612,
|
9398 |
+
"learning_rate": 1.1749062901278708e-05,
|
9399 |
+
"loss": 11.7183,
|
9400 |
+
"step": 1337
|
9401 |
+
},
|
9402 |
+
{
|
9403 |
+
"epoch": 0.043908802270918625,
|
9404 |
+
"grad_norm": 0.058159247040748596,
|
9405 |
+
"learning_rate": 1.1655252253599225e-05,
|
9406 |
+
"loss": 11.7348,
|
9407 |
+
"step": 1338
|
9408 |
+
},
|
9409 |
+
{
|
9410 |
+
"epoch": 0.04394161901402095,
|
9411 |
+
"grad_norm": 0.07037179917097092,
|
9412 |
+
"learning_rate": 1.1561794443839102e-05,
|
9413 |
+
"loss": 11.716,
|
9414 |
+
"step": 1339
|
9415 |
+
},
|
9416 |
+
{
|
9417 |
+
"epoch": 0.04397443575712329,
|
9418 |
+
"grad_norm": 0.043836165219545364,
|
9419 |
+
"learning_rate": 1.1468689845257242e-05,
|
9420 |
+
"loss": 11.7448,
|
9421 |
+
"step": 1340
|
9422 |
+
},
|
9423 |
+
{
|
9424 |
+
"epoch": 0.044007252500225615,
|
9425 |
+
"grad_norm": 0.06859394907951355,
|
9426 |
+
"learning_rate": 1.1375938829701794e-05,
|
9427 |
+
"loss": 11.7412,
|
9428 |
+
"step": 1341
|
9429 |
+
},
|
9430 |
+
{
|
9431 |
+
"epoch": 0.04404006924332795,
|
9432 |
+
"grad_norm": 0.06597665697336197,
|
9433 |
+
"learning_rate": 1.128354176760873e-05,
|
9434 |
+
"loss": 11.7364,
|
9435 |
+
"step": 1342
|
9436 |
+
},
|
9437 |
+
{
|
9438 |
+
"epoch": 0.04407288598643028,
|
9439 |
+
"grad_norm": 0.06730660796165466,
|
9440 |
+
"learning_rate": 1.1191499028000451e-05,
|
9441 |
+
"loss": 11.7349,
|
9442 |
+
"step": 1343
|
9443 |
+
},
|
9444 |
+
{
|
9445 |
+
"epoch": 0.044105702729532605,
|
9446 |
+
"grad_norm": 0.060405902564525604,
|
9447 |
+
"learning_rate": 1.1099810978484182e-05,
|
9448 |
+
"loss": 11.7293,
|
9449 |
+
"step": 1344
|
9450 |
+
},
|
9451 |
+
{
|
9452 |
+
"epoch": 0.04413851947263494,
|
9453 |
+
"grad_norm": 0.07341492176055908,
|
9454 |
+
"learning_rate": 1.1008477985250554e-05,
|
9455 |
+
"loss": 11.7315,
|
9456 |
+
"step": 1345
|
9457 |
+
},
|
9458 |
+
{
|
9459 |
+
"epoch": 0.04417133621573727,
|
9460 |
+
"grad_norm": 0.0642743930220604,
|
9461 |
+
"learning_rate": 1.091750041307218e-05,
|
9462 |
+
"loss": 11.7274,
|
9463 |
+
"step": 1346
|
9464 |
+
},
|
9465 |
+
{
|
9466 |
+
"epoch": 0.0442041529588396,
|
9467 |
+
"grad_norm": 0.0817587673664093,
|
9468 |
+
"learning_rate": 1.0826878625302173e-05,
|
9469 |
+
"loss": 11.7277,
|
9470 |
+
"step": 1347
|
9471 |
+
},
|
9472 |
+
{
|
9473 |
+
"epoch": 0.04423696970194193,
|
9474 |
+
"grad_norm": 0.09008070826530457,
|
9475 |
+
"learning_rate": 1.073661298387265e-05,
|
9476 |
+
"loss": 11.7146,
|
9477 |
+
"step": 1348
|
9478 |
+
},
|
9479 |
+
{
|
9480 |
+
"epoch": 0.044269786445044264,
|
9481 |
+
"grad_norm": 0.09602459520101547,
|
9482 |
+
"learning_rate": 1.0646703849293316e-05,
|
9483 |
+
"loss": 11.7162,
|
9484 |
+
"step": 1349
|
9485 |
+
},
|
9486 |
+
{
|
9487 |
+
"epoch": 0.04430260318814659,
|
9488 |
+
"grad_norm": 0.05127155780792236,
|
9489 |
+
"learning_rate": 1.0557151580650105e-05,
|
9490 |
+
"loss": 11.7467,
|
9491 |
+
"step": 1350
|
9492 |
+
},
|
9493 |
+
{
|
9494 |
+
"epoch": 0.044335419931248926,
|
9495 |
+
"grad_norm": 0.09199243783950806,
|
9496 |
+
"learning_rate": 1.0467956535603596e-05,
|
9497 |
+
"loss": 11.7232,
|
9498 |
+
"step": 1351
|
9499 |
+
},
|
9500 |
+
{
|
9501 |
+
"epoch": 0.044368236674351254,
|
9502 |
+
"grad_norm": 0.0443350151181221,
|
9503 |
+
"learning_rate": 1.0379119070387678e-05,
|
9504 |
+
"loss": 11.7387,
|
9505 |
+
"step": 1352
|
9506 |
+
},
|
9507 |
+
{
|
9508 |
+
"epoch": 0.04440105341745358,
|
9509 |
+
"grad_norm": 0.05941386893391609,
|
9510 |
+
"learning_rate": 1.0290639539808156e-05,
|
9511 |
+
"loss": 11.731,
|
9512 |
+
"step": 1353
|
9513 |
+
},
|
9514 |
+
{
|
9515 |
+
"epoch": 0.044433870160555916,
|
9516 |
+
"grad_norm": 0.08545467257499695,
|
9517 |
+
"learning_rate": 1.0202518297241237e-05,
|
9518 |
+
"loss": 11.7226,
|
9519 |
+
"step": 1354
|
9520 |
+
},
|
9521 |
+
{
|
9522 |
+
"epoch": 0.044466686903658244,
|
9523 |
+
"grad_norm": 0.055701982229948044,
|
9524 |
+
"learning_rate": 1.0114755694632195e-05,
|
9525 |
+
"loss": 11.7394,
|
9526 |
+
"step": 1355
|
9527 |
+
},
|
9528 |
+
{
|
9529 |
+
"epoch": 0.04449950364676058,
|
9530 |
+
"grad_norm": 0.05899910256266594,
|
9531 |
+
"learning_rate": 1.0027352082493901e-05,
|
9532 |
+
"loss": 11.7319,
|
9533 |
+
"step": 1356
|
9534 |
+
},
|
9535 |
+
{
|
9536 |
+
"epoch": 0.044532320389862906,
|
9537 |
+
"grad_norm": 0.11653304100036621,
|
9538 |
+
"learning_rate": 9.940307809905536e-06,
|
9539 |
+
"loss": 11.7179,
|
9540 |
+
"step": 1357
|
9541 |
+
},
|
9542 |
+
{
|
9543 |
+
"epoch": 0.04456513713296524,
|
9544 |
+
"grad_norm": 0.09984663128852844,
|
9545 |
+
"learning_rate": 9.853623224511076e-06,
|
9546 |
+
"loss": 11.7266,
|
9547 |
+
"step": 1358
|
9548 |
+
},
|
9549 |
+
{
|
9550 |
+
"epoch": 0.04459795387606757,
|
9551 |
+
"grad_norm": 0.08613155782222748,
|
9552 |
+
"learning_rate": 9.767298672517922e-06,
|
9553 |
+
"loss": 11.7276,
|
9554 |
+
"step": 1359
|
9555 |
+
},
|
9556 |
+
{
|
9557 |
+
"epoch": 0.0446307706191699,
|
9558 |
+
"grad_norm": 0.06240138038992882,
|
9559 |
+
"learning_rate": 9.681334498695648e-06,
|
9560 |
+
"loss": 11.7331,
|
9561 |
+
"step": 1360
|
9562 |
+
},
|
9563 |
+
{
|
9564 |
+
"epoch": 0.04466358736227223,
|
9565 |
+
"grad_norm": 0.04993334785103798,
|
9566 |
+
"learning_rate": 9.595731046374424e-06,
|
9567 |
+
"loss": 11.7409,
|
9568 |
+
"step": 1361
|
9569 |
+
},
|
9570 |
+
{
|
9571 |
+
"epoch": 0.044696404105374565,
|
9572 |
+
"grad_norm": 0.05479804426431656,
|
9573 |
+
"learning_rate": 9.510488657443772e-06,
|
9574 |
+
"loss": 11.7357,
|
9575 |
+
"step": 1362
|
9576 |
+
},
|
9577 |
+
{
|
9578 |
+
"epoch": 0.04472922084847689,
|
9579 |
+
"grad_norm": 0.05745505541563034,
|
9580 |
+
"learning_rate": 9.425607672351166e-06,
|
9581 |
+
"loss": 11.7442,
|
9582 |
+
"step": 1363
|
9583 |
+
},
|
9584 |
+
{
|
9585 |
+
"epoch": 0.04476203759157922,
|
9586 |
+
"grad_norm": 0.07815633714199066,
|
9587 |
+
"learning_rate": 9.34108843010072e-06,
|
9588 |
+
"loss": 11.7371,
|
9589 |
+
"step": 1364
|
9590 |
+
},
|
9591 |
+
{
|
9592 |
+
"epoch": 0.044794854334681555,
|
9593 |
+
"grad_norm": 0.06175662577152252,
|
9594 |
+
"learning_rate": 9.256931268251756e-06,
|
9595 |
+
"loss": 11.7266,
|
9596 |
+
"step": 1365
|
9597 |
+
},
|
9598 |
+
{
|
9599 |
+
"epoch": 0.04482767107778388,
|
9600 |
+
"grad_norm": 0.060856085270643234,
|
9601 |
+
"learning_rate": 9.173136522917457e-06,
|
9602 |
+
"loss": 11.7347,
|
9603 |
+
"step": 1366
|
9604 |
+
},
|
9605 |
+
{
|
9606 |
+
"epoch": 0.04486048782088622,
|
9607 |
+
"grad_norm": 0.056870121508836746,
|
9608 |
+
"learning_rate": 9.089704528763654e-06,
|
9609 |
+
"loss": 11.7403,
|
9610 |
+
"step": 1367
|
9611 |
+
},
|
9612 |
+
{
|
9613 |
+
"epoch": 0.044893304563988545,
|
9614 |
+
"grad_norm": 0.04246772453188896,
|
9615 |
+
"learning_rate": 9.006635619007309e-06,
|
9616 |
+
"loss": 11.7445,
|
9617 |
+
"step": 1368
|
9618 |
+
},
|
9619 |
+
{
|
9620 |
+
"epoch": 0.04492612130709088,
|
9621 |
+
"grad_norm": 0.07839620858430862,
|
9622 |
+
"learning_rate": 8.923930125415291e-06,
|
9623 |
+
"loss": 11.7184,
|
9624 |
+
"step": 1369
|
9625 |
+
},
|
9626 |
+
{
|
9627 |
+
"epoch": 0.04495893805019321,
|
9628 |
+
"grad_norm": 0.05189070478081703,
|
9629 |
+
"learning_rate": 8.841588378303067e-06,
|
9630 |
+
"loss": 11.7422,
|
9631 |
+
"step": 1370
|
9632 |
+
},
|
9633 |
+
{
|
9634 |
+
"epoch": 0.04499175479329554,
|
9635 |
+
"grad_norm": 0.09642992168664932,
|
9636 |
+
"learning_rate": 8.759610706533316e-06,
|
9637 |
+
"loss": 11.7203,
|
9638 |
+
"step": 1371
|
9639 |
+
},
|
9640 |
+
{
|
9641 |
+
"epoch": 0.04502457153639787,
|
9642 |
+
"grad_norm": 0.05947954207658768,
|
9643 |
+
"learning_rate": 8.677997437514629e-06,
|
9644 |
+
"loss": 11.7382,
|
9645 |
+
"step": 1372
|
9646 |
+
},
|
9647 |
+
{
|
9648 |
+
"epoch": 0.0450573882795002,
|
9649 |
+
"grad_norm": 0.06777901202440262,
|
9650 |
+
"learning_rate": 8.596748897200247e-06,
|
9651 |
+
"loss": 11.7326,
|
9652 |
+
"step": 1373
|
9653 |
+
},
|
9654 |
+
{
|
9655 |
+
"epoch": 0.04509020502260253,
|
9656 |
+
"grad_norm": 0.04685264453291893,
|
9657 |
+
"learning_rate": 8.515865410086743e-06,
|
9658 |
+
"loss": 11.7397,
|
9659 |
+
"step": 1374
|
9660 |
+
},
|
9661 |
+
{
|
9662 |
+
"epoch": 0.04512302176570486,
|
9663 |
+
"grad_norm": 0.0895228236913681,
|
9664 |
+
"learning_rate": 8.435347299212682e-06,
|
9665 |
+
"loss": 11.7351,
|
9666 |
+
"step": 1375
|
9667 |
+
},
|
9668 |
+
{
|
9669 |
+
"epoch": 0.045155838508807195,
|
9670 |
+
"grad_norm": 0.10537441819906235,
|
9671 |
+
"learning_rate": 8.355194886157324e-06,
|
9672 |
+
"loss": 11.7093,
|
9673 |
+
"step": 1376
|
9674 |
+
},
|
9675 |
+
{
|
9676 |
+
"epoch": 0.04518865525190952,
|
9677 |
+
"grad_norm": 0.08188740164041519,
|
9678 |
+
"learning_rate": 8.275408491039493e-06,
|
9679 |
+
"loss": 11.7136,
|
9680 |
+
"step": 1377
|
9681 |
+
},
|
9682 |
+
{
|
9683 |
+
"epoch": 0.04522147199501186,
|
9684 |
+
"grad_norm": 0.1262494921684265,
|
9685 |
+
"learning_rate": 8.195988432516078e-06,
|
9686 |
+
"loss": 11.7147,
|
9687 |
+
"step": 1378
|
9688 |
+
},
|
9689 |
+
{
|
9690 |
+
"epoch": 0.045254288738114185,
|
9691 |
+
"grad_norm": 0.04000650346279144,
|
9692 |
+
"learning_rate": 8.116935027780893e-06,
|
9693 |
+
"loss": 11.7441,
|
9694 |
+
"step": 1379
|
9695 |
+
},
|
9696 |
+
{
|
9697 |
+
"epoch": 0.04528710548121652,
|
9698 |
+
"grad_norm": 0.06261011958122253,
|
9699 |
+
"learning_rate": 8.038248592563413e-06,
|
9700 |
+
"loss": 11.7365,
|
9701 |
+
"step": 1380
|
9702 |
+
},
|
9703 |
+
{
|
9704 |
+
"epoch": 0.04531992222431885,
|
9705 |
+
"grad_norm": 0.05025480315089226,
|
9706 |
+
"learning_rate": 7.959929441127457e-06,
|
9707 |
+
"loss": 11.7381,
|
9708 |
+
"step": 1381
|
9709 |
+
},
|
9710 |
+
{
|
9711 |
+
"epoch": 0.04535273896742118,
|
9712 |
+
"grad_norm": 0.06580398231744766,
|
9713 |
+
"learning_rate": 7.881977886269931e-06,
|
9714 |
+
"loss": 11.7398,
|
9715 |
+
"step": 1382
|
9716 |
+
},
|
9717 |
+
{
|
9718 |
+
"epoch": 0.04538555571052351,
|
9719 |
+
"grad_norm": 0.04904273897409439,
|
9720 |
+
"learning_rate": 7.804394239319669e-06,
|
9721 |
+
"loss": 11.7415,
|
9722 |
+
"step": 1383
|
9723 |
+
},
|
9724 |
+
{
|
9725 |
+
"epoch": 0.04541837245362584,
|
9726 |
+
"grad_norm": 0.06415777653455734,
|
9727 |
+
"learning_rate": 7.727178810136093e-06,
|
9728 |
+
"loss": 11.7346,
|
9729 |
+
"step": 1384
|
9730 |
+
},
|
9731 |
+
{
|
9732 |
+
"epoch": 0.04545118919672817,
|
9733 |
+
"grad_norm": 0.06482280045747757,
|
9734 |
+
"learning_rate": 7.65033190710801e-06,
|
9735 |
+
"loss": 11.7323,
|
9736 |
+
"step": 1385
|
9737 |
+
},
|
9738 |
+
{
|
9739 |
+
"epoch": 0.0454840059398305,
|
9740 |
+
"grad_norm": 0.0635392963886261,
|
9741 |
+
"learning_rate": 7.5738538371523225e-06,
|
9742 |
+
"loss": 11.7418,
|
9743 |
+
"step": 1386
|
9744 |
+
},
|
9745 |
+
{
|
9746 |
+
"epoch": 0.045516822682932834,
|
9747 |
+
"grad_norm": 0.11412329226732254,
|
9748 |
+
"learning_rate": 7.497744905713011e-06,
|
9749 |
+
"loss": 11.7104,
|
9750 |
+
"step": 1387
|
9751 |
+
},
|
9752 |
+
{
|
9753 |
+
"epoch": 0.04554963942603516,
|
9754 |
+
"grad_norm": 0.05159192159771919,
|
9755 |
+
"learning_rate": 7.4220054167596145e-06,
|
9756 |
+
"loss": 11.7381,
|
9757 |
+
"step": 1388
|
9758 |
+
},
|
9759 |
+
{
|
9760 |
+
"epoch": 0.045582456169137496,
|
9761 |
+
"grad_norm": 0.06285438686609268,
|
9762 |
+
"learning_rate": 7.34663567278624e-06,
|
9763 |
+
"loss": 11.7404,
|
9764 |
+
"step": 1389
|
9765 |
+
},
|
9766 |
+
{
|
9767 |
+
"epoch": 0.045615272912239824,
|
9768 |
+
"grad_norm": 0.05733206868171692,
|
9769 |
+
"learning_rate": 7.27163597481022e-06,
|
9770 |
+
"loss": 11.7352,
|
9771 |
+
"step": 1390
|
9772 |
+
},
|
9773 |
+
{
|
9774 |
+
"epoch": 0.04564808965534216,
|
9775 |
+
"grad_norm": 0.05429792404174805,
|
9776 |
+
"learning_rate": 7.19700662237105e-06,
|
9777 |
+
"loss": 11.7338,
|
9778 |
+
"step": 1391
|
9779 |
+
},
|
9780 |
+
{
|
9781 |
+
"epoch": 0.045680906398444486,
|
9782 |
+
"grad_norm": 0.1139855831861496,
|
9783 |
+
"learning_rate": 7.122747913529048e-06,
|
9784 |
+
"loss": 11.7291,
|
9785 |
+
"step": 1392
|
9786 |
+
},
|
9787 |
+
{
|
9788 |
+
"epoch": 0.04571372314154682,
|
9789 |
+
"grad_norm": 0.06199384853243828,
|
9790 |
+
"learning_rate": 7.0488601448642335e-06,
|
9791 |
+
"loss": 11.7329,
|
9792 |
+
"step": 1393
|
9793 |
+
},
|
9794 |
+
{
|
9795 |
+
"epoch": 0.04574653988464915,
|
9796 |
+
"grad_norm": 0.1386241763830185,
|
9797 |
+
"learning_rate": 6.975343611475194e-06,
|
9798 |
+
"loss": 11.7176,
|
9799 |
+
"step": 1394
|
9800 |
+
},
|
9801 |
+
{
|
9802 |
+
"epoch": 0.045779356627751476,
|
9803 |
+
"grad_norm": 0.062151163816452026,
|
9804 |
+
"learning_rate": 6.9021986069777835e-06,
|
9805 |
+
"loss": 11.7298,
|
9806 |
+
"step": 1395
|
9807 |
+
},
|
9808 |
+
{
|
9809 |
+
"epoch": 0.04581217337085381,
|
9810 |
+
"grad_norm": 0.04375346377491951,
|
9811 |
+
"learning_rate": 6.829425423504021e-06,
|
9812 |
+
"loss": 11.7444,
|
9813 |
+
"step": 1396
|
9814 |
+
},
|
9815 |
+
{
|
9816 |
+
"epoch": 0.04584499011395614,
|
9817 |
+
"grad_norm": 0.057462386786937714,
|
9818 |
+
"learning_rate": 6.757024351700969e-06,
|
9819 |
+
"loss": 11.7397,
|
9820 |
+
"step": 1397
|
9821 |
+
},
|
9822 |
+
{
|
9823 |
+
"epoch": 0.04587780685705847,
|
9824 |
+
"grad_norm": 0.13280917704105377,
|
9825 |
+
"learning_rate": 6.684995680729489e-06,
|
9826 |
+
"loss": 11.7147,
|
9827 |
+
"step": 1398
|
9828 |
+
},
|
9829 |
+
{
|
9830 |
+
"epoch": 0.0459106236001608,
|
9831 |
+
"grad_norm": 0.05270727351307869,
|
9832 |
+
"learning_rate": 6.613339698263088e-06,
|
9833 |
+
"loss": 11.7358,
|
9834 |
+
"step": 1399
|
9835 |
+
},
|
9836 |
+
{
|
9837 |
+
"epoch": 0.045943440343263135,
|
9838 |
+
"grad_norm": 0.06059174984693527,
|
9839 |
+
"learning_rate": 6.542056690486853e-06,
|
9840 |
+
"loss": 11.7239,
|
9841 |
+
"step": 1400
|
9842 |
+
},
|
9843 |
+
{
|
9844 |
+
"epoch": 0.04597625708636546,
|
9845 |
+
"grad_norm": 0.09412822127342224,
|
9846 |
+
"learning_rate": 6.471146942096274e-06,
|
9847 |
+
"loss": 11.7235,
|
9848 |
+
"step": 1401
|
9849 |
+
},
|
9850 |
+
{
|
9851 |
+
"epoch": 0.0460090738294678,
|
9852 |
+
"grad_norm": 0.0670003667473793,
|
9853 |
+
"learning_rate": 6.4006107362960195e-06,
|
9854 |
+
"loss": 11.7387,
|
9855 |
+
"step": 1402
|
9856 |
+
},
|
9857 |
+
{
|
9858 |
+
"epoch": 0.046041890572570125,
|
9859 |
+
"grad_norm": 0.07799314707517624,
|
9860 |
+
"learning_rate": 6.330448354798901e-06,
|
9861 |
+
"loss": 11.7321,
|
9862 |
+
"step": 1403
|
9863 |
+
},
|
9864 |
+
{
|
9865 |
+
"epoch": 0.04607470731567245,
|
9866 |
+
"grad_norm": 0.08222674578428268,
|
9867 |
+
"learning_rate": 6.260660077824753e-06,
|
9868 |
+
"loss": 11.7312,
|
9869 |
+
"step": 1404
|
9870 |
+
},
|
9871 |
+
{
|
9872 |
+
"epoch": 0.04610752405877479,
|
9873 |
+
"grad_norm": 0.07971063256263733,
|
9874 |
+
"learning_rate": 6.1912461840992646e-06,
|
9875 |
+
"loss": 11.7342,
|
9876 |
+
"step": 1405
|
9877 |
+
},
|
9878 |
+
{
|
9879 |
+
"epoch": 0.046140340801877115,
|
9880 |
+
"grad_norm": 0.08488437533378601,
|
9881 |
+
"learning_rate": 6.12220695085286e-06,
|
9882 |
+
"loss": 11.7193,
|
9883 |
+
"step": 1406
|
9884 |
+
},
|
9885 |
+
{
|
9886 |
+
"epoch": 0.04617315754497945,
|
9887 |
+
"grad_norm": 0.03914647921919823,
|
9888 |
+
"learning_rate": 6.053542653819666e-06,
|
9889 |
+
"loss": 11.7411,
|
9890 |
+
"step": 1407
|
9891 |
+
},
|
9892 |
+
{
|
9893 |
+
"epoch": 0.04620597428808178,
|
9894 |
+
"grad_norm": 0.09059201925992966,
|
9895 |
+
"learning_rate": 5.985253567236304e-06,
|
9896 |
+
"loss": 11.7133,
|
9897 |
+
"step": 1408
|
9898 |
+
},
|
9899 |
+
{
|
9900 |
+
"epoch": 0.04623879103118411,
|
9901 |
+
"grad_norm": 0.07787547260522842,
|
9902 |
+
"learning_rate": 5.917339963840896e-06,
|
9903 |
+
"loss": 11.7339,
|
9904 |
+
"step": 1409
|
9905 |
+
},
|
9906 |
+
{
|
9907 |
+
"epoch": 0.04627160777428644,
|
9908 |
+
"grad_norm": 0.06361488997936249,
|
9909 |
+
"learning_rate": 5.849802114871927e-06,
|
9910 |
+
"loss": 11.7447,
|
9911 |
+
"step": 1410
|
9912 |
+
},
|
9913 |
+
{
|
9914 |
+
"epoch": 0.046304424517388774,
|
9915 |
+
"grad_norm": 0.0768633708357811,
|
9916 |
+
"learning_rate": 5.782640290067131e-06,
|
9917 |
+
"loss": 11.7454,
|
9918 |
+
"step": 1411
|
9919 |
+
},
|
9920 |
+
{
|
9921 |
+
"epoch": 0.0463372412604911,
|
9922 |
+
"grad_norm": 0.06700538843870163,
|
9923 |
+
"learning_rate": 5.715854757662486e-06,
|
9924 |
+
"loss": 11.7372,
|
9925 |
+
"step": 1412
|
9926 |
+
},
|
9927 |
+
{
|
9928 |
+
"epoch": 0.04637005800359344,
|
9929 |
+
"grad_norm": 0.049638547003269196,
|
9930 |
+
"learning_rate": 5.649445784391061e-06,
|
9931 |
+
"loss": 11.7369,
|
9932 |
+
"step": 1413
|
9933 |
+
},
|
9934 |
+
{
|
9935 |
+
"epoch": 0.046402874746695764,
|
9936 |
+
"grad_norm": 0.09619002044200897,
|
9937 |
+
"learning_rate": 5.583413635482082e-06,
|
9938 |
+
"loss": 11.732,
|
9939 |
+
"step": 1414
|
9940 |
+
},
|
9941 |
+
{
|
9942 |
+
"epoch": 0.04643569148979809,
|
9943 |
+
"grad_norm": 0.06225225329399109,
|
9944 |
+
"learning_rate": 5.517758574659682e-06,
|
9945 |
+
"loss": 11.7385,
|
9946 |
+
"step": 1415
|
9947 |
+
},
|
9948 |
+
{
|
9949 |
+
"epoch": 0.04646850823290043,
|
9950 |
+
"grad_norm": 0.08197441697120667,
|
9951 |
+
"learning_rate": 5.452480864142007e-06,
|
9952 |
+
"loss": 11.7249,
|
9953 |
+
"step": 1416
|
9954 |
+
},
|
9955 |
+
{
|
9956 |
+
"epoch": 0.046501324976002754,
|
9957 |
+
"grad_norm": 0.07723643630743027,
|
9958 |
+
"learning_rate": 5.3875807646401096e-06,
|
9959 |
+
"loss": 11.7269,
|
9960 |
+
"step": 1417
|
9961 |
+
},
|
9962 |
+
{
|
9963 |
+
"epoch": 0.04653414171910509,
|
9964 |
+
"grad_norm": 0.05070832744240761,
|
9965 |
+
"learning_rate": 5.323058535356907e-06,
|
9966 |
+
"loss": 11.7406,
|
9967 |
+
"step": 1418
|
9968 |
+
},
|
9969 |
+
{
|
9970 |
+
"epoch": 0.04656695846220742,
|
9971 |
+
"grad_norm": 0.05780310556292534,
|
9972 |
+
"learning_rate": 5.258914433986139e-06,
|
9973 |
+
"loss": 11.7367,
|
9974 |
+
"step": 1419
|
9975 |
+
},
|
9976 |
+
{
|
9977 |
+
"epoch": 0.04659977520530975,
|
9978 |
+
"grad_norm": 0.06705296784639359,
|
9979 |
+
"learning_rate": 5.19514871671134e-06,
|
9980 |
+
"loss": 11.7349,
|
9981 |
+
"step": 1420
|
9982 |
+
},
|
9983 |
+
{
|
9984 |
+
"epoch": 0.04663259194841208,
|
9985 |
+
"grad_norm": 0.1350971907377243,
|
9986 |
+
"learning_rate": 5.131761638204879e-06,
|
9987 |
+
"loss": 11.6973,
|
9988 |
+
"step": 1421
|
9989 |
+
},
|
9990 |
+
{
|
9991 |
+
"epoch": 0.046665408691514414,
|
9992 |
+
"grad_norm": 0.06943950057029724,
|
9993 |
+
"learning_rate": 5.068753451626839e-06,
|
9994 |
+
"loss": 11.7209,
|
9995 |
+
"step": 1422
|
9996 |
+
},
|
9997 |
+
{
|
9998 |
+
"epoch": 0.04669822543461674,
|
9999 |
+
"grad_norm": 0.05553746595978737,
|
10000 |
+
"learning_rate": 5.006124408624046e-06,
|
10001 |
+
"loss": 11.7332,
|
10002 |
+
"step": 1423
|
10003 |
+
},
|
10004 |
+
{
|
10005 |
+
"epoch": 0.04673104217771907,
|
10006 |
+
"grad_norm": 0.08447279036045074,
|
10007 |
+
"learning_rate": 4.9438747593291255e-06,
|
10008 |
+
"loss": 11.7195,
|
10009 |
+
"step": 1424
|
10010 |
+
},
|
10011 |
+
{
|
10012 |
+
"epoch": 0.046763858920821404,
|
10013 |
+
"grad_norm": 0.03792978823184967,
|
10014 |
+
"learning_rate": 4.882004752359426e-06,
|
10015 |
+
"loss": 11.736,
|
10016 |
+
"step": 1425
|
10017 |
+
},
|
10018 |
+
{
|
10019 |
+
"epoch": 0.04679667566392373,
|
10020 |
+
"grad_norm": 0.0918356254696846,
|
10021 |
+
"learning_rate": 4.82051463481602e-06,
|
10022 |
+
"loss": 11.7271,
|
10023 |
+
"step": 1426
|
10024 |
+
},
|
10025 |
+
{
|
10026 |
+
"epoch": 0.046829492407026066,
|
10027 |
+
"grad_norm": 0.043581221252679825,
|
10028 |
+
"learning_rate": 4.759404652282795e-06,
|
10029 |
+
"loss": 11.7369,
|
10030 |
+
"step": 1427
|
10031 |
+
},
|
10032 |
+
{
|
10033 |
+
"epoch": 0.046862309150128394,
|
10034 |
+
"grad_norm": 0.08697935193777084,
|
10035 |
+
"learning_rate": 4.698675048825429e-06,
|
10036 |
+
"loss": 11.7272,
|
10037 |
+
"step": 1428
|
10038 |
+
},
|
10039 |
+
{
|
10040 |
+
"epoch": 0.04689512589323073,
|
10041 |
+
"grad_norm": 0.0908113569021225,
|
10042 |
+
"learning_rate": 4.6383260669903836e-06,
|
10043 |
+
"loss": 11.7258,
|
10044 |
+
"step": 1429
|
10045 |
+
},
|
10046 |
+
{
|
10047 |
+
"epoch": 0.046927942636333056,
|
10048 |
+
"grad_norm": 0.08184558153152466,
|
10049 |
+
"learning_rate": 4.578357947803946e-06,
|
10050 |
+
"loss": 11.7312,
|
10051 |
+
"step": 1430
|
10052 |
+
},
|
10053 |
+
{
|
10054 |
+
"epoch": 0.04696075937943539,
|
10055 |
+
"grad_norm": 0.08386525511741638,
|
10056 |
+
"learning_rate": 4.518770930771366e-06,
|
10057 |
+
"loss": 11.7392,
|
10058 |
+
"step": 1431
|
10059 |
+
},
|
10060 |
+
{
|
10061 |
+
"epoch": 0.04699357612253772,
|
10062 |
+
"grad_norm": 0.06235777959227562,
|
10063 |
+
"learning_rate": 4.45956525387573e-06,
|
10064 |
+
"loss": 11.7362,
|
10065 |
+
"step": 1432
|
10066 |
+
},
|
10067 |
+
{
|
10068 |
+
"epoch": 0.04702639286564005,
|
10069 |
+
"grad_norm": 0.0771624892950058,
|
10070 |
+
"learning_rate": 4.400741153577137e-06,
|
10071 |
+
"loss": 11.73,
|
10072 |
+
"step": 1433
|
10073 |
+
},
|
10074 |
+
{
|
10075 |
+
"epoch": 0.04705920960874238,
|
10076 |
+
"grad_norm": 0.0810970589518547,
|
10077 |
+
"learning_rate": 4.342298864811745e-06,
|
10078 |
+
"loss": 11.7254,
|
10079 |
+
"step": 1434
|
10080 |
+
},
|
10081 |
+
{
|
10082 |
+
"epoch": 0.04709202635184471,
|
10083 |
+
"grad_norm": 0.07403910905122757,
|
10084 |
+
"learning_rate": 4.284238620990766e-06,
|
10085 |
+
"loss": 11.7266,
|
10086 |
+
"step": 1435
|
10087 |
+
},
|
10088 |
+
{
|
10089 |
+
"epoch": 0.04712484309494704,
|
10090 |
+
"grad_norm": 0.05222998186945915,
|
10091 |
+
"learning_rate": 4.226560653999567e-06,
|
10092 |
+
"loss": 11.7344,
|
10093 |
+
"step": 1436
|
10094 |
+
},
|
10095 |
+
{
|
10096 |
+
"epoch": 0.04715765983804937,
|
10097 |
+
"grad_norm": 0.04455263912677765,
|
10098 |
+
"learning_rate": 4.169265194196781e-06,
|
10099 |
+
"loss": 11.7393,
|
10100 |
+
"step": 1437
|
10101 |
+
},
|
10102 |
+
{
|
10103 |
+
"epoch": 0.047190476581151705,
|
10104 |
+
"grad_norm": 0.0955723226070404,
|
10105 |
+
"learning_rate": 4.112352470413328e-06,
|
10106 |
+
"loss": 11.7351,
|
10107 |
+
"step": 1438
|
10108 |
+
},
|
10109 |
+
{
|
10110 |
+
"epoch": 0.04722329332425403,
|
10111 |
+
"grad_norm": 0.0574662946164608,
|
10112 |
+
"learning_rate": 4.05582270995154e-06,
|
10113 |
+
"loss": 11.7389,
|
10114 |
+
"step": 1439
|
10115 |
+
},
|
10116 |
+
{
|
10117 |
+
"epoch": 0.04725611006735637,
|
10118 |
+
"grad_norm": 0.10476269572973251,
|
10119 |
+
"learning_rate": 3.999676138584241e-06,
|
10120 |
+
"loss": 11.7032,
|
10121 |
+
"step": 1440
|
10122 |
+
},
|
10123 |
+
{
|
10124 |
+
"epoch": 0.047288926810458695,
|
10125 |
+
"grad_norm": 0.05399872735142708,
|
10126 |
+
"learning_rate": 3.943912980553854e-06,
|
10127 |
+
"loss": 11.7315,
|
10128 |
+
"step": 1441
|
10129 |
+
},
|
10130 |
+
{
|
10131 |
+
"epoch": 0.04732174355356103,
|
10132 |
+
"grad_norm": 0.07692320644855499,
|
10133 |
+
"learning_rate": 3.8885334585714865e-06,
|
10134 |
+
"loss": 11.7241,
|
10135 |
+
"step": 1442
|
10136 |
+
},
|
10137 |
+
{
|
10138 |
+
"epoch": 0.04735456029666336,
|
10139 |
+
"grad_norm": 0.04610075056552887,
|
10140 |
+
"learning_rate": 3.833537793816022e-06,
|
10141 |
+
"loss": 11.7382,
|
10142 |
+
"step": 1443
|
10143 |
+
},
|
10144 |
+
{
|
10145 |
+
"epoch": 0.047387377039765685,
|
10146 |
+
"grad_norm": 0.11135191470384598,
|
10147 |
+
"learning_rate": 3.778926205933342e-06,
|
10148 |
+
"loss": 11.7139,
|
10149 |
+
"step": 1444
|
10150 |
+
},
|
10151 |
+
{
|
10152 |
+
"epoch": 0.04742019378286802,
|
10153 |
+
"grad_norm": 0.06027137488126755,
|
10154 |
+
"learning_rate": 3.724698913035296e-06,
|
10155 |
+
"loss": 11.7325,
|
10156 |
+
"step": 1445
|
10157 |
+
},
|
10158 |
+
{
|
10159 |
+
"epoch": 0.04745301052597035,
|
10160 |
+
"grad_norm": 0.04877974092960358,
|
10161 |
+
"learning_rate": 3.6708561316989297e-06,
|
10162 |
+
"loss": 11.7418,
|
10163 |
+
"step": 1446
|
10164 |
+
},
|
10165 |
+
{
|
10166 |
+
"epoch": 0.04748582726907268,
|
10167 |
+
"grad_norm": 0.08045418560504913,
|
10168 |
+
"learning_rate": 3.617398076965639e-06,
|
10169 |
+
"loss": 11.7362,
|
10170 |
+
"step": 1447
|
10171 |
+
},
|
10172 |
+
{
|
10173 |
+
"epoch": 0.04751864401217501,
|
10174 |
+
"grad_norm": 0.09478320926427841,
|
10175 |
+
"learning_rate": 3.5643249623402265e-06,
|
10176 |
+
"loss": 11.7176,
|
10177 |
+
"step": 1448
|
10178 |
+
},
|
10179 |
+
{
|
10180 |
+
"epoch": 0.047551460755277344,
|
10181 |
+
"grad_norm": 0.07850537449121475,
|
10182 |
+
"learning_rate": 3.51163699979008e-06,
|
10183 |
+
"loss": 11.7241,
|
10184 |
+
"step": 1449
|
10185 |
+
},
|
10186 |
+
{
|
10187 |
+
"epoch": 0.04758427749837967,
|
10188 |
+
"grad_norm": 0.09163854271173477,
|
10189 |
+
"learning_rate": 3.459334399744374e-06,
|
10190 |
+
"loss": 11.7146,
|
10191 |
+
"step": 1450
|
10192 |
+
},
|
10193 |
+
{
|
10194 |
+
"epoch": 0.04761709424148201,
|
10195 |
+
"grad_norm": 0.04774540290236473,
|
10196 |
+
"learning_rate": 3.40741737109318e-06,
|
10197 |
+
"loss": 11.7341,
|
10198 |
+
"step": 1451
|
10199 |
+
},
|
10200 |
+
{
|
10201 |
+
"epoch": 0.047649910984584334,
|
10202 |
+
"grad_norm": 0.10682554543018341,
|
10203 |
+
"learning_rate": 3.3558861211866465e-06,
|
10204 |
+
"loss": 11.7204,
|
10205 |
+
"step": 1452
|
10206 |
+
},
|
10207 |
+
{
|
10208 |
+
"epoch": 0.04768272772768667,
|
10209 |
+
"grad_norm": 0.06452088057994843,
|
10210 |
+
"learning_rate": 3.304740855834154e-06,
|
10211 |
+
"loss": 11.7339,
|
10212 |
+
"step": 1453
|
10213 |
+
},
|
10214 |
+
{
|
10215 |
+
"epoch": 0.047715544470789,
|
10216 |
+
"grad_norm": 0.09056764841079712,
|
10217 |
+
"learning_rate": 3.2539817793035277e-06,
|
10218 |
+
"loss": 11.7319,
|
10219 |
+
"step": 1454
|
10220 |
+
},
|
10221 |
+
{
|
10222 |
+
"epoch": 0.047748361213891324,
|
10223 |
+
"grad_norm": 0.06478995829820633,
|
10224 |
+
"learning_rate": 3.2036090943202147e-06,
|
10225 |
+
"loss": 11.7348,
|
10226 |
+
"step": 1455
|
10227 |
+
},
|
10228 |
+
{
|
10229 |
+
"epoch": 0.04778117795699366,
|
10230 |
+
"grad_norm": 0.07074952870607376,
|
10231 |
+
"learning_rate": 3.1536230020664417e-06,
|
10232 |
+
"loss": 11.7437,
|
10233 |
+
"step": 1456
|
10234 |
+
},
|
10235 |
+
{
|
10236 |
+
"epoch": 0.04781399470009599,
|
10237 |
+
"grad_norm": 0.05798543244600296,
|
10238 |
+
"learning_rate": 3.1040237021804033e-06,
|
10239 |
+
"loss": 11.7219,
|
10240 |
+
"step": 1457
|
10241 |
+
},
|
10242 |
+
{
|
10243 |
+
"epoch": 0.04784681144319832,
|
10244 |
+
"grad_norm": 0.07001690566539764,
|
10245 |
+
"learning_rate": 3.0548113927555633e-06,
|
10246 |
+
"loss": 11.7317,
|
10247 |
+
"step": 1458
|
10248 |
+
},
|
10249 |
+
{
|
10250 |
+
"epoch": 0.04787962818630065,
|
10251 |
+
"grad_norm": 0.046797532588243484,
|
10252 |
+
"learning_rate": 3.0059862703397446e-06,
|
10253 |
+
"loss": 11.7311,
|
10254 |
+
"step": 1459
|
10255 |
+
},
|
10256 |
+
{
|
10257 |
+
"epoch": 0.047912444929402984,
|
10258 |
+
"grad_norm": 0.05379636585712433,
|
10259 |
+
"learning_rate": 2.9575485299343843e-06,
|
10260 |
+
"loss": 11.7456,
|
10261 |
+
"step": 1460
|
10262 |
+
},
|
10263 |
+
{
|
10264 |
+
"epoch": 0.04794526167250531,
|
10265 |
+
"grad_norm": 0.06763121485710144,
|
10266 |
+
"learning_rate": 2.9094983649938236e-06,
|
10267 |
+
"loss": 11.74,
|
10268 |
+
"step": 1461
|
10269 |
+
},
|
10270 |
+
{
|
10271 |
+
"epoch": 0.047978078415607646,
|
10272 |
+
"grad_norm": 0.10224005579948425,
|
10273 |
+
"learning_rate": 2.861835967424409e-06,
|
10274 |
+
"loss": 11.7138,
|
10275 |
+
"step": 1462
|
10276 |
+
},
|
10277 |
+
{
|
10278 |
+
"epoch": 0.048010895158709974,
|
10279 |
+
"grad_norm": 0.06867843866348267,
|
10280 |
+
"learning_rate": 2.8145615275838145e-06,
|
10281 |
+
"loss": 11.7332,
|
10282 |
+
"step": 1463
|
10283 |
+
},
|
10284 |
+
{
|
10285 |
+
"epoch": 0.0480437119018123,
|
10286 |
+
"grad_norm": 0.08831151574850082,
|
10287 |
+
"learning_rate": 2.767675234280298e-06,
|
10288 |
+
"loss": 11.7225,
|
10289 |
+
"step": 1464
|
10290 |
+
},
|
10291 |
+
{
|
10292 |
+
"epoch": 0.048076528644914636,
|
10293 |
+
"grad_norm": 0.06343547254800797,
|
10294 |
+
"learning_rate": 2.7211772747718467e-06,
|
10295 |
+
"loss": 11.735,
|
10296 |
+
"step": 1465
|
10297 |
+
},
|
10298 |
+
{
|
10299 |
+
"epoch": 0.048109345388016964,
|
10300 |
+
"grad_norm": 0.1038689985871315,
|
10301 |
+
"learning_rate": 2.67506783476551e-06,
|
10302 |
+
"loss": 11.7188,
|
10303 |
+
"step": 1466
|
10304 |
+
},
|
10305 |
+
{
|
10306 |
+
"epoch": 0.0481421621311193,
|
10307 |
+
"grad_norm": 0.08483647555112839,
|
10308 |
+
"learning_rate": 2.629347098416679e-06,
|
10309 |
+
"loss": 11.7038,
|
10310 |
+
"step": 1467
|
10311 |
+
},
|
10312 |
+
{
|
10313 |
+
"epoch": 0.048174978874221626,
|
10314 |
+
"grad_norm": 0.05848877131938934,
|
10315 |
+
"learning_rate": 2.5840152483282752e-06,
|
10316 |
+
"loss": 11.7426,
|
10317 |
+
"step": 1468
|
10318 |
+
},
|
10319 |
+
{
|
10320 |
+
"epoch": 0.04820779561732396,
|
10321 |
+
"grad_norm": 0.15112918615341187,
|
10322 |
+
"learning_rate": 2.539072465550052e-06,
|
10323 |
+
"loss": 11.7037,
|
10324 |
+
"step": 1469
|
10325 |
+
},
|
10326 |
+
{
|
10327 |
+
"epoch": 0.04824061236042629,
|
10328 |
+
"grad_norm": 0.05076136067509651,
|
10329 |
+
"learning_rate": 2.4945189295778717e-06,
|
10330 |
+
"loss": 11.7375,
|
10331 |
+
"step": 1470
|
10332 |
+
},
|
10333 |
+
{
|
10334 |
+
"epoch": 0.04827342910352862,
|
10335 |
+
"grad_norm": 0.06423319876194,
|
10336 |
+
"learning_rate": 2.45035481835304e-06,
|
10337 |
+
"loss": 11.7372,
|
10338 |
+
"step": 1471
|
10339 |
+
},
|
10340 |
+
{
|
10341 |
+
"epoch": 0.04830624584663095,
|
10342 |
+
"grad_norm": 0.03969534486532211,
|
10343 |
+
"learning_rate": 2.406580308261508e-06,
|
10344 |
+
"loss": 11.7395,
|
10345 |
+
"step": 1472
|
10346 |
+
},
|
10347 |
+
{
|
10348 |
+
"epoch": 0.048339062589733285,
|
10349 |
+
"grad_norm": 0.06721484661102295,
|
10350 |
+
"learning_rate": 2.3631955741331925e-06,
|
10351 |
+
"loss": 11.7336,
|
10352 |
+
"step": 1473
|
10353 |
+
},
|
10354 |
+
{
|
10355 |
+
"epoch": 0.04837187933283561,
|
10356 |
+
"grad_norm": 0.0560169443488121,
|
10357 |
+
"learning_rate": 2.3202007892413447e-06,
|
10358 |
+
"loss": 11.7403,
|
10359 |
+
"step": 1474
|
10360 |
+
},
|
10361 |
+
{
|
10362 |
+
"epoch": 0.04840469607593794,
|
10363 |
+
"grad_norm": 0.13154977560043335,
|
10364 |
+
"learning_rate": 2.277596125301773e-06,
|
10365 |
+
"loss": 11.712,
|
10366 |
+
"step": 1475
|
10367 |
+
},
|
10368 |
+
{
|
10369 |
+
"epoch": 0.048437512819040275,
|
10370 |
+
"grad_norm": 0.0506259985268116,
|
10371 |
+
"learning_rate": 2.2353817524721985e-06,
|
10372 |
+
"loss": 11.7417,
|
10373 |
+
"step": 1476
|
10374 |
+
},
|
10375 |
+
{
|
10376 |
+
"epoch": 0.0484703295621426,
|
10377 |
+
"grad_norm": 0.08203129470348358,
|
10378 |
+
"learning_rate": 2.1935578393515675e-06,
|
10379 |
+
"loss": 11.7397,
|
10380 |
+
"step": 1477
|
10381 |
+
},
|
10382 |
+
{
|
10383 |
+
"epoch": 0.04850314630524494,
|
10384 |
+
"grad_norm": 0.07745468616485596,
|
10385 |
+
"learning_rate": 2.152124552979373e-06,
|
10386 |
+
"loss": 11.7378,
|
10387 |
+
"step": 1478
|
10388 |
+
},
|
10389 |
+
{
|
10390 |
+
"epoch": 0.048535963048347265,
|
10391 |
+
"grad_norm": 0.07438670098781586,
|
10392 |
+
"learning_rate": 2.1110820588350232e-06,
|
10393 |
+
"loss": 11.739,
|
10394 |
+
"step": 1479
|
10395 |
+
},
|
10396 |
+
{
|
10397 |
+
"epoch": 0.0485687797914496,
|
10398 |
+
"grad_norm": 0.11283761262893677,
|
10399 |
+
"learning_rate": 2.0704305208370857e-06,
|
10400 |
+
"loss": 11.7308,
|
10401 |
+
"step": 1480
|
10402 |
+
},
|
10403 |
+
{
|
10404 |
+
"epoch": 0.04860159653455193,
|
10405 |
+
"grad_norm": 0.05734388902783394,
|
10406 |
+
"learning_rate": 2.0301701013427764e-06,
|
10407 |
+
"loss": 11.7415,
|
10408 |
+
"step": 1481
|
10409 |
+
},
|
10410 |
+
{
|
10411 |
+
"epoch": 0.04863441327765426,
|
10412 |
+
"grad_norm": 0.038278285413980484,
|
10413 |
+
"learning_rate": 1.9903009611471955e-06,
|
10414 |
+
"loss": 11.7417,
|
10415 |
+
"step": 1482
|
10416 |
+
},
|
10417 |
+
{
|
10418 |
+
"epoch": 0.04866723002075659,
|
10419 |
+
"grad_norm": 0.09572451561689377,
|
10420 |
+
"learning_rate": 1.9508232594827146e-06,
|
10421 |
+
"loss": 11.7196,
|
10422 |
+
"step": 1483
|
10423 |
+
},
|
10424 |
+
{
|
10425 |
+
"epoch": 0.04870004676385892,
|
10426 |
+
"grad_norm": 0.06419434398412704,
|
10427 |
+
"learning_rate": 1.9117371540183558e-06,
|
10428 |
+
"loss": 11.735,
|
10429 |
+
"step": 1484
|
10430 |
+
},
|
10431 |
+
{
|
10432 |
+
"epoch": 0.04873286350696125,
|
10433 |
+
"grad_norm": 0.054200414568185806,
|
10434 |
+
"learning_rate": 1.8730428008591594e-06,
|
10435 |
+
"loss": 11.7387,
|
10436 |
+
"step": 1485
|
10437 |
+
},
|
10438 |
+
{
|
10439 |
+
"epoch": 0.04876568025006358,
|
10440 |
+
"grad_norm": 0.053995657712221146,
|
10441 |
+
"learning_rate": 1.83474035454555e-06,
|
10442 |
+
"loss": 11.7326,
|
10443 |
+
"step": 1486
|
10444 |
+
},
|
10445 |
+
{
|
10446 |
+
"epoch": 0.048798496993165914,
|
10447 |
+
"grad_norm": 0.08638813346624374,
|
10448 |
+
"learning_rate": 1.7968299680527157e-06,
|
10449 |
+
"loss": 11.7041,
|
10450 |
+
"step": 1487
|
10451 |
+
},
|
10452 |
+
{
|
10453 |
+
"epoch": 0.04883131373626824,
|
10454 |
+
"grad_norm": 0.04241371527314186,
|
10455 |
+
"learning_rate": 1.7593117927900527e-06,
|
10456 |
+
"loss": 11.7423,
|
10457 |
+
"step": 1488
|
10458 |
+
},
|
10459 |
+
{
|
10460 |
+
"epoch": 0.04886413047937058,
|
10461 |
+
"grad_norm": 0.04665112495422363,
|
10462 |
+
"learning_rate": 1.7221859786004546e-06,
|
10463 |
+
"loss": 11.7378,
|
10464 |
+
"step": 1489
|
10465 |
+
},
|
10466 |
+
{
|
10467 |
+
"epoch": 0.048896947222472904,
|
10468 |
+
"grad_norm": 0.07939332723617554,
|
10469 |
+
"learning_rate": 1.6854526737597908e-06,
|
10470 |
+
"loss": 11.7236,
|
10471 |
+
"step": 1490
|
10472 |
+
},
|
10473 |
+
{
|
10474 |
+
"epoch": 0.04892976396557524,
|
10475 |
+
"grad_norm": 0.06212349608540535,
|
10476 |
+
"learning_rate": 1.6491120249763291e-06,
|
10477 |
+
"loss": 11.7396,
|
10478 |
+
"step": 1491
|
10479 |
+
},
|
10480 |
+
{
|
10481 |
+
"epoch": 0.04896258070867757,
|
10482 |
+
"grad_norm": 0.06920646131038666,
|
10483 |
+
"learning_rate": 1.6131641773900807e-06,
|
10484 |
+
"loss": 11.7405,
|
10485 |
+
"step": 1492
|
10486 |
+
},
|
10487 |
+
{
|
10488 |
+
"epoch": 0.0489953974517799,
|
10489 |
+
"grad_norm": 0.061681117862463,
|
10490 |
+
"learning_rate": 1.577609274572267e-06,
|
10491 |
+
"loss": 11.7296,
|
10492 |
+
"step": 1493
|
10493 |
+
},
|
10494 |
+
{
|
10495 |
+
"epoch": 0.04902821419488223,
|
10496 |
+
"grad_norm": 0.05474342405796051,
|
10497 |
+
"learning_rate": 1.5424474585247539e-06,
|
10498 |
+
"loss": 11.7434,
|
10499 |
+
"step": 1494
|
10500 |
+
},
|
10501 |
+
{
|
10502 |
+
"epoch": 0.04906103093798456,
|
10503 |
+
"grad_norm": 0.07861264795064926,
|
10504 |
+
"learning_rate": 1.507678869679463e-06,
|
10505 |
+
"loss": 11.7353,
|
10506 |
+
"step": 1495
|
10507 |
+
},
|
10508 |
+
{
|
10509 |
+
"epoch": 0.04909384768108689,
|
10510 |
+
"grad_norm": 0.10963724553585052,
|
10511 |
+
"learning_rate": 1.4733036468977946e-06,
|
10512 |
+
"loss": 11.723,
|
10513 |
+
"step": 1496
|
10514 |
+
},
|
10515 |
+
{
|
10516 |
+
"epoch": 0.04912666442418922,
|
10517 |
+
"grad_norm": 0.06728437542915344,
|
10518 |
+
"learning_rate": 1.4393219274700941e-06,
|
10519 |
+
"loss": 11.7267,
|
10520 |
+
"step": 1497
|
10521 |
+
},
|
10522 |
+
{
|
10523 |
+
"epoch": 0.049159481167291554,
|
10524 |
+
"grad_norm": 0.08786192536354065,
|
10525 |
+
"learning_rate": 1.4057338471151427e-06,
|
10526 |
+
"loss": 11.7353,
|
10527 |
+
"step": 1498
|
10528 |
+
},
|
10529 |
+
{
|
10530 |
+
"epoch": 0.04919229791039388,
|
10531 |
+
"grad_norm": 0.04205002263188362,
|
10532 |
+
"learning_rate": 1.3725395399795448e-06,
|
10533 |
+
"loss": 11.7373,
|
10534 |
+
"step": 1499
|
10535 |
+
},
|
10536 |
+
{
|
10537 |
+
"epoch": 0.049225114653496216,
|
10538 |
+
"grad_norm": 0.040331657975912094,
|
10539 |
+
"learning_rate": 1.3397391386372082e-06,
|
10540 |
+
"loss": 11.7415,
|
10541 |
+
"step": 1500
|
10542 |
+
},
|
10543 |
+
{
|
10544 |
+
"epoch": 0.049257931396598544,
|
10545 |
+
"grad_norm": 0.06028404086828232,
|
10546 |
+
"learning_rate": 1.3073327740888763e-06,
|
10547 |
+
"loss": 11.7386,
|
10548 |
+
"step": 1501
|
10549 |
+
},
|
10550 |
+
{
|
10551 |
+
"epoch": 0.04929074813970088,
|
10552 |
+
"grad_norm": 0.05279241502285004,
|
10553 |
+
"learning_rate": 1.2753205757615183e-06,
|
10554 |
+
"loss": 11.7317,
|
10555 |
+
"step": 1502
|
10556 |
+
},
|
10557 |
+
{
|
10558 |
+
"epoch": 0.049323564882803206,
|
10559 |
+
"grad_norm": 0.09567982703447342,
|
10560 |
+
"learning_rate": 1.2437026715078626e-06,
|
10561 |
+
"loss": 11.7394,
|
10562 |
+
"step": 1503
|
10563 |
+
},
|
10564 |
+
{
|
10565 |
+
"epoch": 0.04935638162590554,
|
10566 |
+
"grad_norm": 0.057669028639793396,
|
10567 |
+
"learning_rate": 1.212479187605897e-06,
|
10568 |
+
"loss": 11.7378,
|
10569 |
+
"step": 1504
|
10570 |
+
},
|
10571 |
+
{
|
10572 |
+
"epoch": 0.04938919836900787,
|
10573 |
+
"grad_norm": 0.06009034439921379,
|
10574 |
+
"learning_rate": 1.1816502487583147e-06,
|
10575 |
+
"loss": 11.7363,
|
10576 |
+
"step": 1505
|
10577 |
+
},
|
10578 |
+
{
|
10579 |
+
"epoch": 0.049422015112110196,
|
10580 |
+
"grad_norm": 0.0584440678358078,
|
10581 |
+
"learning_rate": 1.1512159780920683e-06,
|
10582 |
+
"loss": 11.7354,
|
10583 |
+
"step": 1506
|
10584 |
+
},
|
10585 |
+
{
|
10586 |
+
"epoch": 0.04945483185521253,
|
10587 |
+
"grad_norm": 0.07240193337202072,
|
10588 |
+
"learning_rate": 1.1211764971578276e-06,
|
10589 |
+
"loss": 11.7171,
|
10590 |
+
"step": 1507
|
10591 |
+
},
|
10592 |
+
{
|
10593 |
+
"epoch": 0.04948764859831486,
|
10594 |
+
"grad_norm": 0.04997074604034424,
|
10595 |
+
"learning_rate": 1.0915319259295565e-06,
|
10596 |
+
"loss": 11.7369,
|
10597 |
+
"step": 1508
|
10598 |
+
},
|
10599 |
+
{
|
10600 |
+
"epoch": 0.04952046534141719,
|
10601 |
+
"grad_norm": 0.04667845368385315,
|
10602 |
+
"learning_rate": 1.0622823828039696e-06,
|
10603 |
+
"loss": 11.7321,
|
10604 |
+
"step": 1509
|
10605 |
+
},
|
10606 |
+
{
|
10607 |
+
"epoch": 0.04955328208451952,
|
10608 |
+
"grad_norm": 0.10659927874803543,
|
10609 |
+
"learning_rate": 1.0334279846001106e-06,
|
10610 |
+
"loss": 11.7124,
|
10611 |
+
"step": 1510
|
10612 |
+
},
|
10613 |
+
{
|
10614 |
+
"epoch": 0.049586098827621855,
|
10615 |
+
"grad_norm": 0.062110841274261475,
|
10616 |
+
"learning_rate": 1.0049688465588291e-06,
|
10617 |
+
"loss": 11.7383,
|
10618 |
+
"step": 1511
|
10619 |
+
},
|
10620 |
+
{
|
10621 |
+
"epoch": 0.04961891557072418,
|
10622 |
+
"grad_norm": 0.05052445828914642,
|
10623 |
+
"learning_rate": 9.769050823424054e-07,
|
10624 |
+
"loss": 11.7397,
|
10625 |
+
"step": 1512
|
10626 |
+
},
|
10627 |
+
{
|
10628 |
+
"epoch": 0.04965173231382652,
|
10629 |
+
"grad_norm": 0.07188229262828827,
|
10630 |
+
"learning_rate": 9.492368040340039e-07,
|
10631 |
+
"loss": 11.7258,
|
10632 |
+
"step": 1513
|
10633 |
+
},
|
10634 |
+
{
|
10635 |
+
"epoch": 0.049684549056928845,
|
10636 |
+
"grad_norm": 0.10027482360601425,
|
10637 |
+
"learning_rate": 9.219641221372755e-07,
|
10638 |
+
"loss": 11.7237,
|
10639 |
+
"step": 1514
|
10640 |
+
},
|
10641 |
+
{
|
10642 |
+
"epoch": 0.04971736580003117,
|
10643 |
+
"grad_norm": 0.06642589718103409,
|
10644 |
+
"learning_rate": 8.950871455759346e-07,
|
10645 |
+
"loss": 11.7267,
|
10646 |
+
"step": 1515
|
10647 |
+
},
|
10648 |
+
{
|
10649 |
+
"epoch": 0.04975018254313351,
|
10650 |
+
"grad_norm": 0.047275640070438385,
|
10651 |
+
"learning_rate": 8.686059816932602e-07,
|
10652 |
+
"loss": 11.7409,
|
10653 |
+
"step": 1516
|
10654 |
+
},
|
10655 |
+
{
|
10656 |
+
"epoch": 0.049782999286235835,
|
10657 |
+
"grad_norm": 0.11810939759016037,
|
10658 |
+
"learning_rate": 8.425207362517285e-07,
|
10659 |
+
"loss": 11.7094,
|
10660 |
+
"step": 1517
|
10661 |
+
},
|
10662 |
+
{
|
10663 |
+
"epoch": 0.04981581602933817,
|
10664 |
+
"grad_norm": 0.05862468108534813,
|
10665 |
+
"learning_rate": 8.168315134325699e-07,
|
10666 |
+
"loss": 11.7351,
|
10667 |
+
"step": 1518
|
10668 |
+
},
|
10669 |
+
{
|
10670 |
+
"epoch": 0.0498486327724405,
|
10671 |
+
"grad_norm": 0.04006539657711983,
|
10672 |
+
"learning_rate": 7.915384158353245e-07,
|
10673 |
+
"loss": 11.7427,
|
10674 |
+
"step": 1519
|
10675 |
+
},
|
10676 |
+
{
|
10677 |
+
"epoch": 0.04988144951554283,
|
10678 |
+
"grad_norm": 0.04811315983533859,
|
10679 |
+
"learning_rate": 7.666415444774866e-07,
|
10680 |
+
"loss": 11.742,
|
10681 |
+
"step": 1520
|
10682 |
+
},
|
10683 |
+
{
|
10684 |
+
"epoch": 0.04991426625864516,
|
10685 |
+
"grad_norm": 0.0765606239438057,
|
10686 |
+
"learning_rate": 7.421409987940608e-07,
|
10687 |
+
"loss": 11.7264,
|
10688 |
+
"step": 1521
|
10689 |
+
},
|
10690 |
+
{
|
10691 |
+
"epoch": 0.049947083001747494,
|
10692 |
+
"grad_norm": 0.05792330205440521,
|
10693 |
+
"learning_rate": 7.180368766371515e-07,
|
10694 |
+
"loss": 11.7378,
|
10695 |
+
"step": 1522
|
10696 |
+
},
|
10697 |
+
{
|
10698 |
+
"epoch": 0.04997989974484982,
|
10699 |
+
"grad_norm": 0.05421476811170578,
|
10700 |
+
"learning_rate": 6.943292742756513e-07,
|
10701 |
+
"loss": 11.7333,
|
10702 |
+
"step": 1523
|
10703 |
+
},
|
10704 |
+
{
|
10705 |
+
"epoch": 0.05001271648795216,
|
10706 |
+
"grad_norm": 0.06898388266563416,
|
10707 |
+
"learning_rate": 6.710182863947534e-07,
|
10708 |
+
"loss": 11.7318,
|
10709 |
+
"step": 1524
|
10710 |
+
},
|
10711 |
+
{
|
10712 |
+
"epoch": 0.050045533231054484,
|
10713 |
+
"grad_norm": 0.0885835811495781,
|
10714 |
+
"learning_rate": 6.481040060956511e-07,
|
10715 |
+
"loss": 11.7357,
|
10716 |
+
"step": 1525
|
10717 |
+
},
|
10718 |
+
{
|
10719 |
+
"epoch": 0.05007834997415681,
|
10720 |
+
"grad_norm": 0.07863713800907135,
|
10721 |
+
"learning_rate": 6.255865248951276e-07,
|
10722 |
+
"loss": 11.7292,
|
10723 |
+
"step": 1526
|
10724 |
+
},
|
10725 |
+
{
|
10726 |
+
"epoch": 0.05011116671725915,
|
10727 |
+
"grad_norm": 0.03799126669764519,
|
10728 |
+
"learning_rate": 6.034659327251779e-07,
|
10729 |
+
"loss": 11.7395,
|
10730 |
+
"step": 1527
|
10731 |
+
},
|
10732 |
+
{
|
10733 |
+
"epoch": 0.050143983460361474,
|
10734 |
+
"grad_norm": 0.07774347066879272,
|
10735 |
+
"learning_rate": 5.817423179327098e-07,
|
10736 |
+
"loss": 11.7304,
|
10737 |
+
"step": 1528
|
10738 |
+
},
|
10739 |
+
{
|
10740 |
+
"epoch": 0.05017680020346381,
|
10741 |
+
"grad_norm": 0.07088611274957657,
|
10742 |
+
"learning_rate": 5.604157672791211e-07,
|
10743 |
+
"loss": 11.7436,
|
10744 |
+
"step": 1529
|
10745 |
+
},
|
10746 |
+
{
|
10747 |
+
"epoch": 0.05020961694656614,
|
10748 |
+
"grad_norm": 0.08295155316591263,
|
10749 |
+
"learning_rate": 5.394863659400228e-07,
|
10750 |
+
"loss": 11.7144,
|
10751 |
+
"step": 1530
|
10752 |
+
},
|
10753 |
+
{
|
10754 |
+
"epoch": 0.05024243368966847,
|
10755 |
+
"grad_norm": 0.04190947115421295,
|
10756 |
+
"learning_rate": 5.189541975048173e-07,
|
10757 |
+
"loss": 11.7431,
|
10758 |
+
"step": 1531
|
10759 |
+
},
|
10760 |
+
{
|
10761 |
+
"epoch": 0.0502752504327708,
|
10762 |
+
"grad_norm": 0.061545856297016144,
|
10763 |
+
"learning_rate": 4.988193439764311e-07,
|
10764 |
+
"loss": 11.7275,
|
10765 |
+
"step": 1532
|
10766 |
+
},
|
10767 |
+
{
|
10768 |
+
"epoch": 0.050308067175873133,
|
10769 |
+
"grad_norm": 0.10915189236402512,
|
10770 |
+
"learning_rate": 4.7908188577096e-07,
|
10771 |
+
"loss": 11.709,
|
10772 |
+
"step": 1533
|
10773 |
+
},
|
10774 |
+
{
|
10775 |
+
"epoch": 0.05034088391897546,
|
10776 |
+
"grad_norm": 0.04113264009356499,
|
10777 |
+
"learning_rate": 4.5974190171735874e-07,
|
10778 |
+
"loss": 11.7348,
|
10779 |
+
"step": 1534
|
10780 |
+
},
|
10781 |
+
{
|
10782 |
+
"epoch": 0.05037370066207779,
|
10783 |
+
"grad_norm": 0.0703873485326767,
|
10784 |
+
"learning_rate": 4.407994690571182e-07,
|
10785 |
+
"loss": 11.7437,
|
10786 |
+
"step": 1535
|
10787 |
+
},
|
10788 |
+
{
|
10789 |
+
"epoch": 0.050406517405180123,
|
10790 |
+
"grad_norm": 0.054062239825725555,
|
10791 |
+
"learning_rate": 4.2225466344394393e-07,
|
10792 |
+
"loss": 11.7384,
|
10793 |
+
"step": 1536
|
10794 |
+
},
|
10795 |
+
{
|
10796 |
+
"epoch": 0.05043933414828245,
|
10797 |
+
"grad_norm": 0.08565425872802734,
|
10798 |
+
"learning_rate": 4.0410755894348953e-07,
|
10799 |
+
"loss": 11.7187,
|
10800 |
+
"step": 1537
|
10801 |
+
},
|
10802 |
+
{
|
10803 |
+
"epoch": 0.050472150891384786,
|
10804 |
+
"grad_norm": 0.054301124066114426,
|
10805 |
+
"learning_rate": 3.8635822803303466e-07,
|
10806 |
+
"loss": 11.738,
|
10807 |
+
"step": 1538
|
10808 |
+
},
|
10809 |
+
{
|
10810 |
+
"epoch": 0.05050496763448711,
|
10811 |
+
"grad_norm": 0.09632788598537445,
|
10812 |
+
"learning_rate": 3.6900674160118553e-07,
|
10813 |
+
"loss": 11.727,
|
10814 |
+
"step": 1539
|
10815 |
+
},
|
10816 |
+
{
|
10817 |
+
"epoch": 0.05053778437758945,
|
10818 |
+
"grad_norm": 0.049743227660655975,
|
10819 |
+
"learning_rate": 3.520531689476192e-07,
|
10820 |
+
"loss": 11.7404,
|
10821 |
+
"step": 1540
|
10822 |
+
},
|
10823 |
+
{
|
10824 |
+
"epoch": 0.050570601120691776,
|
10825 |
+
"grad_norm": 0.06877107918262482,
|
10826 |
+
"learning_rate": 3.3549757778279514e-07,
|
10827 |
+
"loss": 11.7455,
|
10828 |
+
"step": 1541
|
10829 |
+
},
|
10830 |
+
{
|
10831 |
+
"epoch": 0.05060341786379411,
|
10832 |
+
"grad_norm": 0.053460944443941116,
|
10833 |
+
"learning_rate": 3.1934003422767755e-07,
|
10834 |
+
"loss": 11.7337,
|
10835 |
+
"step": 1542
|
10836 |
+
},
|
10837 |
+
{
|
10838 |
+
"epoch": 0.05063623460689644,
|
10839 |
+
"grad_norm": 0.06517241150140762,
|
10840 |
+
"learning_rate": 3.0358060281350244e-07,
|
10841 |
+
"loss": 11.7388,
|
10842 |
+
"step": 1543
|
10843 |
+
},
|
10844 |
+
{
|
10845 |
+
"epoch": 0.05066905134999877,
|
10846 |
+
"grad_norm": 0.0533582866191864,
|
10847 |
+
"learning_rate": 2.8821934648144424e-07,
|
10848 |
+
"loss": 11.7394,
|
10849 |
+
"step": 1544
|
10850 |
+
},
|
10851 |
+
{
|
10852 |
+
"epoch": 0.0507018680931011,
|
10853 |
+
"grad_norm": 0.10174456983804703,
|
10854 |
+
"learning_rate": 2.732563265824717e-07,
|
10855 |
+
"loss": 11.7269,
|
10856 |
+
"step": 1545
|
10857 |
+
},
|
10858 |
+
{
|
10859 |
+
"epoch": 0.05073468483620343,
|
10860 |
+
"grad_norm": 0.05859356373548508,
|
10861 |
+
"learning_rate": 2.586916028770259e-07,
|
10862 |
+
"loss": 11.7298,
|
10863 |
+
"step": 1546
|
10864 |
+
},
|
10865 |
+
{
|
10866 |
+
"epoch": 0.05076750157930576,
|
10867 |
+
"grad_norm": 0.05709284543991089,
|
10868 |
+
"learning_rate": 2.4452523353477584e-07,
|
10869 |
+
"loss": 11.7286,
|
10870 |
+
"step": 1547
|
10871 |
+
},
|
10872 |
+
{
|
10873 |
+
"epoch": 0.05080031832240809,
|
10874 |
+
"grad_norm": 0.061257701367139816,
|
10875 |
+
"learning_rate": 2.3075727513446333e-07,
|
10876 |
+
"loss": 11.738,
|
10877 |
+
"step": 1548
|
10878 |
+
},
|
10879 |
+
{
|
10880 |
+
"epoch": 0.050833135065510425,
|
10881 |
+
"grad_norm": 0.0900387167930603,
|
10882 |
+
"learning_rate": 2.1738778266356952e-07,
|
10883 |
+
"loss": 11.7263,
|
10884 |
+
"step": 1549
|
10885 |
+
},
|
10886 |
+
{
|
10887 |
+
"epoch": 0.05086595180861275,
|
10888 |
+
"grad_norm": 0.06343595683574677,
|
10889 |
+
"learning_rate": 2.0441680951821528e-07,
|
10890 |
+
"loss": 11.7325,
|
10891 |
+
"step": 1550
|
10892 |
+
},
|
10893 |
+
{
|
10894 |
+
"epoch": 0.05089876855171509,
|
10895 |
+
"grad_norm": 0.07590248435735703,
|
10896 |
+
"learning_rate": 1.9184440750281697e-07,
|
10897 |
+
"loss": 11.7364,
|
10898 |
+
"step": 1551
|
10899 |
+
},
|
10900 |
+
{
|
10901 |
+
"epoch": 0.050931585294817415,
|
10902 |
+
"grad_norm": 0.08634886890649796,
|
10903 |
+
"learning_rate": 1.7967062683001967e-07,
|
10904 |
+
"loss": 11.7149,
|
10905 |
+
"step": 1552
|
10906 |
+
},
|
10907 |
+
{
|
10908 |
+
"epoch": 0.05096440203791975,
|
10909 |
+
"grad_norm": 0.07112760096788406,
|
10910 |
+
"learning_rate": 1.6789551612035325e-07,
|
10911 |
+
"loss": 11.7376,
|
10912 |
+
"step": 1553
|
10913 |
+
},
|
10914 |
+
{
|
10915 |
+
"epoch": 0.05099721878102208,
|
10916 |
+
"grad_norm": 0.07765110582113266,
|
10917 |
+
"learning_rate": 1.565191224021656e-07,
|
10918 |
+
"loss": 11.718,
|
10919 |
+
"step": 1554
|
10920 |
+
},
|
10921 |
+
{
|
10922 |
+
"epoch": 0.051030035524124405,
|
10923 |
+
"grad_norm": 0.03257081285119057,
|
10924 |
+
"learning_rate": 1.4554149111135617e-07,
|
10925 |
+
"loss": 11.7417,
|
10926 |
+
"step": 1555
|
10927 |
+
},
|
10928 |
+
{
|
10929 |
+
"epoch": 0.05106285226722674,
|
10930 |
+
"grad_norm": 0.04912369325757027,
|
10931 |
+
"learning_rate": 1.3496266609119844e-07,
|
10932 |
+
"loss": 11.7375,
|
10933 |
+
"step": 1556
|
10934 |
+
},
|
10935 |
+
{
|
10936 |
+
"epoch": 0.05109566901032907,
|
10937 |
+
"grad_norm": 0.033625099807977676,
|
10938 |
+
"learning_rate": 1.2478268959221772e-07,
|
10939 |
+
"loss": 11.7395,
|
10940 |
+
"step": 1557
|
10941 |
+
},
|
10942 |
+
{
|
10943 |
+
"epoch": 0.0511284857534314,
|
10944 |
+
"grad_norm": 0.03911784291267395,
|
10945 |
+
"learning_rate": 1.150016022719691e-07,
|
10946 |
+
"loss": 11.7364,
|
10947 |
+
"step": 1558
|
10948 |
+
},
|
10949 |
+
{
|
10950 |
+
"epoch": 0.05116130249653373,
|
10951 |
+
"grad_norm": 0.15983366966247559,
|
10952 |
+
"learning_rate": 1.0561944319489314e-07,
|
10953 |
+
"loss": 11.6922,
|
10954 |
+
"step": 1559
|
10955 |
+
},
|
10956 |
+
{
|
10957 |
+
"epoch": 0.051194119239636064,
|
10958 |
+
"grad_norm": 0.04815153405070305,
|
10959 |
+
"learning_rate": 9.663624983217156e-08,
|
10960 |
+
"loss": 11.7401,
|
10961 |
+
"step": 1560
|
10962 |
+
},
|
10963 |
+
{
|
10964 |
+
"epoch": 0.05122693598273839,
|
10965 |
+
"grad_norm": 0.09805814176797867,
|
10966 |
+
"learning_rate": 8.80520580615496e-08,
|
10967 |
+
"loss": 11.7174,
|
10968 |
+
"step": 1561
|
10969 |
+
},
|
10970 |
+
{
|
10971 |
+
"epoch": 0.051259752725840727,
|
10972 |
+
"grad_norm": 0.06710517406463623,
|
10973 |
+
"learning_rate": 7.986690216722492e-08,
|
10974 |
+
"loss": 11.7378,
|
10975 |
+
"step": 1562
|
10976 |
+
},
|
10977 |
+
{
|
10978 |
+
"epoch": 0.051292569468943054,
|
10979 |
+
"grad_norm": 0.11421916633844376,
|
10980 |
+
"learning_rate": 7.208081483970341e-08,
|
10981 |
+
"loss": 11.717,
|
10982 |
+
"step": 1563
|
10983 |
+
},
|
10984 |
+
{
|
10985 |
+
"epoch": 0.05132538621204539,
|
10986 |
+
"grad_norm": 0.06702856719493866,
|
10987 |
+
"learning_rate": 6.469382717563255e-08,
|
10988 |
+
"loss": 11.7145,
|
10989 |
+
"step": 1564
|
10990 |
+
},
|
10991 |
+
{
|
10992 |
+
"epoch": 0.051358202955147716,
|
10993 |
+
"grad_norm": 0.07197005301713943,
|
10994 |
+
"learning_rate": 5.770596867772371e-08,
|
10995 |
+
"loss": 11.7404,
|
10996 |
+
"step": 1565
|
10997 |
+
},
|
10998 |
+
{
|
10999 |
+
"epoch": 0.051391019698250044,
|
11000 |
+
"grad_norm": 0.0416610911488533,
|
11001 |
+
"learning_rate": 5.1117267254607856e-08,
|
11002 |
+
"loss": 11.7356,
|
11003 |
+
"step": 1566
|
11004 |
+
},
|
11005 |
+
{
|
11006 |
+
"epoch": 0.05142383644135238,
|
11007 |
+
"grad_norm": 0.0418710820376873,
|
11008 |
+
"learning_rate": 4.492774922075782e-08,
|
11009 |
+
"loss": 11.7311,
|
11010 |
+
"step": 1567
|
11011 |
+
},
|
11012 |
+
{
|
11013 |
+
"epoch": 0.051456653184454706,
|
11014 |
+
"grad_norm": 0.08003939688205719,
|
11015 |
+
"learning_rate": 3.913743929631064e-08,
|
11016 |
+
"loss": 11.7272,
|
11017 |
+
"step": 1568
|
11018 |
+
},
|
11019 |
+
{
|
11020 |
+
"epoch": 0.05148946992755704,
|
11021 |
+
"grad_norm": 0.094752237200737,
|
11022 |
+
"learning_rate": 3.374636060706759e-08,
|
11023 |
+
"loss": 11.7169,
|
11024 |
+
"step": 1569
|
11025 |
+
},
|
11026 |
+
{
|
11027 |
+
"epoch": 0.05152228667065937,
|
11028 |
+
"grad_norm": 0.05441580340266228,
|
11029 |
+
"learning_rate": 2.8754534684316547e-08,
|
11030 |
+
"loss": 11.7367,
|
11031 |
+
"step": 1570
|
11032 |
+
},
|
11033 |
+
{
|
11034 |
+
"epoch": 0.0515551034137617,
|
11035 |
+
"grad_norm": 0.041577838361263275,
|
11036 |
+
"learning_rate": 2.416198146478754e-08,
|
11037 |
+
"loss": 11.7405,
|
11038 |
+
"step": 1571
|
11039 |
+
},
|
11040 |
+
{
|
11041 |
+
"epoch": 0.05158792015686403,
|
11042 |
+
"grad_norm": 0.05274864658713341,
|
11043 |
+
"learning_rate": 1.9968719290552883e-08,
|
11044 |
+
"loss": 11.7322,
|
11045 |
+
"step": 1572
|
11046 |
+
},
|
11047 |
+
{
|
11048 |
+
"epoch": 0.051620736899966366,
|
11049 |
+
"grad_norm": 0.0627567246556282,
|
11050 |
+
"learning_rate": 1.6174764909004937e-08,
|
11051 |
+
"loss": 11.7392,
|
11052 |
+
"step": 1573
|
11053 |
+
},
|
11054 |
+
{
|
11055 |
+
"epoch": 0.05165355364306869,
|
11056 |
+
"grad_norm": 0.06852841377258301,
|
11057 |
+
"learning_rate": 1.2780133472711787e-08,
|
11058 |
+
"loss": 11.7388,
|
11059 |
+
"step": 1574
|
11060 |
+
},
|
11061 |
+
{
|
11062 |
+
"epoch": 0.05168637038617102,
|
11063 |
+
"grad_norm": 0.05179280787706375,
|
11064 |
+
"learning_rate": 9.78483853940615e-09,
|
11065 |
+
"loss": 11.7414,
|
11066 |
+
"step": 1575
|
11067 |
+
},
|
11068 |
+
{
|
11069 |
+
"epoch": 0.051719187129273356,
|
11070 |
+
"grad_norm": 0.047133058309555054,
|
11071 |
+
"learning_rate": 7.188892071929854e-09,
|
11072 |
+
"loss": 11.7376,
|
11073 |
+
"step": 1576
|
11074 |
+
},
|
11075 |
+
{
|
11076 |
+
"epoch": 0.05175200387237568,
|
11077 |
+
"grad_norm": 0.09052584320306778,
|
11078 |
+
"learning_rate": 4.992304438156126e-09,
|
11079 |
+
"loss": 11.7383,
|
11080 |
+
"step": 1577
|
11081 |
+
},
|
11082 |
+
{
|
11083 |
+
"epoch": 0.05178482061547802,
|
11084 |
+
"grad_norm": 0.04038314148783684,
|
11085 |
+
"learning_rate": 3.1950844110006926e-09,
|
11086 |
+
"loss": 11.742,
|
11087 |
+
"step": 1578
|
11088 |
+
},
|
11089 |
+
{
|
11090 |
+
"epoch": 0.051817637358580346,
|
11091 |
+
"grad_norm": 0.08092480152845383,
|
11092 |
+
"learning_rate": 1.7972391683218626e-09,
|
11093 |
+
"loss": 11.7241,
|
11094 |
+
"step": 1579
|
11095 |
+
},
|
11096 |
+
{
|
11097 |
+
"epoch": 0.05185045410168268,
|
11098 |
+
"grad_norm": 0.0871976912021637,
|
11099 |
+
"learning_rate": 7.9877429295383e-10,
|
11100 |
+
"loss": 11.725,
|
11101 |
+
"step": 1580
|
11102 |
+
},
|
11103 |
+
{
|
11104 |
+
"epoch": 0.05188327084478501,
|
11105 |
+
"grad_norm": 0.0691569522023201,
|
11106 |
+
"learning_rate": 1.996937726289616e-10,
|
11107 |
+
"loss": 11.7303,
|
11108 |
+
"step": 1581
|
11109 |
+
},
|
11110 |
+
{
|
11111 |
+
"epoch": 0.05191608758788734,
|
11112 |
+
"grad_norm": 0.050536803901195526,
|
11113 |
+
"learning_rate": 0.0,
|
11114 |
+
"loss": 11.7425,
|
11115 |
+
"step": 1582
|
11116 |
}
|
11117 |
],
|
11118 |
"logging_steps": 1,
|
|
|
11127 |
"should_evaluate": false,
|
11128 |
"should_log": false,
|
11129 |
"should_save": true,
|
11130 |
+
"should_training_stop": true
|
11131 |
},
|
11132 |
"attributes": {}
|
11133 |
}
|
11134 |
},
|
11135 |
+
"total_flos": 87926102360064.0,
|
11136 |
"train_batch_size": 2,
|
11137 |
"trial_name": null,
|
11138 |
"trial_params": null
|