Training in progress, step 28000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 891558696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff193c75cfe266a4454f3b865f678cd0068e31d158d56f3a08c31bd1c8ca8180
|
3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1783272762
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:058172c34ee10347ff67ff2341a5f4bfdcee6c70e269830728adf925c422cbcb
|
3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0242710dd76fcfc259b7718df92653875f59a11b4f0133d9bd37c3e685163566
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:117137eaff655ce606b81582da9e0c028be111ad40ac1072d2fad13931816255
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1940,6 +1940,41 @@
|
|
1940 |
"learning_rate": 7.781777777777778e-06,
|
1941 |
"loss": 0.0593,
|
1942 |
"step": 27500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1943 |
}
|
1944 |
],
|
1945 |
"logging_steps": 100,
|
@@ -1959,7 +1994,7 @@
|
|
1959 |
"attributes": {}
|
1960 |
}
|
1961 |
},
|
1962 |
-
"total_flos": 6.
|
1963 |
"train_batch_size": 4,
|
1964 |
"trial_name": null,
|
1965 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.8666666666666667,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 28000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1940 |
"learning_rate": 7.781777777777778e-06,
|
1941 |
"loss": 0.0593,
|
1942 |
"step": 27500
|
1943 |
+
},
|
1944 |
+
{
|
1945 |
+
"epoch": 1.8399999999999999,
|
1946 |
+
"grad_norm": 0.24254228174686432,
|
1947 |
+
"learning_rate": 7.737333333333335e-06,
|
1948 |
+
"loss": 0.057,
|
1949 |
+
"step": 27600
|
1950 |
+
},
|
1951 |
+
{
|
1952 |
+
"epoch": 1.8466666666666667,
|
1953 |
+
"grad_norm": 0.0748680830001831,
|
1954 |
+
"learning_rate": 7.69288888888889e-06,
|
1955 |
+
"loss": 0.0574,
|
1956 |
+
"step": 27700
|
1957 |
+
},
|
1958 |
+
{
|
1959 |
+
"epoch": 1.8533333333333335,
|
1960 |
+
"grad_norm": 0.2756935656070709,
|
1961 |
+
"learning_rate": 7.648444444444445e-06,
|
1962 |
+
"loss": 0.0592,
|
1963 |
+
"step": 27800
|
1964 |
+
},
|
1965 |
+
{
|
1966 |
+
"epoch": 1.8599999999999999,
|
1967 |
+
"grad_norm": 0.26682248711586,
|
1968 |
+
"learning_rate": 7.604e-06,
|
1969 |
+
"loss": 0.0607,
|
1970 |
+
"step": 27900
|
1971 |
+
},
|
1972 |
+
{
|
1973 |
+
"epoch": 1.8666666666666667,
|
1974 |
+
"grad_norm": 0.2434193342924118,
|
1975 |
+
"learning_rate": 7.5595555555555565e-06,
|
1976 |
+
"loss": 0.056,
|
1977 |
+
"step": 28000
|
1978 |
}
|
1979 |
],
|
1980 |
"logging_steps": 100,
|
|
|
1994 |
"attributes": {}
|
1995 |
}
|
1996 |
},
|
1997 |
+
"total_flos": 6.820328374272e+16,
|
1998 |
"train_batch_size": 4,
|
1999 |
"trial_name": null,
|
2000 |
"trial_params": null
|