Model save
Browse files- README.md +1 -1
- all_results.json +2 -2
- train_results.json +2 -2
- trainer_state.json +20 -20
- training_args.bin +1 -1
README.md
CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
|
|
27 |
|
28 |
## Training procedure
|
29 |
|
30 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuerlong-western-digital/huggingface/runs/
|
31 |
|
32 |
|
33 |
This model was trained with SFT.
|
|
|
27 |
|
28 |
## Training procedure
|
29 |
|
30 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuerlong-western-digital/huggingface/runs/hh5gta2p)
|
31 |
|
32 |
|
33 |
This model was trained with SFT.
|
all_results.json
CHANGED
@@ -2,8 +2,8 @@
|
|
2 |
"epoch": 0.999259807549963,
|
3 |
"total_flos": 76888336760832.0,
|
4 |
"train_loss": 0.7676667234632704,
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 16610,
|
7 |
-
"train_samples_per_second": 0.
|
8 |
"train_steps_per_second": 0.026
|
9 |
}
|
|
|
2 |
"epoch": 0.999259807549963,
|
3 |
"total_flos": 76888336760832.0,
|
4 |
"train_loss": 0.7676667234632704,
|
5 |
+
"train_runtime": 26097.2515,
|
6 |
"train_samples": 16610,
|
7 |
+
"train_samples_per_second": 0.828,
|
8 |
"train_steps_per_second": 0.026
|
9 |
}
|
train_results.json
CHANGED
@@ -2,8 +2,8 @@
|
|
2 |
"epoch": 0.999259807549963,
|
3 |
"total_flos": 76888336760832.0,
|
4 |
"train_loss": 0.7676667234632704,
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 16610,
|
7 |
-
"train_samples_per_second": 0.
|
8 |
"train_steps_per_second": 0.026
|
9 |
}
|
|
|
2 |
"epoch": 0.999259807549963,
|
3 |
"total_flos": 76888336760832.0,
|
4 |
"train_loss": 0.7676667234632704,
|
5 |
+
"train_runtime": 26097.2515,
|
6 |
"train_samples": 16610,
|
7 |
+
"train_samples_per_second": 0.828,
|
8 |
"train_steps_per_second": 0.026
|
9 |
}
|
trainer_state.json
CHANGED
@@ -151,9 +151,9 @@
|
|
151 |
{
|
152 |
"epoch": 0.14803849000740193,
|
153 |
"eval_loss": 0.8196535110473633,
|
154 |
-
"eval_runtime":
|
155 |
-
"eval_samples_per_second": 3.
|
156 |
-
"eval_steps_per_second": 0.
|
157 |
"step": 100
|
158 |
},
|
159 |
{
|
@@ -299,9 +299,9 @@
|
|
299 |
{
|
300 |
"epoch": 0.29607698001480387,
|
301 |
"eval_loss": 0.7897325754165649,
|
302 |
-
"eval_runtime":
|
303 |
-
"eval_samples_per_second":
|
304 |
-
"eval_steps_per_second": 0.
|
305 |
"step": 200
|
306 |
},
|
307 |
{
|
@@ -447,9 +447,9 @@
|
|
447 |
{
|
448 |
"epoch": 0.44411547002220575,
|
449 |
"eval_loss": 0.7756889462471008,
|
450 |
-
"eval_runtime":
|
451 |
-
"eval_samples_per_second": 3.
|
452 |
-
"eval_steps_per_second": 0.
|
453 |
"step": 300
|
454 |
},
|
455 |
{
|
@@ -595,9 +595,9 @@
|
|
595 |
{
|
596 |
"epoch": 0.5921539600296077,
|
597 |
"eval_loss": 0.7637075781822205,
|
598 |
-
"eval_runtime": 41.
|
599 |
-
"eval_samples_per_second": 3.
|
600 |
-
"eval_steps_per_second": 0.
|
601 |
"step": 400
|
602 |
},
|
603 |
{
|
@@ -743,9 +743,9 @@
|
|
743 |
{
|
744 |
"epoch": 0.7401924500370096,
|
745 |
"eval_loss": 0.7563657164573669,
|
746 |
-
"eval_runtime":
|
747 |
-
"eval_samples_per_second": 3.
|
748 |
-
"eval_steps_per_second": 0.
|
749 |
"step": 500
|
750 |
},
|
751 |
{
|
@@ -891,9 +891,9 @@
|
|
891 |
{
|
892 |
"epoch": 0.8882309400444115,
|
893 |
"eval_loss": 0.7532988786697388,
|
894 |
-
"eval_runtime":
|
895 |
-
"eval_samples_per_second": 3.
|
896 |
-
"eval_steps_per_second": 0.
|
897 |
"step": 600
|
898 |
},
|
899 |
{
|
@@ -1006,8 +1006,8 @@
|
|
1006 |
"step": 675,
|
1007 |
"total_flos": 76888336760832.0,
|
1008 |
"train_loss": 0.7676667234632704,
|
1009 |
-
"train_runtime":
|
1010 |
-
"train_samples_per_second": 0.
|
1011 |
"train_steps_per_second": 0.026
|
1012 |
}
|
1013 |
],
|
|
|
151 |
{
|
152 |
"epoch": 0.14803849000740193,
|
153 |
"eval_loss": 0.8196535110473633,
|
154 |
+
"eval_runtime": 40.7418,
|
155 |
+
"eval_samples_per_second": 3.142,
|
156 |
+
"eval_steps_per_second": 0.393,
|
157 |
"step": 100
|
158 |
},
|
159 |
{
|
|
|
299 |
{
|
300 |
"epoch": 0.29607698001480387,
|
301 |
"eval_loss": 0.7897325754165649,
|
302 |
+
"eval_runtime": 41.2408,
|
303 |
+
"eval_samples_per_second": 3.104,
|
304 |
+
"eval_steps_per_second": 0.388,
|
305 |
"step": 200
|
306 |
},
|
307 |
{
|
|
|
447 |
{
|
448 |
"epoch": 0.44411547002220575,
|
449 |
"eval_loss": 0.7756889462471008,
|
450 |
+
"eval_runtime": 41.747,
|
451 |
+
"eval_samples_per_second": 3.066,
|
452 |
+
"eval_steps_per_second": 0.383,
|
453 |
"step": 300
|
454 |
},
|
455 |
{
|
|
|
595 |
{
|
596 |
"epoch": 0.5921539600296077,
|
597 |
"eval_loss": 0.7637075781822205,
|
598 |
+
"eval_runtime": 41.5052,
|
599 |
+
"eval_samples_per_second": 3.084,
|
600 |
+
"eval_steps_per_second": 0.385,
|
601 |
"step": 400
|
602 |
},
|
603 |
{
|
|
|
743 |
{
|
744 |
"epoch": 0.7401924500370096,
|
745 |
"eval_loss": 0.7563657164573669,
|
746 |
+
"eval_runtime": 42.1127,
|
747 |
+
"eval_samples_per_second": 3.039,
|
748 |
+
"eval_steps_per_second": 0.38,
|
749 |
"step": 500
|
750 |
},
|
751 |
{
|
|
|
891 |
{
|
892 |
"epoch": 0.8882309400444115,
|
893 |
"eval_loss": 0.7532988786697388,
|
894 |
+
"eval_runtime": 40.9176,
|
895 |
+
"eval_samples_per_second": 3.128,
|
896 |
+
"eval_steps_per_second": 0.391,
|
897 |
"step": 600
|
898 |
},
|
899 |
{
|
|
|
1006 |
"step": 675,
|
1007 |
"total_flos": 76888336760832.0,
|
1008 |
"train_loss": 0.7676667234632704,
|
1009 |
+
"train_runtime": 26097.2515,
|
1010 |
+
"train_samples_per_second": 0.828,
|
1011 |
"train_steps_per_second": 0.026
|
1012 |
}
|
1013 |
],
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba8f0a151e7b6f259d7ca3385ef4acb6544739b6cfe0638270e86115f16b13b4
|
3 |
size 7352
|