yuerlong committed on
Commit
b48c414
·
verified ·
1 Parent(s): 7c30c4b

Model save

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. all_results.json +2 -2
  3. train_results.json +2 -2
  4. trainer_state.json +20 -20
  5. training_args.bin +1 -1
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuerlong-western-digital/huggingface/runs/31zqiu2r)
31
 
32
 
33
  This model was trained with SFT.
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuerlong-western-digital/huggingface/runs/hh5gta2p)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 0.999259807549963,
3
  "total_flos": 76888336760832.0,
4
  "train_loss": 0.7676667234632704,
5
- "train_runtime": 26434.6474,
6
  "train_samples": 16610,
7
- "train_samples_per_second": 0.817,
8
  "train_steps_per_second": 0.026
9
  }
 
2
  "epoch": 0.999259807549963,
3
  "total_flos": 76888336760832.0,
4
  "train_loss": 0.7676667234632704,
5
+ "train_runtime": 26097.2515,
6
  "train_samples": 16610,
7
+ "train_samples_per_second": 0.828,
8
  "train_steps_per_second": 0.026
9
  }
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 0.999259807549963,
3
  "total_flos": 76888336760832.0,
4
  "train_loss": 0.7676667234632704,
5
- "train_runtime": 26434.6474,
6
  "train_samples": 16610,
7
- "train_samples_per_second": 0.817,
8
  "train_steps_per_second": 0.026
9
  }
 
2
  "epoch": 0.999259807549963,
3
  "total_flos": 76888336760832.0,
4
  "train_loss": 0.7676667234632704,
5
+ "train_runtime": 26097.2515,
6
  "train_samples": 16610,
7
+ "train_samples_per_second": 0.828,
8
  "train_steps_per_second": 0.026
9
  }
trainer_state.json CHANGED
@@ -151,9 +151,9 @@
151
  {
152
  "epoch": 0.14803849000740193,
153
  "eval_loss": 0.8196535110473633,
154
- "eval_runtime": 41.9255,
155
- "eval_samples_per_second": 3.053,
156
- "eval_steps_per_second": 0.382,
157
  "step": 100
158
  },
159
  {
@@ -299,9 +299,9 @@
299
  {
300
  "epoch": 0.29607698001480387,
301
  "eval_loss": 0.7897325754165649,
302
- "eval_runtime": 42.8407,
303
- "eval_samples_per_second": 2.988,
304
- "eval_steps_per_second": 0.373,
305
  "step": 200
306
  },
307
  {
@@ -447,9 +447,9 @@
447
  {
448
  "epoch": 0.44411547002220575,
449
  "eval_loss": 0.7756889462471008,
450
- "eval_runtime": 42.6329,
451
- "eval_samples_per_second": 3.002,
452
- "eval_steps_per_second": 0.375,
453
  "step": 300
454
  },
455
  {
@@ -595,9 +595,9 @@
595
  {
596
  "epoch": 0.5921539600296077,
597
  "eval_loss": 0.7637075781822205,
598
- "eval_runtime": 41.453,
599
- "eval_samples_per_second": 3.088,
600
- "eval_steps_per_second": 0.386,
601
  "step": 400
602
  },
603
  {
@@ -743,9 +743,9 @@
743
  {
744
  "epoch": 0.7401924500370096,
745
  "eval_loss": 0.7563657164573669,
746
- "eval_runtime": 41.8297,
747
- "eval_samples_per_second": 3.06,
748
- "eval_steps_per_second": 0.383,
749
  "step": 500
750
  },
751
  {
@@ -891,9 +891,9 @@
891
  {
892
  "epoch": 0.8882309400444115,
893
  "eval_loss": 0.7532988786697388,
894
- "eval_runtime": 41.2008,
895
- "eval_samples_per_second": 3.107,
896
- "eval_steps_per_second": 0.388,
897
  "step": 600
898
  },
899
  {
@@ -1006,8 +1006,8 @@
1006
  "step": 675,
1007
  "total_flos": 76888336760832.0,
1008
  "train_loss": 0.7676667234632704,
1009
- "train_runtime": 26434.6474,
1010
- "train_samples_per_second": 0.817,
1011
  "train_steps_per_second": 0.026
1012
  }
1013
  ],
 
151
  {
152
  "epoch": 0.14803849000740193,
153
  "eval_loss": 0.8196535110473633,
154
+ "eval_runtime": 40.7418,
155
+ "eval_samples_per_second": 3.142,
156
+ "eval_steps_per_second": 0.393,
157
  "step": 100
158
  },
159
  {
 
299
  {
300
  "epoch": 0.29607698001480387,
301
  "eval_loss": 0.7897325754165649,
302
+ "eval_runtime": 41.2408,
303
+ "eval_samples_per_second": 3.104,
304
+ "eval_steps_per_second": 0.388,
305
  "step": 200
306
  },
307
  {
 
447
  {
448
  "epoch": 0.44411547002220575,
449
  "eval_loss": 0.7756889462471008,
450
+ "eval_runtime": 41.747,
451
+ "eval_samples_per_second": 3.066,
452
+ "eval_steps_per_second": 0.383,
453
  "step": 300
454
  },
455
  {
 
595
  {
596
  "epoch": 0.5921539600296077,
597
  "eval_loss": 0.7637075781822205,
598
+ "eval_runtime": 41.5052,
599
+ "eval_samples_per_second": 3.084,
600
+ "eval_steps_per_second": 0.385,
601
  "step": 400
602
  },
603
  {
 
743
  {
744
  "epoch": 0.7401924500370096,
745
  "eval_loss": 0.7563657164573669,
746
+ "eval_runtime": 42.1127,
747
+ "eval_samples_per_second": 3.039,
748
+ "eval_steps_per_second": 0.38,
749
  "step": 500
750
  },
751
  {
 
891
  {
892
  "epoch": 0.8882309400444115,
893
  "eval_loss": 0.7532988786697388,
894
+ "eval_runtime": 40.9176,
895
+ "eval_samples_per_second": 3.128,
896
+ "eval_steps_per_second": 0.391,
897
  "step": 600
898
  },
899
  {
 
1006
  "step": 675,
1007
  "total_flos": 76888336760832.0,
1008
  "train_loss": 0.7676667234632704,
1009
+ "train_runtime": 26097.2515,
1010
+ "train_samples_per_second": 0.828,
1011
  "train_steps_per_second": 0.026
1012
  }
1013
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c53af33dfbb02c81c7b0dd682da80d9aaa24f63d866344d12ed5a3982db7b8a6
3
  size 7352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba8f0a151e7b6f259d7ca3385ef4acb6544739b6cfe0638270e86115f16b13b4
3
  size 7352