nkuAlexLee committed on
Commit
334667b
·
1 Parent(s): e51adf6

Upload 13 files

Browse files
Files changed (7) hide show
  1. config.json +1 -1
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +1 -1
  4. rng_state.pth +2 -2
  5. scheduler.pt +1 -1
  6. trainer_state.json +39 -81
  7. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "E:/Model/checkpoint-572",
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
 
1
  {
2
+ "_name_or_path": "DDDSSS/translation_en-zh",
3
  "activation_dropout": 0.0,
4
  "activation_function": "swish",
5
  "add_bias_logits": false,
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e269e5bcca7af7e360163e76801ed38d0af985742049e0fa42e225d7b86db1dc
3
  size 619500549
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25bf36a56ac69410a3763cc4eace80e0aa12b75a85c46f41dc6b3f104a6538e
3
  size 619500549
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c87f1b34f17456bb9a3968decd7ac635005c664eda45edc4b50fddb3f3cd3ba
3
  size 310022533
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bbc0a089851cae6fbc14a78afae6fb0fa036c80699bf4fc9edd49b24c87843f
3
  size 310022533
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a671bcc93ab8ea7fef76bafb809be586f39447cc2edabe5e7f5de7b2999a61c3
3
- size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4369e459495ac86f5932270534e063e608774e09de46bc38426a0e61c4a3153b
3
+ size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b5c00d109dd7a47976d4ff594486cfa209d826da02a3d7b7bd52c9f452bfddd
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71ce1e82580d06836277ac609a46ef220afb039c95e4479b9265ae1045ab90e0
3
  size 627
trainer_state.json CHANGED
@@ -2,107 +2,65 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 4.0,
5
- "global_step": 4172,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.48,
12
- "learning_rate": 0.0001761744966442953,
13
- "loss": 2.9499,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.96,
18
- "learning_rate": 0.00015220517737296263,
19
- "loss": 2.4762,
20
- "step": 1000
21
- },
22
  {
23
  "epoch": 1.0,
24
- "eval_bleu": 24.199,
25
- "eval_gen_len": 25.8869,
26
- "eval_jit_compilation_time": 37.6313,
27
- "eval_loss": 2.099637746810913,
28
- "eval_runtime": 305.9149,
29
- "eval_samples_per_second": 3.409,
30
- "eval_steps_per_second": 0.853,
31
- "step": 1043
32
- },
33
- {
34
- "epoch": 1.44,
35
- "learning_rate": 0.00012823585810162994,
36
- "loss": 1.5053,
37
- "step": 1500
38
- },
39
- {
40
- "epoch": 1.92,
41
- "learning_rate": 0.00010426653883029723,
42
- "loss": 1.4464,
43
- "step": 2000
44
  },
45
  {
46
  "epoch": 2.0,
47
- "eval_bleu": 32.9405,
48
- "eval_gen_len": 25.3404,
49
- "eval_jit_compilation_time": 15.0422,
50
- "eval_loss": 1.9842771291732788,
51
- "eval_runtime": 203.0089,
52
- "eval_samples_per_second": 5.138,
53
- "eval_steps_per_second": 1.286,
54
- "step": 2086
55
- },
56
- {
57
- "epoch": 2.4,
58
- "learning_rate": 8.029721955896454e-05,
59
- "loss": 0.8818,
60
- "step": 2500
61
- },
62
- {
63
- "epoch": 2.88,
64
- "learning_rate": 5.632790028763183e-05,
65
- "loss": 0.8012,
66
- "step": 3000
67
  },
68
  {
69
  "epoch": 3.0,
70
- "eval_bleu": 34.2544,
71
- "eval_gen_len": 25.6347,
72
- "eval_jit_compilation_time": 15.4553,
73
- "eval_loss": 1.9202322959899902,
74
- "eval_runtime": 211.4889,
75
- "eval_samples_per_second": 4.932,
76
- "eval_steps_per_second": 1.234,
77
- "step": 3129
78
- },
79
- {
80
- "epoch": 3.36,
81
- "learning_rate": 3.235858101629914e-05,
82
- "loss": 0.4984,
83
- "step": 3500
84
  },
85
  {
86
- "epoch": 3.84,
87
- "learning_rate": 8.389261744966444e-06,
88
- "loss": 0.3757,
89
- "step": 4000
90
  },
91
  {
92
  "epoch": 4.0,
93
- "eval_bleu": 37.1127,
94
- "eval_gen_len": 25.7699,
95
- "eval_jit_compilation_time": 16.4019,
96
- "eval_loss": 1.9090477228164673,
97
- "eval_runtime": 195.2776,
98
- "eval_samples_per_second": 5.341,
99
- "eval_steps_per_second": 1.337,
100
- "step": 4172
101
  }
102
  ],
103
- "max_steps": 4172,
104
  "num_train_epochs": 4,
105
- "total_flos": 276925806673920.0,
106
  "trial_name": null,
107
  "trial_params": null
108
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 4.0,
5
+ "global_step": 572,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.0,
12
+ "eval_bleu": 7.7545,
13
+ "eval_gen_len": 14.8741,
14
+ "eval_jit_compilation_time": 11.4032,
15
+ "eval_loss": 2.9266164302825928,
16
+ "eval_runtime": 12.5758,
17
+ "eval_samples_per_second": 11.371,
18
+ "eval_steps_per_second": 2.863,
19
+ "step": 143
 
 
 
 
 
 
 
 
 
 
 
 
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_bleu": 10.4293,
24
+ "eval_gen_len": 14.0629,
25
+ "eval_jit_compilation_time": 12.1037,
26
+ "eval_loss": 2.850106716156006,
27
+ "eval_runtime": 11.8772,
28
+ "eval_samples_per_second": 12.04,
29
+ "eval_steps_per_second": 3.031,
30
+ "step": 286
 
 
 
 
 
 
 
 
 
 
 
 
31
  },
32
  {
33
  "epoch": 3.0,
34
+ "eval_bleu": 11.0503,
35
+ "eval_gen_len": 14.5664,
36
+ "eval_jit_compilation_time": 12.7826,
37
+ "eval_loss": 2.875509262084961,
38
+ "eval_runtime": 12.4472,
39
+ "eval_samples_per_second": 11.489,
40
+ "eval_steps_per_second": 2.892,
41
+ "step": 429
 
 
 
 
 
 
42
  },
43
  {
44
+ "epoch": 3.5,
45
+ "learning_rate": 2.5874125874125877e-05,
46
+ "loss": 2.0491,
47
+ "step": 500
48
  },
49
  {
50
  "epoch": 4.0,
51
+ "eval_bleu": 12.4752,
52
+ "eval_gen_len": 14.6014,
53
+ "eval_jit_compilation_time": 12.9294,
54
+ "eval_loss": 2.9099602699279785,
55
+ "eval_runtime": 12.6411,
56
+ "eval_samples_per_second": 11.312,
57
+ "eval_steps_per_second": 2.848,
58
+ "step": 572
59
  }
60
  ],
61
+ "max_steps": 572,
62
  "num_train_epochs": 4,
63
+ "total_flos": 11456058359808.0,
64
  "trial_name": null,
65
  "trial_params": null
66
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c80a47adcb5aa1accd81cc203b8008430788786a4190cd9d2c1716a1a717d2e4
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:093ba43f3bf932ee2c0a0909229582d9d4b5b60aa73b0d1fe5caf0a48ad77ddb
3
  size 4027