Edison commited on
Commit
0982e5b
·
1 Parent(s): 4403a77

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +15 -15
  2. eval_results.json +10 -10
  3. train_results.json +5 -5
  4. trainer_state.json +22 -174
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9702625751344511,
4
- "eval_f1": 0.9683927370544722,
5
- "eval_loss": 0.11914637684822083,
6
- "eval_precision": 0.9769335142469471,
7
- "eval_recall": 0.96,
8
- "eval_roc_auc": 0.9697652016857315,
9
- "eval_runtime": 9.4949,
10
- "eval_samples": 6322,
11
- "eval_samples_per_second": 665.828,
12
- "eval_steps_per_second": 5.266,
13
- "train_loss": 0.11545699092274161,
14
- "train_runtime": 488.0473,
15
- "train_samples": 25287,
16
- "train_samples_per_second": 155.438,
17
- "train_steps_per_second": 1.217
18
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9273952095808383,
4
+ "eval_f1": 0.9438332368268674,
5
+ "eval_loss": 0.23274052143096924,
6
+ "eval_precision": 0.9690844233055886,
7
+ "eval_recall": 0.9198645598194131,
8
+ "eval_roc_auc": 0.9310433910208176,
9
+ "eval_runtime": 1.9896,
10
+ "eval_samples": 1336,
11
+ "eval_samples_per_second": 671.502,
12
+ "eval_steps_per_second": 5.529,
13
+ "train_loss": 0.19368775685628256,
14
+ "train_runtime": 134.498,
15
+ "train_samples": 5342,
16
+ "train_samples_per_second": 119.154,
17
+ "train_steps_per_second": 0.937
18
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9702625751344511,
4
- "eval_f1": 0.9683927370544722,
5
- "eval_loss": 0.11914637684822083,
6
- "eval_precision": 0.9769335142469471,
7
- "eval_recall": 0.96,
8
- "eval_roc_auc": 0.9697652016857315,
9
- "eval_runtime": 9.4949,
10
- "eval_samples": 6322,
11
- "eval_samples_per_second": 665.828,
12
- "eval_steps_per_second": 5.266
13
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9273952095808383,
4
+ "eval_f1": 0.9438332368268674,
5
+ "eval_loss": 0.23274052143096924,
6
+ "eval_precision": 0.9690844233055886,
7
+ "eval_recall": 0.9198645598194131,
8
+ "eval_roc_auc": 0.9310433910208176,
9
+ "eval_runtime": 1.9896,
10
+ "eval_samples": 1336,
11
+ "eval_samples_per_second": 671.502,
12
+ "eval_steps_per_second": 5.529
13
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.11545699092274161,
4
- "train_runtime": 488.0473,
5
- "train_samples": 25287,
6
- "train_samples_per_second": 155.438,
7
- "train_steps_per_second": 1.217
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.19368775685628256,
4
+ "train_runtime": 134.498,
5
+ "train_samples": 5342,
6
+ "train_samples_per_second": 119.154,
7
+ "train_steps_per_second": 0.937
8
  }
trainer_state.json CHANGED
@@ -2,195 +2,43 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 594,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.33,
12
- "learning_rate": 8.905723905723906e-05,
13
- "loss": 0.3716,
14
  "step": 66
15
  },
16
  {
17
- "epoch": 0.33,
18
- "eval_accuracy": 0.9296108826320785,
19
- "eval_f1": 0.9248945147679325,
20
- "eval_loss": 0.20176392793655396,
21
- "eval_precision": 0.9367521367521368,
22
- "eval_recall": 0.9133333333333333,
23
- "eval_roc_auc": 0.9288219947822597,
24
- "eval_runtime": 9.531,
25
- "eval_samples_per_second": 663.307,
26
- "eval_steps_per_second": 5.246,
27
  "step": 66
28
  },
29
- {
30
- "epoch": 0.67,
31
- "learning_rate": 7.794612794612795e-05,
32
- "loss": 0.1745,
33
- "step": 132
34
- },
35
- {
36
- "epoch": 0.67,
37
- "eval_accuracy": 0.9533375514077823,
38
- "eval_f1": 0.9497530233350365,
39
- "eval_loss": 0.14681555330753326,
40
- "eval_precision": 0.9710902124695228,
41
- "eval_recall": 0.9293333333333333,
42
- "eval_roc_auc": 0.9521741922536624,
43
- "eval_runtime": 9.5186,
44
- "eval_samples_per_second": 664.175,
45
- "eval_steps_per_second": 5.253,
46
- "step": 132
47
- },
48
- {
49
- "epoch": 1.0,
50
- "learning_rate": 6.683501683501684e-05,
51
- "loss": 0.1346,
52
- "step": 198
53
- },
54
- {
55
- "epoch": 1.0,
56
- "eval_accuracy": 0.9656754191711484,
57
- "eval_f1": 0.9633631605605266,
58
- "eval_loss": 0.10907050222158432,
59
- "eval_precision": 0.976052001368457,
60
- "eval_recall": 0.951,
61
- "eval_roc_auc": 0.9649641782059001,
62
- "eval_runtime": 9.5208,
63
- "eval_samples_per_second": 664.018,
64
- "eval_steps_per_second": 5.252,
65
- "step": 198
66
- },
67
- {
68
- "epoch": 1.33,
69
- "learning_rate": 5.572390572390572e-05,
70
- "loss": 0.0917,
71
- "step": 264
72
- },
73
- {
74
- "epoch": 1.33,
75
- "eval_accuracy": 0.9647263524201202,
76
- "eval_f1": 0.9621969825394135,
77
- "eval_loss": 0.12939223647117615,
78
- "eval_precision": 0.9789582614694722,
79
- "eval_recall": 0.946,
80
- "eval_roc_auc": 0.9638187838651415,
81
- "eval_runtime": 9.5288,
82
- "eval_samples_per_second": 663.461,
83
- "eval_steps_per_second": 5.247,
84
- "step": 264
85
- },
86
- {
87
- "epoch": 1.67,
88
- "learning_rate": 4.4612794612794616e-05,
89
- "loss": 0.0877,
90
- "step": 330
91
- },
92
- {
93
- "epoch": 1.67,
94
- "eval_accuracy": 0.9667826637140146,
95
- "eval_f1": 0.9651162790697674,
96
- "eval_loss": 0.10901537537574768,
97
- "eval_precision": 0.9619205298013245,
98
- "eval_recall": 0.9683333333333334,
99
- "eval_roc_auc": 0.9668578165763597,
100
- "eval_runtime": 9.5197,
101
- "eval_samples_per_second": 664.094,
102
- "eval_steps_per_second": 5.252,
103
- "step": 330
104
- },
105
- {
106
- "epoch": 2.0,
107
- "learning_rate": 3.35016835016835e-05,
108
- "loss": 0.0731,
109
- "step": 396
110
- },
111
- {
112
- "epoch": 2.0,
113
- "eval_accuracy": 0.9688389750079089,
114
- "eval_f1": 0.9669074416260709,
115
- "eval_loss": 0.10423740744590759,
116
- "eval_precision": 0.9746020995597697,
117
- "eval_recall": 0.9593333333333334,
118
- "eval_roc_auc": 0.9683782861729882,
119
- "eval_runtime": 9.5669,
120
- "eval_samples_per_second": 660.818,
121
- "eval_steps_per_second": 5.226,
122
- "step": 396
123
- },
124
- {
125
- "epoch": 2.33,
126
- "learning_rate": 2.2390572390572393e-05,
127
- "loss": 0.0342,
128
- "step": 462
129
- },
130
- {
131
- "epoch": 2.33,
132
- "eval_accuracy": 0.969155330591585,
133
- "eval_f1": 0.9674620390455532,
134
- "eval_loss": 0.12906306982040405,
135
- "eval_precision": 0.9685933845639826,
136
- "eval_recall": 0.9663333333333334,
137
- "eval_roc_auc": 0.9690185631145897,
138
- "eval_runtime": 9.5007,
139
- "eval_samples_per_second": 665.425,
140
- "eval_steps_per_second": 5.263,
141
- "step": 462
142
- },
143
- {
144
- "epoch": 2.67,
145
- "learning_rate": 1.127946127946128e-05,
146
- "loss": 0.0375,
147
- "step": 528
148
- },
149
- {
150
- "epoch": 2.67,
151
- "eval_accuracy": 0.9705789307181272,
152
- "eval_f1": 0.9687919463087248,
153
- "eval_loss": 0.12022514641284943,
154
- "eval_precision": 0.9753378378378378,
155
- "eval_recall": 0.9623333333333334,
156
- "eval_roc_auc": 0.9701793096528197,
157
- "eval_runtime": 9.5237,
158
- "eval_samples_per_second": 663.816,
159
- "eval_steps_per_second": 5.25,
160
- "step": 528
161
- },
162
- {
163
- "epoch": 3.0,
164
- "learning_rate": 1.6835016835016834e-07,
165
- "loss": 0.0342,
166
- "step": 594
167
- },
168
- {
169
- "epoch": 3.0,
170
- "eval_accuracy": 0.9702625751344511,
171
- "eval_f1": 0.9683927370544722,
172
- "eval_loss": 0.11914637684822083,
173
- "eval_precision": 0.9769335142469471,
174
- "eval_recall": 0.96,
175
- "eval_roc_auc": 0.9697652016857315,
176
- "eval_runtime": 9.5437,
177
- "eval_samples_per_second": 662.428,
178
- "eval_steps_per_second": 5.239,
179
- "step": 594
180
- },
181
  {
182
  "epoch": 3.0,
183
- "step": 594,
184
- "total_flos": 9979933885332480.0,
185
- "train_loss": 0.11545699092274161,
186
- "train_runtime": 488.0473,
187
- "train_samples_per_second": 155.438,
188
- "train_steps_per_second": 1.217
189
  }
190
  ],
191
- "max_steps": 594,
192
  "num_train_epochs": 3,
193
- "total_flos": 9979933885332480.0,
194
  "trial_name": null,
195
  "trial_params": null
196
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 126,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.57,
12
+ "learning_rate": 4.9206349206349204e-05,
13
+ "loss": 0.2872,
14
  "step": 66
15
  },
16
  {
17
+ "epoch": 1.57,
18
+ "eval_accuracy": 0.9176646706586826,
19
+ "eval_f1": 0.9374999999999999,
20
+ "eval_loss": 0.20166487991809845,
21
+ "eval_precision": 0.9439359267734554,
22
+ "eval_recall": 0.9311512415349887,
23
+ "eval_roc_auc": 0.9111311763230499,
24
+ "eval_runtime": 1.9771,
25
+ "eval_samples_per_second": 675.722,
26
+ "eval_steps_per_second": 5.564,
27
  "step": 66
28
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  {
30
  "epoch": 3.0,
31
+ "step": 126,
32
+ "total_flos": 2108308886599680.0,
33
+ "train_loss": 0.19368775685628256,
34
+ "train_runtime": 134.498,
35
+ "train_samples_per_second": 119.154,
36
+ "train_steps_per_second": 0.937
37
  }
38
  ],
39
+ "max_steps": 126,
40
  "num_train_epochs": 3,
41
+ "total_flos": 2108308886599680.0,
42
  "trial_name": null,
43
  "trial_params": null
44
  }