Ahatsham committed on
Commit
333f7cc
·
verified ·
1 Parent(s): 364ab1e

Model save

Browse files
README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: meta-llama/Meta-Llama-3-8B
3
+ library_name: peft
4
+ license: llama3
5
+ metrics:
6
+ - accuracy
7
+ tags:
8
+ - generated_from_trainer
9
+ model-index:
10
+ - name: Output_llama2_70-15-15
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # Output_llama2_70-15-15
18
+
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.6250
22
+ - Balanced Accuracy: 0.6326
23
+ - Accuracy: 0.6282
24
+
25
+ ## Model description
26
+
27
+ More information needed
28
+
29
+ ## Intended uses & limitations
30
+
31
+ More information needed
32
+
33
+ ## Training and evaluation data
34
+
35
+ More information needed
36
+
37
+ ## Training procedure
38
+
39
+ ### Training hyperparameters
40
+
41
+ The following hyperparameters were used during training:
42
+ - learning_rate: 1e-05
43
+ - train_batch_size: 16
44
+ - eval_batch_size: 16
45
+ - seed: 42
46
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
+ - lr_scheduler_type: linear
48
+ - num_epochs: 20
49
+
50
+ ### Training results
51
+
52
+ | Training Loss | Epoch | Step | Validation Loss | Balanced Accuracy | Accuracy |
53
+ |:-------------:|:-----:|:----:|:---------------:|:-----------------:|:--------:|
54
+ | No log | 1.0 | 46 | 0.7111 | 0.5764 | 0.5641 |
55
+ | No log | 2.0 | 92 | 0.7043 | 0.5656 | 0.5577 |
56
+ | No log | 3.0 | 138 | 0.6619 | 0.5142 | 0.5192 |
57
+ | No log | 4.0 | 184 | 0.7013 | 0.5595 | 0.5513 |
58
+ | No log | 5.0 | 230 | 0.6493 | 0.5620 | 0.5577 |
59
+ | No log | 6.0 | 276 | 0.6496 | 0.5671 | 0.5641 |
60
+ | No log | 7.0 | 322 | 0.6466 | 0.5798 | 0.5769 |
61
+ | No log | 8.0 | 368 | 0.6748 | 0.5527 | 0.5513 |
62
+ | No log | 9.0 | 414 | 0.6551 | 0.5692 | 0.5705 |
63
+ | No log | 10.0 | 460 | 0.6205 | 0.6063 | 0.5833 |
64
+ | 0.6541 | 11.0 | 506 | 0.6537 | 0.6020 | 0.6026 |
65
+ | 0.6541 | 12.0 | 552 | 0.6379 | 0.6167 | 0.6154 |
66
+ | 0.6541 | 13.0 | 598 | 0.6243 | 0.6107 | 0.6026 |
67
+ | 0.6541 | 14.0 | 644 | 0.6248 | 0.6074 | 0.6026 |
68
+ | 0.6541 | 15.0 | 690 | 0.6172 | 0.6370 | 0.6218 |
69
+ | 0.6541 | 16.0 | 736 | 0.6237 | 0.6202 | 0.6154 |
70
+ | 0.6541 | 17.0 | 782 | 0.6308 | 0.6230 | 0.6218 |
71
+ | 0.6541 | 18.0 | 828 | 0.6179 | 0.6319 | 0.6218 |
72
+ | 0.6541 | 19.0 | 874 | 0.6252 | 0.6326 | 0.6282 |
73
+ | 0.6541 | 20.0 | 920 | 0.6250 | 0.6326 | 0.6282 |
74
+
75
+
76
+ ### Framework versions
77
+
78
+ - PEFT 0.10.0
79
+ - Transformers 4.41.2
80
+ PyTorch 2.3.1+cu121
81
+ - Datasets 3.2.0
82
+ - Tokenizers 0.19.1
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae0d06c0f3b3a658f6f5f6b97c0c94a7d715e8b0954890d1fff0342ba1faf3fc
3
  size 16818656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eb33dc0f3ed1883b5b290afe98054c3bb46d9c133c067783affba82d91a06f0
3
  size 16818656
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "total_flos": 4.976764349571072e+17,
4
- "train_loss": 0.4557167053222656,
5
- "train_runtime": 45521.9577,
6
  "train_samples": 730,
7
- "train_samples_per_second": 0.321,
8
- "train_steps_per_second": 0.02
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "total_flos": 4.970048339828736e+17,
4
+ "train_loss": 0.6203255363132643,
5
+ "train_runtime": 33373.6734,
6
  "train_samples": 730,
7
+ "train_samples_per_second": 0.437,
8
+ "train_steps_per_second": 0.028
9
  }
tokenizer.json CHANGED
@@ -6,7 +6,14 @@
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
- "padding": null,
 
 
 
 
 
 
 
10
  "added_tokens": [
11
  {
12
  "id": 128000,
 
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 128001,
14
+ "pad_type_id": 0,
15
+ "pad_token": "<|end_of_text|>"
16
+ },
17
  "added_tokens": [
18
  {
19
  "id": 128000,
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "total_flos": 4.976764349571072e+17,
4
- "train_loss": 0.4557167053222656,
5
- "train_runtime": 45521.9577,
6
  "train_samples": 730,
7
- "train_samples_per_second": 0.321,
8
- "train_steps_per_second": 0.02
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "total_flos": 4.970048339828736e+17,
4
+ "train_loss": 0.6203255363132643,
5
+ "train_runtime": 33373.6734,
6
  "train_samples": 730,
7
+ "train_samples_per_second": 0.437,
8
+ "train_steps_per_second": 0.028
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.7435897435897436,
3
- "best_model_checkpoint": "Output_llama2_70-15-15/checkpoint-460",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 920,
@@ -10,219 +10,219 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6538461538461539,
14
- "eval_balanced_accuracy": 0.654945054945055,
15
- "eval_loss": 0.6548082232475281,
16
- "eval_runtime": 142.4927,
17
- "eval_samples_per_second": 1.095,
18
- "eval_steps_per_second": 0.07,
19
  "step": 46
20
  },
21
  {
22
  "epoch": 2.0,
23
- "eval_accuracy": 0.6153846153846154,
24
- "eval_balanced_accuracy": 0.6180613090306546,
25
- "eval_loss": 0.659588634967804,
26
- "eval_runtime": 174.4587,
27
- "eval_samples_per_second": 0.894,
28
- "eval_steps_per_second": 0.057,
29
  "step": 92
30
  },
31
  {
32
  "epoch": 3.0,
33
- "eval_accuracy": 0.5833333333333334,
34
- "eval_balanced_accuracy": 0.7773972602739726,
35
- "eval_loss": 0.5924286842346191,
36
- "eval_runtime": 145.3834,
37
- "eval_samples_per_second": 1.073,
38
- "eval_steps_per_second": 0.069,
39
  "step": 138
40
  },
41
  {
42
  "epoch": 4.0,
43
- "eval_accuracy": 0.6474358974358975,
44
- "eval_balanced_accuracy": 0.6468253968253967,
45
- "eval_loss": 0.621242105960846,
46
- "eval_runtime": 157.0532,
47
- "eval_samples_per_second": 0.993,
48
- "eval_steps_per_second": 0.064,
49
  "step": 184
50
  },
51
  {
52
  "epoch": 5.0,
53
- "eval_accuracy": 0.6666666666666666,
54
- "eval_balanced_accuracy": 0.6741095162147794,
55
- "eval_loss": 0.5860592722892761,
56
- "eval_runtime": 152.4923,
57
- "eval_samples_per_second": 1.023,
58
- "eval_steps_per_second": 0.066,
59
  "step": 230
60
  },
61
  {
62
  "epoch": 6.0,
63
- "eval_accuracy": 0.6538461538461539,
64
- "eval_balanced_accuracy": 0.6597350327016602,
65
- "eval_loss": 0.6232044696807861,
66
- "eval_runtime": 143.706,
67
- "eval_samples_per_second": 1.086,
68
- "eval_steps_per_second": 0.07,
69
  "step": 276
70
  },
71
  {
72
  "epoch": 7.0,
73
- "eval_accuracy": 0.6346153846153846,
74
- "eval_balanced_accuracy": 0.6890120967741935,
75
- "eval_loss": 0.5387519001960754,
76
- "eval_runtime": 153.5233,
77
- "eval_samples_per_second": 1.016,
78
- "eval_steps_per_second": 0.065,
79
  "step": 322
80
  },
81
  {
82
  "epoch": 8.0,
83
- "eval_accuracy": 0.6923076923076923,
84
- "eval_balanced_accuracy": 0.7074555779446785,
85
- "eval_loss": 0.5306457281112671,
86
- "eval_runtime": 158.9271,
87
- "eval_samples_per_second": 0.982,
88
- "eval_steps_per_second": 0.063,
89
  "step": 368
90
  },
91
  {
92
  "epoch": 9.0,
93
- "eval_accuracy": 0.6923076923076923,
94
- "eval_balanced_accuracy": 0.6994583260527695,
95
- "eval_loss": 0.5140182375907898,
96
- "eval_runtime": 145.2382,
97
- "eval_samples_per_second": 1.074,
98
- "eval_steps_per_second": 0.069,
99
  "step": 414
100
  },
101
  {
102
  "epoch": 10.0,
103
- "eval_accuracy": 0.7435897435897436,
104
- "eval_balanced_accuracy": 0.7438276719138359,
105
- "eval_loss": 0.513781726360321,
106
- "eval_runtime": 144.459,
107
- "eval_samples_per_second": 1.08,
108
- "eval_steps_per_second": 0.069,
109
  "step": 460
110
  },
111
  {
112
  "epoch": 10.869565217391305,
113
- "grad_norm": 22.628210067749023,
114
  "learning_rate": 4.565217391304348e-06,
115
- "loss": 0.5585,
116
  "step": 500
117
  },
118
  {
119
  "epoch": 11.0,
120
- "eval_accuracy": 0.7371794871794872,
121
- "eval_balanced_accuracy": 0.7398989898989898,
122
- "eval_loss": 0.49725937843322754,
123
- "eval_runtime": 145.8734,
124
- "eval_samples_per_second": 1.069,
125
- "eval_steps_per_second": 0.069,
126
  "step": 506
127
  },
128
  {
129
  "epoch": 12.0,
130
- "eval_accuracy": 0.6794871794871795,
131
- "eval_balanced_accuracy": 0.6999809269502193,
132
- "eval_loss": 0.49190282821655273,
133
- "eval_runtime": 148.1722,
134
- "eval_samples_per_second": 1.053,
135
- "eval_steps_per_second": 0.067,
136
  "step": 552
137
  },
138
  {
139
  "epoch": 13.0,
140
- "eval_accuracy": 0.717948717948718,
141
- "eval_balanced_accuracy": 0.7293992557150452,
142
- "eval_loss": 0.4932830035686493,
143
- "eval_runtime": 150.2623,
144
- "eval_samples_per_second": 1.038,
145
- "eval_steps_per_second": 0.067,
146
  "step": 598
147
  },
148
  {
149
  "epoch": 14.0,
150
- "eval_accuracy": 0.7051282051282052,
151
- "eval_balanced_accuracy": 0.7297348846080488,
152
- "eval_loss": 0.49414268136024475,
153
- "eval_runtime": 152.7381,
154
- "eval_samples_per_second": 1.021,
155
- "eval_steps_per_second": 0.065,
156
  "step": 644
157
  },
158
  {
159
  "epoch": 15.0,
160
- "eval_accuracy": 0.7115384615384616,
161
- "eval_balanced_accuracy": 0.7239285714285715,
162
- "eval_loss": 0.4975796341896057,
163
- "eval_runtime": 148.3264,
164
- "eval_samples_per_second": 1.052,
165
- "eval_steps_per_second": 0.067,
166
  "step": 690
167
  },
168
  {
169
  "epoch": 16.0,
170
- "eval_accuracy": 0.7371794871794872,
171
- "eval_balanced_accuracy": 0.7386363636363636,
172
- "eval_loss": 0.5156741738319397,
173
- "eval_runtime": 151.5711,
174
- "eval_samples_per_second": 1.029,
175
- "eval_steps_per_second": 0.066,
176
  "step": 736
177
  },
178
  {
179
  "epoch": 17.0,
180
- "eval_accuracy": 0.717948717948718,
181
- "eval_balanced_accuracy": 0.720879120879121,
182
- "eval_loss": 0.5169395208358765,
183
- "eval_runtime": 152.1754,
184
- "eval_samples_per_second": 1.025,
185
- "eval_steps_per_second": 0.066,
186
  "step": 782
187
  },
188
  {
189
  "epoch": 18.0,
190
- "eval_accuracy": 0.7243589743589743,
191
- "eval_balanced_accuracy": 0.7282608695652174,
192
- "eval_loss": 0.5141614079475403,
193
- "eval_runtime": 146.7922,
194
- "eval_samples_per_second": 1.063,
195
- "eval_steps_per_second": 0.068,
196
  "step": 828
197
  },
198
  {
199
  "epoch": 19.0,
200
- "eval_accuracy": 0.7115384615384616,
201
- "eval_balanced_accuracy": 0.7167124227865478,
202
- "eval_loss": 0.5163176655769348,
203
- "eval_runtime": 112.8422,
204
- "eval_samples_per_second": 1.382,
205
- "eval_steps_per_second": 0.089,
206
  "step": 874
207
  },
208
  {
209
  "epoch": 20.0,
210
- "eval_accuracy": 0.7115384615384616,
211
- "eval_balanced_accuracy": 0.7113095238095238,
212
- "eval_loss": 0.5302315950393677,
213
- "eval_runtime": 112.7072,
214
- "eval_samples_per_second": 1.384,
215
- "eval_steps_per_second": 0.089,
216
  "step": 920
217
  },
218
  {
219
  "epoch": 20.0,
220
  "step": 920,
221
- "total_flos": 4.976764349571072e+17,
222
- "train_loss": 0.4557167053222656,
223
- "train_runtime": 45521.9577,
224
- "train_samples_per_second": 0.321,
225
- "train_steps_per_second": 0.02
226
  }
227
  ],
228
  "logging_steps": 500,
@@ -233,7 +233,7 @@
233
  "stateful_callbacks": {
234
  "EarlyStoppingCallback": {
235
  "args": {
236
- "early_stopping_patience": 10,
237
  "early_stopping_threshold": 0.0
238
  },
239
  "attributes": {
@@ -251,7 +251,7 @@
251
  "attributes": {}
252
  }
253
  },
254
- "total_flos": 4.976764349571072e+17,
255
  "train_batch_size": 16,
256
  "trial_name": null,
257
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6282051282051282,
3
+ "best_model_checkpoint": "Output_llama2_70-15-15/checkpoint-874",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
6
  "global_step": 920,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5641025641025641,
14
+ "eval_balanced_accuracy": 0.5764176417641764,
15
+ "eval_loss": 0.7111307978630066,
16
+ "eval_runtime": 111.2436,
17
+ "eval_samples_per_second": 1.402,
18
+ "eval_steps_per_second": 0.09,
19
  "step": 46
20
  },
21
  {
22
  "epoch": 2.0,
23
+ "eval_accuracy": 0.5576923076923077,
24
+ "eval_balanced_accuracy": 0.565625,
25
+ "eval_loss": 0.7042574882507324,
26
+ "eval_runtime": 111.3987,
27
+ "eval_samples_per_second": 1.4,
28
+ "eval_steps_per_second": 0.09,
29
  "step": 92
30
  },
31
  {
32
  "epoch": 3.0,
33
+ "eval_accuracy": 0.5192307692307693,
34
+ "eval_balanced_accuracy": 0.5141509433962264,
35
+ "eval_loss": 0.6619334816932678,
36
+ "eval_runtime": 111.2028,
37
+ "eval_samples_per_second": 1.403,
38
+ "eval_steps_per_second": 0.09,
39
  "step": 138
40
  },
41
  {
42
  "epoch": 4.0,
43
+ "eval_accuracy": 0.5512820512820513,
44
+ "eval_balanced_accuracy": 0.5594967674296698,
45
+ "eval_loss": 0.7012639045715332,
46
+ "eval_runtime": 111.1032,
47
+ "eval_samples_per_second": 1.404,
48
+ "eval_steps_per_second": 0.09,
49
  "step": 184
50
  },
51
  {
52
  "epoch": 5.0,
53
+ "eval_accuracy": 0.5576923076923077,
54
+ "eval_balanced_accuracy": 0.5620300751879699,
55
+ "eval_loss": 0.6493321657180786,
56
+ "eval_runtime": 111.1592,
57
+ "eval_samples_per_second": 1.403,
58
+ "eval_steps_per_second": 0.09,
59
  "step": 230
60
  },
61
  {
62
  "epoch": 6.0,
63
+ "eval_accuracy": 0.5641025641025641,
64
+ "eval_balanced_accuracy": 0.5670505563146594,
65
+ "eval_loss": 0.6495689749717712,
66
+ "eval_runtime": 111.1491,
67
+ "eval_samples_per_second": 1.404,
68
+ "eval_steps_per_second": 0.09,
69
  "step": 276
70
  },
71
  {
72
  "epoch": 7.0,
73
+ "eval_accuracy": 0.5769230769230769,
74
+ "eval_balanced_accuracy": 0.5798319327731092,
75
+ "eval_loss": 0.6466049551963806,
76
+ "eval_runtime": 111.1355,
77
+ "eval_samples_per_second": 1.404,
78
+ "eval_steps_per_second": 0.09,
79
  "step": 322
80
  },
81
  {
82
  "epoch": 8.0,
83
+ "eval_accuracy": 0.5512820512820513,
84
+ "eval_balanced_accuracy": 0.5527314738405678,
85
+ "eval_loss": 0.6747537851333618,
86
+ "eval_runtime": 111.3484,
87
+ "eval_samples_per_second": 1.401,
88
+ "eval_steps_per_second": 0.09,
89
  "step": 368
90
  },
91
  {
92
  "epoch": 9.0,
93
+ "eval_accuracy": 0.5705128205128205,
94
+ "eval_balanced_accuracy": 0.5691844919786097,
95
+ "eval_loss": 0.6550834774971008,
96
+ "eval_runtime": 111.4062,
97
+ "eval_samples_per_second": 1.4,
98
+ "eval_steps_per_second": 0.09,
99
  "step": 414
100
  },
101
  {
102
  "epoch": 10.0,
103
+ "eval_accuracy": 0.5833333333333334,
104
+ "eval_balanced_accuracy": 0.6063162970106075,
105
+ "eval_loss": 0.6205254793167114,
106
+ "eval_runtime": 111.3582,
107
+ "eval_samples_per_second": 1.401,
108
+ "eval_steps_per_second": 0.09,
109
  "step": 460
110
  },
111
  {
112
  "epoch": 10.869565217391305,
113
+ "grad_norm": 3.162184715270996,
114
  "learning_rate": 4.565217391304348e-06,
115
+ "loss": 0.6541,
116
  "step": 500
117
  },
118
  {
119
  "epoch": 11.0,
120
+ "eval_accuracy": 0.6025641025641025,
121
+ "eval_balanced_accuracy": 0.6019753086419752,
122
+ "eval_loss": 0.6536551117897034,
123
+ "eval_runtime": 111.1486,
124
+ "eval_samples_per_second": 1.404,
125
+ "eval_steps_per_second": 0.09,
126
  "step": 506
127
  },
128
  {
129
  "epoch": 12.0,
130
+ "eval_accuracy": 0.6153846153846154,
131
+ "eval_balanced_accuracy": 0.6167385677308024,
132
+ "eval_loss": 0.6378962397575378,
133
+ "eval_runtime": 111.1235,
134
+ "eval_samples_per_second": 1.404,
135
+ "eval_steps_per_second": 0.09,
136
  "step": 552
137
  },
138
  {
139
  "epoch": 13.0,
140
+ "eval_accuracy": 0.6025641025641025,
141
+ "eval_balanced_accuracy": 0.6107190539767309,
142
+ "eval_loss": 0.6242974400520325,
143
+ "eval_runtime": 111.2048,
144
+ "eval_samples_per_second": 1.403,
145
+ "eval_steps_per_second": 0.09,
146
  "step": 598
147
  },
148
  {
149
  "epoch": 14.0,
150
+ "eval_accuracy": 0.6025641025641025,
151
+ "eval_balanced_accuracy": 0.60743725957135,
152
+ "eval_loss": 0.6248365640640259,
153
+ "eval_runtime": 111.2066,
154
+ "eval_samples_per_second": 1.403,
155
+ "eval_steps_per_second": 0.09,
156
  "step": 644
157
  },
158
  {
159
  "epoch": 15.0,
160
+ "eval_accuracy": 0.6217948717948718,
161
+ "eval_balanced_accuracy": 0.6369565217391304,
162
+ "eval_loss": 0.6171802878379822,
163
+ "eval_runtime": 111.2635,
164
+ "eval_samples_per_second": 1.402,
165
+ "eval_steps_per_second": 0.09,
166
  "step": 690
167
  },
168
  {
169
  "epoch": 16.0,
170
+ "eval_accuracy": 0.6153846153846154,
171
+ "eval_balanced_accuracy": 0.6201620162016201,
172
+ "eval_loss": 0.6236566305160522,
173
+ "eval_runtime": 111.2833,
174
+ "eval_samples_per_second": 1.402,
175
+ "eval_steps_per_second": 0.09,
176
  "step": 736
177
  },
178
  {
179
  "epoch": 17.0,
180
+ "eval_accuracy": 0.6217948717948718,
181
+ "eval_balanced_accuracy": 0.6230267673301304,
182
+ "eval_loss": 0.630794107913971,
183
+ "eval_runtime": 111.1945,
184
+ "eval_samples_per_second": 1.403,
185
+ "eval_steps_per_second": 0.09,
186
  "step": 782
187
  },
188
  {
189
  "epoch": 18.0,
190
+ "eval_accuracy": 0.6217948717948718,
191
+ "eval_balanced_accuracy": 0.6318867924528302,
192
+ "eval_loss": 0.6179038882255554,
193
+ "eval_runtime": 111.4097,
194
+ "eval_samples_per_second": 1.4,
195
+ "eval_steps_per_second": 0.09,
196
  "step": 828
197
  },
198
  {
199
  "epoch": 19.0,
200
+ "eval_accuracy": 0.6282051282051282,
201
+ "eval_balanced_accuracy": 0.63264221158958,
202
+ "eval_loss": 0.6252104640007019,
203
+ "eval_runtime": 111.3148,
204
+ "eval_samples_per_second": 1.401,
205
+ "eval_steps_per_second": 0.09,
206
  "step": 874
207
  },
208
  {
209
  "epoch": 20.0,
210
+ "eval_accuracy": 0.6282051282051282,
211
+ "eval_balanced_accuracy": 0.63264221158958,
212
+ "eval_loss": 0.6249805688858032,
213
+ "eval_runtime": 111.384,
214
+ "eval_samples_per_second": 1.401,
215
+ "eval_steps_per_second": 0.09,
216
  "step": 920
217
  },
218
  {
219
  "epoch": 20.0,
220
  "step": 920,
221
+ "total_flos": 4.970048339828736e+17,
222
+ "train_loss": 0.6203255363132643,
223
+ "train_runtime": 33373.6734,
224
+ "train_samples_per_second": 0.437,
225
+ "train_steps_per_second": 0.028
226
  }
227
  ],
228
  "logging_steps": 500,
 
233
  "stateful_callbacks": {
234
  "EarlyStoppingCallback": {
235
  "args": {
236
+ "early_stopping_patience": 19,
237
  "early_stopping_threshold": 0.0
238
  },
239
  "attributes": {
 
251
  "attributes": {}
252
  }
253
  },
254
+ "total_flos": 4.970048339828736e+17,
255
  "train_batch_size": 16,
256
  "trial_name": null,
257
  "trial_params": null