ZivKassnerNK committed on
Commit
c29ff1a
·
1 Parent(s): b5ba7c1

Add evaluation results and output files to model card

Browse files
README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: code
3
+ tags:
4
+ - binary-classification
5
+ - model
6
+ - evaluation
7
+ metrics:
8
+ - average_precision: 0.67
9
+ - roc_auc: 0.77
10
+ - best threshold according to F1: 0.42
11
+ ---
12
+
13
+ # Binary Classification Model
14
+
15
+ ## Evaluation Results
16
+
17
+ **Average Precision:** 0.67
18
+ **ROC AUC:** 0.77
19
+ **Best threshold according to F1:** 0.42
20
+
21
+
22
+ ## Visualizations
23
+
24
+ ### Precision-Recall Curve
25
+ ![Precision-Recall Curve](./pr_curve.png)
26
+
27
+ ### ROC Curve
28
+ ![ROC Curve](./roc_curve.png)
29
+
30
+ ## Output Files and Directories
31
+
32
+ - 📂 `checkpoint-171/`
33
+ - `config.json`
34
+ - `model.safetensors`
35
+ - `preprocessor_config.json`
36
+ - `training_args.bin`
checkpoint-171/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "GenetikaPlus/binary_classification_model_v3.1.3_spines",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 256,
11
+ "image_size": 32,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 512,
14
+ "layer_norm_eps": 1e-12,
15
+ "model_type": "vit",
16
+ "num_attention_heads": 8,
17
+ "num_channels": 3,
18
+ "num_hidden_layers": 4,
19
+ "patch_size": 4,
20
+ "qkv_bias": true,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.46.2"
23
+ }
checkpoint-171/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3296337d01ecfc0aabdf11fceb071277d873306c7f611bb786d3aa1bfa53c035
3
+ size 8563512
checkpoint-171/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4272504bd3f8fba2f7db1db12081b07ea7d6709ff1a5062eb02c8d0f6b60ef5b
3
+ size 17171514
checkpoint-171/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": false,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 32,
20
+ "width": 32
21
+ }
22
+ }
checkpoint-171/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b835e41d78ca90e7b03e5bb0ab545ee556f7edb32620a808446fe9c2748e593c
3
+ size 14244
checkpoint-171/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa8424070a154dd5915b5a32fe02aed2c0823a7e279c136e8b450b0450061e08
3
+ size 1000
checkpoint-171/trainer_state.json ADDED
@@ -0,0 +1,1458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.80322265625,
3
+ "best_model_checkpoint": "/tmp/logs/binary_classification_model_v3.1.5_spines/checkpoint-171",
4
+ "epoch": 19.0,
5
+ "eval_steps": 500,
6
+ "global_step": 171,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1111111111111111,
13
+ "grad_norm": 474895.5625,
14
+ "learning_rate": 2.87092328892972e-10,
15
+ "loss": 0.4027,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.2222222222222222,
20
+ "grad_norm": 420552.40625,
21
+ "learning_rate": 5.74184657785944e-10,
22
+ "loss": 0.4218,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.3333333333333333,
27
+ "grad_norm": 373690.21875,
28
+ "learning_rate": 8.612769866789159e-10,
29
+ "loss": 0.4276,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.4444444444444444,
34
+ "grad_norm": 461776.09375,
35
+ "learning_rate": 1.148369315571888e-09,
36
+ "loss": 0.4082,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.5555555555555556,
41
+ "grad_norm": 263371.625,
42
+ "learning_rate": 1.43546164446486e-09,
43
+ "loss": 0.391,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.6666666666666666,
48
+ "grad_norm": 285742.5625,
49
+ "learning_rate": 1.7225539733578319e-09,
50
+ "loss": 0.3947,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.7777777777777778,
55
+ "grad_norm": 275876.28125,
56
+ "learning_rate": 2.009646302250804e-09,
57
+ "loss": 0.3982,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 0.8888888888888888,
62
+ "grad_norm": 254763.890625,
63
+ "learning_rate": 2.296738631143776e-09,
64
+ "loss": 0.3958,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 1.0,
69
+ "grad_norm": 775464.875,
70
+ "learning_rate": 2.5838309600367482e-09,
71
+ "loss": 0.425,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 1.0,
76
+ "eval_accuracy": 0.8063849333945797,
77
+ "eval_f1": 0.7877109040543944,
78
+ "eval_loss": 0.40825775265693665,
79
+ "eval_precision": 0.8266384778012685,
80
+ "eval_recall": 0.7522847522847523,
81
+ "eval_runtime": 2.8714,
82
+ "eval_samples_per_second": 1516.327,
83
+ "eval_steps_per_second": 1.045,
84
+ "step": 9
85
+ },
86
+ {
87
+ "epoch": 1.1111111111111112,
88
+ "grad_norm": 597081.6875,
89
+ "learning_rate": 2.87092328892972e-09,
90
+ "loss": 0.4126,
91
+ "step": 10
92
+ },
93
+ {
94
+ "epoch": 1.2222222222222223,
95
+ "grad_norm": 260747.1875,
96
+ "learning_rate": 3.158015617822692e-09,
97
+ "loss": 0.4022,
98
+ "step": 11
99
+ },
100
+ {
101
+ "epoch": 1.3333333333333333,
102
+ "grad_norm": 534622.6875,
103
+ "learning_rate": 3.4451079467156637e-09,
104
+ "loss": 0.4173,
105
+ "step": 12
106
+ },
107
+ {
108
+ "epoch": 1.4444444444444444,
109
+ "grad_norm": 448352.53125,
110
+ "learning_rate": 3.732200275608636e-09,
111
+ "loss": 0.3865,
112
+ "step": 13
113
+ },
114
+ {
115
+ "epoch": 1.5555555555555556,
116
+ "grad_norm": 421928.59375,
117
+ "learning_rate": 4.019292604501608e-09,
118
+ "loss": 0.4012,
119
+ "step": 14
120
+ },
121
+ {
122
+ "epoch": 1.6666666666666665,
123
+ "grad_norm": 231615.71875,
124
+ "learning_rate": 4.30638493339458e-09,
125
+ "loss": 0.4068,
126
+ "step": 15
127
+ },
128
+ {
129
+ "epoch": 1.7777777777777777,
130
+ "grad_norm": 299990.625,
131
+ "learning_rate": 4.593477262287552e-09,
132
+ "loss": 0.4235,
133
+ "step": 16
134
+ },
135
+ {
136
+ "epoch": 1.8888888888888888,
137
+ "grad_norm": 399554.59375,
138
+ "learning_rate": 4.8805695911805235e-09,
139
+ "loss": 0.3954,
140
+ "step": 17
141
+ },
142
+ {
143
+ "epoch": 2.0,
144
+ "grad_norm": 150956.59375,
145
+ "learning_rate": 5.1676619200734964e-09,
146
+ "loss": 0.4087,
147
+ "step": 18
148
+ },
149
+ {
150
+ "epoch": 2.0,
151
+ "eval_accuracy": 0.8063849333945797,
152
+ "eval_f1": 0.7879245283018868,
153
+ "eval_loss": 0.4081619381904602,
154
+ "eval_precision": 0.8259493670886076,
155
+ "eval_recall": 0.7532467532467533,
156
+ "eval_runtime": 3.0185,
157
+ "eval_samples_per_second": 1442.425,
158
+ "eval_steps_per_second": 0.994,
159
+ "step": 18
160
+ },
161
+ {
162
+ "epoch": 2.111111111111111,
163
+ "grad_norm": 578097.9375,
164
+ "learning_rate": 5.454754248966468e-09,
165
+ "loss": 0.41,
166
+ "step": 19
167
+ },
168
+ {
169
+ "epoch": 2.2222222222222223,
170
+ "grad_norm": 241448.453125,
171
+ "learning_rate": 5.74184657785944e-09,
172
+ "loss": 0.4095,
173
+ "step": 20
174
+ },
175
+ {
176
+ "epoch": 2.3333333333333335,
177
+ "grad_norm": 563117.375,
178
+ "learning_rate": 6.028938906752413e-09,
179
+ "loss": 0.4254,
180
+ "step": 21
181
+ },
182
+ {
183
+ "epoch": 2.4444444444444446,
184
+ "grad_norm": 277218.59375,
185
+ "learning_rate": 6.316031235645384e-09,
186
+ "loss": 0.3986,
187
+ "step": 22
188
+ },
189
+ {
190
+ "epoch": 2.5555555555555554,
191
+ "grad_norm": 253562.546875,
192
+ "learning_rate": 6.603123564538356e-09,
193
+ "loss": 0.4053,
194
+ "step": 23
195
+ },
196
+ {
197
+ "epoch": 2.6666666666666665,
198
+ "grad_norm": 386568.96875,
199
+ "learning_rate": 6.8902158934313275e-09,
200
+ "loss": 0.3903,
201
+ "step": 24
202
+ },
203
+ {
204
+ "epoch": 2.7777777777777777,
205
+ "grad_norm": 331298.84375,
206
+ "learning_rate": 7.1773082223243e-09,
207
+ "loss": 0.4037,
208
+ "step": 25
209
+ },
210
+ {
211
+ "epoch": 2.888888888888889,
212
+ "grad_norm": 408564.9375,
213
+ "learning_rate": 7.464400551217272e-09,
214
+ "loss": 0.4086,
215
+ "step": 26
216
+ },
217
+ {
218
+ "epoch": 3.0,
219
+ "grad_norm": 243629.5625,
220
+ "learning_rate": 7.751492880110245e-09,
221
+ "loss": 0.3882,
222
+ "step": 27
223
+ },
224
+ {
225
+ "epoch": 3.0,
226
+ "eval_accuracy": 0.8070739549839229,
227
+ "eval_f1": 0.7888386123680241,
228
+ "eval_loss": 0.40801021456718445,
229
+ "eval_precision": 0.8262243285939969,
230
+ "eval_recall": 0.7546897546897547,
231
+ "eval_runtime": 3.0995,
232
+ "eval_samples_per_second": 1404.727,
233
+ "eval_steps_per_second": 0.968,
234
+ "step": 27
235
+ },
236
+ {
237
+ "epoch": 3.111111111111111,
238
+ "grad_norm": 367006.5625,
239
+ "learning_rate": 8.038585209003216e-09,
240
+ "loss": 0.3957,
241
+ "step": 28
242
+ },
243
+ {
244
+ "epoch": 3.2222222222222223,
245
+ "grad_norm": 460813.4375,
246
+ "learning_rate": 8.325677537896187e-09,
247
+ "loss": 0.3812,
248
+ "step": 29
249
+ },
250
+ {
251
+ "epoch": 3.3333333333333335,
252
+ "grad_norm": 207972.6875,
253
+ "learning_rate": 8.61276986678916e-09,
254
+ "loss": 0.4144,
255
+ "step": 30
256
+ },
257
+ {
258
+ "epoch": 3.4444444444444446,
259
+ "grad_norm": 409202.28125,
260
+ "learning_rate": 8.899862195682133e-09,
261
+ "loss": 0.4003,
262
+ "step": 31
263
+ },
264
+ {
265
+ "epoch": 3.5555555555555554,
266
+ "grad_norm": 310814.125,
267
+ "learning_rate": 9.186954524575104e-09,
268
+ "loss": 0.4161,
269
+ "step": 32
270
+ },
271
+ {
272
+ "epoch": 3.6666666666666665,
273
+ "grad_norm": 468177.125,
274
+ "learning_rate": 9.474046853468076e-09,
275
+ "loss": 0.4164,
276
+ "step": 33
277
+ },
278
+ {
279
+ "epoch": 3.7777777777777777,
280
+ "grad_norm": 565639.8125,
281
+ "learning_rate": 9.761139182361047e-09,
282
+ "loss": 0.3942,
283
+ "step": 34
284
+ },
285
+ {
286
+ "epoch": 3.888888888888889,
287
+ "grad_norm": 346480.3125,
288
+ "learning_rate": 1.004823151125402e-08,
289
+ "loss": 0.4176,
290
+ "step": 35
291
+ },
292
+ {
293
+ "epoch": 4.0,
294
+ "grad_norm": 300739.5625,
295
+ "learning_rate": 1.0335323840146993e-08,
296
+ "loss": 0.3981,
297
+ "step": 36
298
+ },
299
+ {
300
+ "epoch": 4.0,
301
+ "eval_accuracy": 0.807762976573266,
302
+ "eval_f1": 0.789856891790108,
303
+ "eval_loss": 0.40780285000801086,
304
+ "eval_precision": 0.8261554621848739,
305
+ "eval_recall": 0.7566137566137566,
306
+ "eval_runtime": 3.1559,
307
+ "eval_samples_per_second": 1379.616,
308
+ "eval_steps_per_second": 0.951,
309
+ "step": 36
310
+ },
311
+ {
312
+ "epoch": 4.111111111111111,
313
+ "grad_norm": 283485.25,
314
+ "learning_rate": 1.0622416169039966e-08,
315
+ "loss": 0.4223,
316
+ "step": 37
317
+ },
318
+ {
319
+ "epoch": 4.222222222222222,
320
+ "grad_norm": 210353.0625,
321
+ "learning_rate": 1.0909508497932935e-08,
322
+ "loss": 0.4203,
323
+ "step": 38
324
+ },
325
+ {
326
+ "epoch": 4.333333333333333,
327
+ "grad_norm": 254267.328125,
328
+ "learning_rate": 1.1196600826825908e-08,
329
+ "loss": 0.3943,
330
+ "step": 39
331
+ },
332
+ {
333
+ "epoch": 4.444444444444445,
334
+ "grad_norm": 368198.0625,
335
+ "learning_rate": 1.148369315571888e-08,
336
+ "loss": 0.4063,
337
+ "step": 40
338
+ },
339
+ {
340
+ "epoch": 4.555555555555555,
341
+ "grad_norm": 388967.53125,
342
+ "learning_rate": 1.1770785484611853e-08,
343
+ "loss": 0.4066,
344
+ "step": 41
345
+ },
346
+ {
347
+ "epoch": 4.666666666666667,
348
+ "grad_norm": 221729.828125,
349
+ "learning_rate": 1.2057877813504826e-08,
350
+ "loss": 0.3929,
351
+ "step": 42
352
+ },
353
+ {
354
+ "epoch": 4.777777777777778,
355
+ "grad_norm": 482960.75,
356
+ "learning_rate": 1.2344970142397795e-08,
357
+ "loss": 0.4113,
358
+ "step": 43
359
+ },
360
+ {
361
+ "epoch": 4.888888888888889,
362
+ "grad_norm": 252074.890625,
363
+ "learning_rate": 1.2632062471290768e-08,
364
+ "loss": 0.3781,
365
+ "step": 44
366
+ },
367
+ {
368
+ "epoch": 5.0,
369
+ "grad_norm": 539560.9375,
370
+ "learning_rate": 1.291915480018374e-08,
371
+ "loss": 0.4297,
372
+ "step": 45
373
+ },
374
+ {
375
+ "epoch": 5.0,
376
+ "eval_accuracy": 0.808451998162609,
377
+ "eval_f1": 0.7909774436090226,
378
+ "eval_loss": 0.40754494071006775,
379
+ "eval_precision": 0.82574568288854,
380
+ "eval_recall": 0.759018759018759,
381
+ "eval_runtime": 3.477,
382
+ "eval_samples_per_second": 1252.239,
383
+ "eval_steps_per_second": 0.863,
384
+ "step": 45
385
+ },
386
+ {
387
+ "epoch": 5.111111111111111,
388
+ "grad_norm": 221841.65625,
389
+ "learning_rate": 1.3206247129076712e-08,
390
+ "loss": 0.4104,
391
+ "step": 46
392
+ },
393
+ {
394
+ "epoch": 5.222222222222222,
395
+ "grad_norm": 389851.0,
396
+ "learning_rate": 1.3493339457969685e-08,
397
+ "loss": 0.376,
398
+ "step": 47
399
+ },
400
+ {
401
+ "epoch": 5.333333333333333,
402
+ "grad_norm": 240513.25,
403
+ "learning_rate": 1.3780431786862655e-08,
404
+ "loss": 0.3952,
405
+ "step": 48
406
+ },
407
+ {
408
+ "epoch": 5.444444444444445,
409
+ "grad_norm": 451586.1875,
410
+ "learning_rate": 1.4067524115755628e-08,
411
+ "loss": 0.439,
412
+ "step": 49
413
+ },
414
+ {
415
+ "epoch": 5.555555555555555,
416
+ "grad_norm": 144918.9375,
417
+ "learning_rate": 1.43546164446486e-08,
418
+ "loss": 0.4047,
419
+ "step": 50
420
+ },
421
+ {
422
+ "epoch": 5.666666666666667,
423
+ "grad_norm": 332475.0625,
424
+ "learning_rate": 1.4641708773541572e-08,
425
+ "loss": 0.3976,
426
+ "step": 51
427
+ },
428
+ {
429
+ "epoch": 5.777777777777778,
430
+ "grad_norm": 497577.0,
431
+ "learning_rate": 1.4928801102434543e-08,
432
+ "loss": 0.4031,
433
+ "step": 52
434
+ },
435
+ {
436
+ "epoch": 5.888888888888889,
437
+ "grad_norm": 291300.15625,
438
+ "learning_rate": 1.5215893431327515e-08,
439
+ "loss": 0.394,
440
+ "step": 53
441
+ },
442
+ {
443
+ "epoch": 6.0,
444
+ "grad_norm": 505439.625,
445
+ "learning_rate": 1.550298576022049e-08,
446
+ "loss": 0.4158,
447
+ "step": 54
448
+ },
449
+ {
450
+ "epoch": 6.0,
451
+ "eval_accuracy": 0.808451998162609,
452
+ "eval_f1": 0.7915,
453
+ "eval_loss": 0.407249391078949,
454
+ "eval_precision": 0.8240499739718896,
455
+ "eval_recall": 0.7614237614237614,
456
+ "eval_runtime": 3.2609,
457
+ "eval_samples_per_second": 1335.2,
458
+ "eval_steps_per_second": 0.92,
459
+ "step": 54
460
+ },
461
+ {
462
+ "epoch": 6.111111111111111,
463
+ "grad_norm": 324893.5,
464
+ "learning_rate": 1.579007808911346e-08,
465
+ "loss": 0.4025,
466
+ "step": 55
467
+ },
468
+ {
469
+ "epoch": 6.222222222222222,
470
+ "grad_norm": 368678.25,
471
+ "learning_rate": 1.6077170418006432e-08,
472
+ "loss": 0.4055,
473
+ "step": 56
474
+ },
475
+ {
476
+ "epoch": 6.333333333333333,
477
+ "grad_norm": 255911.453125,
478
+ "learning_rate": 1.6364262746899406e-08,
479
+ "loss": 0.4027,
480
+ "step": 57
481
+ },
482
+ {
483
+ "epoch": 6.444444444444445,
484
+ "grad_norm": 464521.34375,
485
+ "learning_rate": 1.6651355075792374e-08,
486
+ "loss": 0.3845,
487
+ "step": 58
488
+ },
489
+ {
490
+ "epoch": 6.555555555555555,
491
+ "grad_norm": 98293.2578125,
492
+ "learning_rate": 1.693844740468535e-08,
493
+ "loss": 0.4181,
494
+ "step": 59
495
+ },
496
+ {
497
+ "epoch": 6.666666666666667,
498
+ "grad_norm": 153667.34375,
499
+ "learning_rate": 1.722553973357832e-08,
500
+ "loss": 0.384,
501
+ "step": 60
502
+ },
503
+ {
504
+ "epoch": 6.777777777777778,
505
+ "grad_norm": 307245.625,
506
+ "learning_rate": 1.751263206247129e-08,
507
+ "loss": 0.4285,
508
+ "step": 61
509
+ },
510
+ {
511
+ "epoch": 6.888888888888889,
512
+ "grad_norm": 313019.0625,
513
+ "learning_rate": 1.7799724391364266e-08,
514
+ "loss": 0.4083,
515
+ "step": 62
516
+ },
517
+ {
518
+ "epoch": 7.0,
519
+ "grad_norm": 377737.375,
520
+ "learning_rate": 1.8086816720257234e-08,
521
+ "loss": 0.4009,
522
+ "step": 63
523
+ },
524
+ {
525
+ "epoch": 7.0,
526
+ "eval_accuracy": 0.8079926504363804,
527
+ "eval_f1": 0.7916251246261217,
528
+ "eval_loss": 0.4069373309612274,
529
+ "eval_precision": 0.8215209518882566,
530
+ "eval_recall": 0.7638287638287639,
531
+ "eval_runtime": 3.2888,
532
+ "eval_samples_per_second": 1323.904,
533
+ "eval_steps_per_second": 0.912,
534
+ "step": 63
535
+ },
536
+ {
537
+ "epoch": 7.111111111111111,
538
+ "grad_norm": 291745.25,
539
+ "learning_rate": 1.837390904915021e-08,
540
+ "loss": 0.3934,
541
+ "step": 64
542
+ },
543
+ {
544
+ "epoch": 7.222222222222222,
545
+ "grad_norm": 371277.96875,
546
+ "learning_rate": 1.866100137804318e-08,
547
+ "loss": 0.3967,
548
+ "step": 65
549
+ },
550
+ {
551
+ "epoch": 7.333333333333333,
552
+ "grad_norm": 322570.125,
553
+ "learning_rate": 1.894809370693615e-08,
554
+ "loss": 0.4054,
555
+ "step": 66
556
+ },
557
+ {
558
+ "epoch": 7.444444444444445,
559
+ "grad_norm": 214398.75,
560
+ "learning_rate": 1.9235186035829126e-08,
561
+ "loss": 0.4069,
562
+ "step": 67
563
+ },
564
+ {
565
+ "epoch": 7.555555555555555,
566
+ "grad_norm": 213520.4375,
567
+ "learning_rate": 1.9522278364722094e-08,
568
+ "loss": 0.4127,
569
+ "step": 68
570
+ },
571
+ {
572
+ "epoch": 7.666666666666667,
573
+ "grad_norm": 303630.6875,
574
+ "learning_rate": 1.9809370693615065e-08,
575
+ "loss": 0.4015,
576
+ "step": 69
577
+ },
578
+ {
579
+ "epoch": 7.777777777777778,
580
+ "grad_norm": 307784.3125,
581
+ "learning_rate": 2.009646302250804e-08,
582
+ "loss": 0.4286,
583
+ "step": 70
584
+ },
585
+ {
586
+ "epoch": 7.888888888888889,
587
+ "grad_norm": 276592.65625,
588
+ "learning_rate": 2.038355535140101e-08,
589
+ "loss": 0.4014,
590
+ "step": 71
591
+ },
592
+ {
593
+ "epoch": 8.0,
594
+ "grad_norm": 183215.234375,
595
+ "learning_rate": 2.0670647680293986e-08,
596
+ "loss": 0.3701,
597
+ "step": 72
598
+ },
599
+ {
600
+ "epoch": 8.0,
601
+ "eval_accuracy": 0.8091410197519522,
602
+ "eval_f1": 0.7934377330350485,
603
+ "eval_loss": 0.4066150188446045,
604
+ "eval_precision": 0.8209876543209876,
605
+ "eval_recall": 0.7676767676767676,
606
+ "eval_runtime": 3.374,
607
+ "eval_samples_per_second": 1290.473,
608
+ "eval_steps_per_second": 0.889,
609
+ "step": 72
610
+ },
611
+ {
612
+ "epoch": 8.11111111111111,
613
+ "grad_norm": 368879.1875,
614
+ "learning_rate": 2.0957740009186957e-08,
615
+ "loss": 0.3844,
616
+ "step": 73
617
+ },
618
+ {
619
+ "epoch": 8.222222222222221,
620
+ "grad_norm": 375783.4375,
621
+ "learning_rate": 2.124483233807993e-08,
622
+ "loss": 0.4067,
623
+ "step": 74
624
+ },
625
+ {
626
+ "epoch": 8.333333333333334,
627
+ "grad_norm": 295647.8125,
628
+ "learning_rate": 2.15319246669729e-08,
629
+ "loss": 0.4126,
630
+ "step": 75
631
+ },
632
+ {
633
+ "epoch": 8.444444444444445,
634
+ "grad_norm": 396154.8125,
635
+ "learning_rate": 2.181901699586587e-08,
636
+ "loss": 0.4184,
637
+ "step": 76
638
+ },
639
+ {
640
+ "epoch": 8.555555555555555,
641
+ "grad_norm": 323196.53125,
642
+ "learning_rate": 2.2106109324758845e-08,
643
+ "loss": 0.4218,
644
+ "step": 77
645
+ },
646
+ {
647
+ "epoch": 8.666666666666666,
648
+ "grad_norm": 180532.421875,
649
+ "learning_rate": 2.2393201653651817e-08,
650
+ "loss": 0.4102,
651
+ "step": 78
652
+ },
653
+ {
654
+ "epoch": 8.777777777777779,
655
+ "grad_norm": 160135.71875,
656
+ "learning_rate": 2.268029398254479e-08,
657
+ "loss": 0.3813,
658
+ "step": 79
659
+ },
660
+ {
661
+ "epoch": 8.88888888888889,
662
+ "grad_norm": 90108.5390625,
663
+ "learning_rate": 2.296738631143776e-08,
664
+ "loss": 0.3771,
665
+ "step": 80
666
+ },
667
+ {
668
+ "epoch": 9.0,
669
+ "grad_norm": 148206.140625,
670
+ "learning_rate": 2.325447864033073e-08,
671
+ "loss": 0.3913,
672
+ "step": 81
673
+ },
674
+ {
675
+ "epoch": 9.0,
676
+ "eval_accuracy": 0.8089113458888378,
677
+ "eval_f1": 0.7938553022794846,
678
+ "eval_loss": 0.4063035249710083,
679
+ "eval_precision": 0.8185998978027593,
680
+ "eval_recall": 0.7705627705627706,
681
+ "eval_runtime": 3.4033,
682
+ "eval_samples_per_second": 1279.333,
683
+ "eval_steps_per_second": 0.881,
684
+ "step": 81
685
+ },
686
+ {
687
+ "epoch": 9.11111111111111,
688
+ "grad_norm": 368934.125,
689
+ "learning_rate": 2.3541570969223705e-08,
690
+ "loss": 0.3934,
691
+ "step": 82
692
+ },
693
+ {
694
+ "epoch": 9.222222222222221,
695
+ "grad_norm": 121018.4921875,
696
+ "learning_rate": 2.3828663298116677e-08,
697
+ "loss": 0.39,
698
+ "step": 83
699
+ },
700
+ {
701
+ "epoch": 9.333333333333334,
702
+ "grad_norm": 145493.53125,
703
+ "learning_rate": 2.411575562700965e-08,
704
+ "loss": 0.4076,
705
+ "step": 84
706
+ },
707
+ {
708
+ "epoch": 9.444444444444445,
709
+ "grad_norm": 255409.46875,
710
+ "learning_rate": 2.440284795590262e-08,
711
+ "loss": 0.4162,
712
+ "step": 85
713
+ },
714
+ {
715
+ "epoch": 9.555555555555555,
716
+ "grad_norm": 283049.0625,
717
+ "learning_rate": 2.468994028479559e-08,
718
+ "loss": 0.4,
719
+ "step": 86
720
+ },
721
+ {
722
+ "epoch": 9.666666666666666,
723
+ "grad_norm": 291149.34375,
724
+ "learning_rate": 2.4977032613688565e-08,
725
+ "loss": 0.4227,
726
+ "step": 87
727
+ },
728
+ {
729
+ "epoch": 9.777777777777779,
730
+ "grad_norm": 190105.96875,
731
+ "learning_rate": 2.5264124942581536e-08,
732
+ "loss": 0.3963,
733
+ "step": 88
734
+ },
735
+ {
736
+ "epoch": 9.88888888888889,
737
+ "grad_norm": 155022.09375,
738
+ "learning_rate": 2.555121727147451e-08,
739
+ "loss": 0.3905,
740
+ "step": 89
741
+ },
742
+ {
743
+ "epoch": 10.0,
744
+ "grad_norm": 194663.59375,
745
+ "learning_rate": 2.583830960036748e-08,
746
+ "loss": 0.3815,
747
+ "step": 90
748
+ },
749
+ {
750
+ "epoch": 10.0,
751
+ "eval_accuracy": 0.809600367478181,
752
+ "eval_f1": 0.7948527592180153,
753
+ "eval_loss": 0.4060344994068146,
754
+ "eval_precision": 0.81855249745158,
755
+ "eval_recall": 0.7724867724867724,
756
+ "eval_runtime": 3.7122,
757
+ "eval_samples_per_second": 1172.903,
758
+ "eval_steps_per_second": 0.808,
759
+ "step": 90
760
+ },
761
+ {
762
+ "epoch": 10.11111111111111,
763
+ "grad_norm": 176111.203125,
764
+ "learning_rate": 2.612540192926045e-08,
765
+ "loss": 0.4231,
766
+ "step": 91
767
+ },
768
+ {
769
+ "epoch": 10.222222222222221,
770
+ "grad_norm": 206408.578125,
771
+ "learning_rate": 2.6412494258153425e-08,
772
+ "loss": 0.3939,
773
+ "step": 92
774
+ },
775
+ {
776
+ "epoch": 10.333333333333334,
777
+ "grad_norm": 266319.78125,
778
+ "learning_rate": 2.6699586587046396e-08,
779
+ "loss": 0.4014,
780
+ "step": 93
781
+ },
782
+ {
783
+ "epoch": 10.444444444444445,
784
+ "grad_norm": 153663.359375,
785
+ "learning_rate": 2.698667891593937e-08,
786
+ "loss": 0.3855,
787
+ "step": 94
788
+ },
789
+ {
790
+ "epoch": 10.555555555555555,
791
+ "grad_norm": 122127.5390625,
792
+ "learning_rate": 2.727377124483234e-08,
793
+ "loss": 0.3873,
794
+ "step": 95
795
+ },
796
+ {
797
+ "epoch": 10.666666666666666,
798
+ "grad_norm": 314791.78125,
799
+ "learning_rate": 2.756086357372531e-08,
800
+ "loss": 0.4327,
801
+ "step": 96
802
+ },
803
+ {
804
+ "epoch": 10.777777777777779,
805
+ "grad_norm": 127333.1171875,
806
+ "learning_rate": 2.7847955902618285e-08,
807
+ "loss": 0.4,
808
+ "step": 97
809
+ },
810
+ {
811
+ "epoch": 10.88888888888889,
812
+ "grad_norm": 317753.1875,
813
+ "learning_rate": 2.8135048231511256e-08,
814
+ "loss": 0.4006,
815
+ "step": 98
816
+ },
817
+ {
818
+ "epoch": 11.0,
819
+ "grad_norm": 146177.5625,
820
+ "learning_rate": 2.842214056040423e-08,
821
+ "loss": 0.4056,
822
+ "step": 99
823
+ },
824
+ {
825
+ "epoch": 11.0,
826
+ "eval_accuracy": 0.8105190629306385,
827
+ "eval_f1": 0.7963465810910886,
828
+ "eval_loss": 0.4058038294315338,
829
+ "eval_precision": 0.8179513184584178,
830
+ "eval_recall": 0.7758537758537759,
831
+ "eval_runtime": 3.481,
832
+ "eval_samples_per_second": 1250.793,
833
+ "eval_steps_per_second": 0.862,
834
+ "step": 99
835
+ },
836
+ {
837
+ "epoch": 11.11111111111111,
838
+ "grad_norm": 197264.453125,
839
+ "learning_rate": 2.87092328892972e-08,
840
+ "loss": 0.3961,
841
+ "step": 100
842
+ },
843
+ {
844
+ "epoch": 11.222222222222221,
845
+ "grad_norm": 119001.2421875,
846
+ "learning_rate": 2.899632521819017e-08,
847
+ "loss": 0.4046,
848
+ "step": 101
849
+ },
850
+ {
851
+ "epoch": 11.333333333333334,
852
+ "grad_norm": 190082.234375,
853
+ "learning_rate": 2.9283417547083144e-08,
854
+ "loss": 0.416,
855
+ "step": 102
856
+ },
857
+ {
858
+ "epoch": 11.444444444444445,
859
+ "grad_norm": 123708.5625,
860
+ "learning_rate": 2.9570509875976116e-08,
861
+ "loss": 0.391,
862
+ "step": 103
863
+ },
864
+ {
865
+ "epoch": 11.555555555555555,
866
+ "grad_norm": 172807.75,
867
+ "learning_rate": 2.985760220486909e-08,
868
+ "loss": 0.3847,
869
+ "step": 104
870
+ },
871
+ {
872
+ "epoch": 11.666666666666666,
873
+ "grad_norm": 118848.234375,
874
+ "learning_rate": 3.014469453376206e-08,
875
+ "loss": 0.3968,
876
+ "step": 105
877
+ },
878
+ {
879
+ "epoch": 11.777777777777779,
880
+ "grad_norm": 337085.53125,
881
+ "learning_rate": 3.043178686265503e-08,
882
+ "loss": 0.4127,
883
+ "step": 106
884
+ },
885
+ {
886
+ "epoch": 11.88888888888889,
887
+ "grad_norm": 254728.96875,
888
+ "learning_rate": 3.071887919154801e-08,
889
+ "loss": 0.4058,
890
+ "step": 107
891
+ },
892
+ {
893
+ "epoch": 12.0,
894
+ "grad_norm": 179838.640625,
895
+ "learning_rate": 3.100597152044098e-08,
896
+ "loss": 0.4056,
897
+ "step": 108
898
+ },
899
+ {
900
+ "epoch": 12.0,
901
+ "eval_accuracy": 0.8118971061093248,
902
+ "eval_f1": 0.7984248092542456,
903
+ "eval_loss": 0.40560415387153625,
904
+ "eval_precision": 0.8175403225806451,
905
+ "eval_recall": 0.7801827801827802,
906
+ "eval_runtime": 3.5421,
907
+ "eval_samples_per_second": 1229.227,
908
+ "eval_steps_per_second": 0.847,
909
+ "step": 108
910
+ },
911
+ {
912
+ "epoch": 12.11111111111111,
913
+ "grad_norm": 152279.546875,
914
+ "learning_rate": 3.129306384933395e-08,
915
+ "loss": 0.401,
916
+ "step": 109
917
+ },
918
+ {
919
+ "epoch": 12.222222222222221,
920
+ "grad_norm": 145296.5,
921
+ "learning_rate": 3.158015617822692e-08,
922
+ "loss": 0.4145,
923
+ "step": 110
924
+ },
925
+ {
926
+ "epoch": 12.333333333333334,
927
+ "grad_norm": 162064.296875,
928
+ "learning_rate": 3.186724850711989e-08,
929
+ "loss": 0.4096,
930
+ "step": 111
931
+ },
932
+ {
933
+ "epoch": 12.444444444444445,
934
+ "grad_norm": 142840.90625,
935
+ "learning_rate": 3.2154340836012864e-08,
936
+ "loss": 0.3996,
937
+ "step": 112
938
+ },
939
+ {
940
+ "epoch": 12.555555555555555,
941
+ "grad_norm": 121552.7421875,
942
+ "learning_rate": 3.2441433164905835e-08,
943
+ "loss": 0.3979,
944
+ "step": 113
945
+ },
946
+ {
947
+ "epoch": 12.666666666666666,
948
+ "grad_norm": 124694.5546875,
949
+ "learning_rate": 3.272852549379881e-08,
950
+ "loss": 0.4006,
951
+ "step": 114
952
+ },
953
+ {
954
+ "epoch": 12.777777777777779,
955
+ "grad_norm": 375322.09375,
956
+ "learning_rate": 3.301561782269178e-08,
957
+ "loss": 0.4088,
958
+ "step": 115
959
+ },
960
+ {
961
+ "epoch": 12.88888888888889,
962
+ "grad_norm": 157402.0625,
963
+ "learning_rate": 3.330271015158475e-08,
964
+ "loss": 0.4087,
965
+ "step": 116
966
+ },
967
+ {
968
+ "epoch": 13.0,
969
+ "grad_norm": 202887.8125,
970
+ "learning_rate": 3.358980248047773e-08,
971
+ "loss": 0.3654,
972
+ "step": 117
973
+ },
974
+ {
975
+ "epoch": 13.0,
976
+ "eval_accuracy": 0.8121267799724391,
977
+ "eval_f1": 0.799017199017199,
978
+ "eval_loss": 0.40542593598365784,
979
+ "eval_precision": 0.8166750376695128,
980
+ "eval_recall": 0.7821067821067821,
981
+ "eval_runtime": 3.5933,
982
+ "eval_samples_per_second": 1211.695,
983
+ "eval_steps_per_second": 0.835,
984
+ "step": 117
985
+ },
986
+ {
987
+ "epoch": 13.11111111111111,
988
+ "grad_norm": 103942.6640625,
989
+ "learning_rate": 3.38768948093707e-08,
990
+ "loss": 0.4133,
991
+ "step": 118
992
+ },
993
+ {
994
+ "epoch": 13.222222222222221,
995
+ "grad_norm": 73012.1328125,
996
+ "learning_rate": 3.416398713826367e-08,
997
+ "loss": 0.3909,
998
+ "step": 119
999
+ },
1000
+ {
1001
+ "epoch": 13.333333333333334,
1002
+ "grad_norm": 94441.5625,
1003
+ "learning_rate": 3.445107946715664e-08,
1004
+ "loss": 0.4166,
1005
+ "step": 120
1006
+ },
1007
+ {
1008
+ "epoch": 13.444444444444445,
1009
+ "grad_norm": 150568.984375,
1010
+ "learning_rate": 3.473817179604961e-08,
1011
+ "loss": 0.3918,
1012
+ "step": 121
1013
+ },
1014
+ {
1015
+ "epoch": 13.555555555555555,
1016
+ "grad_norm": 132181.59375,
1017
+ "learning_rate": 3.502526412494258e-08,
1018
+ "loss": 0.3995,
1019
+ "step": 122
1020
+ },
1021
+ {
1022
+ "epoch": 13.666666666666666,
1023
+ "grad_norm": 112690.1171875,
1024
+ "learning_rate": 3.5312356453835555e-08,
1025
+ "loss": 0.4005,
1026
+ "step": 123
1027
+ },
1028
+ {
1029
+ "epoch": 13.777777777777779,
1030
+ "grad_norm": 309373.25,
1031
+ "learning_rate": 3.559944878272853e-08,
1032
+ "loss": 0.3928,
1033
+ "step": 124
1034
+ },
1035
+ {
1036
+ "epoch": 13.88888888888889,
1037
+ "grad_norm": 123288.84375,
1038
+ "learning_rate": 3.5886541111621504e-08,
1039
+ "loss": 0.3923,
1040
+ "step": 125
1041
+ },
1042
+ {
1043
+ "epoch": 14.0,
1044
+ "grad_norm": 179755.625,
1045
+ "learning_rate": 3.617363344051447e-08,
1046
+ "loss": 0.3993,
1047
+ "step": 126
1048
+ },
1049
+ {
1050
+ "epoch": 14.0,
1051
+ "eval_accuracy": 0.8130454754248967,
1052
+ "eval_f1": 0.8003923491907798,
1053
+ "eval_loss": 0.4052511155605316,
1054
+ "eval_precision": 0.8164082041020511,
1055
+ "eval_recall": 0.784992784992785,
1056
+ "eval_runtime": 3.6375,
1057
+ "eval_samples_per_second": 1196.967,
1058
+ "eval_steps_per_second": 0.825,
1059
+ "step": 126
1060
+ },
1061
+ {
1062
+ "epoch": 14.11111111111111,
1063
+ "grad_norm": 89736.4609375,
1064
+ "learning_rate": 3.6460725769407446e-08,
1065
+ "loss": 0.4223,
1066
+ "step": 127
1067
+ },
1068
+ {
1069
+ "epoch": 14.222222222222221,
1070
+ "grad_norm": 118175.4921875,
1071
+ "learning_rate": 3.674781809830042e-08,
1072
+ "loss": 0.4071,
1073
+ "step": 128
1074
+ },
1075
+ {
1076
+ "epoch": 14.333333333333334,
1077
+ "grad_norm": 128466.0625,
1078
+ "learning_rate": 3.703491042719339e-08,
1079
+ "loss": 0.3898,
1080
+ "step": 129
1081
+ },
1082
+ {
1083
+ "epoch": 14.444444444444445,
1084
+ "grad_norm": 76387.4296875,
1085
+ "learning_rate": 3.732200275608636e-08,
1086
+ "loss": 0.4033,
1087
+ "step": 130
1088
+ },
1089
+ {
1090
+ "epoch": 14.555555555555555,
1091
+ "grad_norm": 117603.078125,
1092
+ "learning_rate": 3.760909508497933e-08,
1093
+ "loss": 0.3862,
1094
+ "step": 131
1095
+ },
1096
+ {
1097
+ "epoch": 14.666666666666666,
1098
+ "grad_norm": 112156.703125,
1099
+ "learning_rate": 3.78961874138723e-08,
1100
+ "loss": 0.3988,
1101
+ "step": 132
1102
+ },
1103
+ {
1104
+ "epoch": 14.777777777777779,
1105
+ "grad_norm": 116138.1171875,
1106
+ "learning_rate": 3.8183279742765274e-08,
1107
+ "loss": 0.3811,
1108
+ "step": 133
1109
+ },
1110
+ {
1111
+ "epoch": 14.88888888888889,
1112
+ "grad_norm": 169944.3125,
1113
+ "learning_rate": 3.847037207165825e-08,
1114
+ "loss": 0.3836,
1115
+ "step": 134
1116
+ },
1117
+ {
1118
+ "epoch": 15.0,
1119
+ "grad_norm": 162322.875,
1120
+ "learning_rate": 3.875746440055122e-08,
1121
+ "loss": 0.4048,
1122
+ "step": 135
1123
+ },
1124
+ {
1125
+ "epoch": 15.0,
1126
+ "eval_accuracy": 0.8141938447404685,
1127
+ "eval_f1": 0.801861376438893,
1128
+ "eval_loss": 0.4050845801830292,
1129
+ "eval_precision": 0.8168662674650699,
1130
+ "eval_recall": 0.7873977873977874,
1131
+ "eval_runtime": 3.6466,
1132
+ "eval_samples_per_second": 1194.003,
1133
+ "eval_steps_per_second": 0.823,
1134
+ "step": 135
1135
+ },
1136
+ {
1137
+ "epoch": 15.11111111111111,
1138
+ "grad_norm": 113360.9453125,
1139
+ "learning_rate": 3.904455672944419e-08,
1140
+ "loss": 0.3972,
1141
+ "step": 136
1142
+ },
1143
+ {
1144
+ "epoch": 15.222222222222221,
1145
+ "grad_norm": 92233.84375,
1146
+ "learning_rate": 3.9331649058337166e-08,
1147
+ "loss": 0.4229,
1148
+ "step": 137
1149
+ },
1150
+ {
1151
+ "epoch": 15.333333333333334,
1152
+ "grad_norm": 334741.0,
1153
+ "learning_rate": 3.961874138723013e-08,
1154
+ "loss": 0.3986,
1155
+ "step": 138
1156
+ },
1157
+ {
1158
+ "epoch": 15.444444444444445,
1159
+ "grad_norm": 207920.78125,
1160
+ "learning_rate": 3.99058337161231e-08,
1161
+ "loss": 0.3979,
1162
+ "step": 139
1163
+ },
1164
+ {
1165
+ "epoch": 15.555555555555555,
1166
+ "grad_norm": 78730.8671875,
1167
+ "learning_rate": 4.019292604501608e-08,
1168
+ "loss": 0.4066,
1169
+ "step": 140
1170
+ },
1171
+ {
1172
+ "epoch": 15.666666666666666,
1173
+ "grad_norm": 272575.46875,
1174
+ "learning_rate": 4.048001837390905e-08,
1175
+ "loss": 0.4215,
1176
+ "step": 141
1177
+ },
1178
+ {
1179
+ "epoch": 15.777777777777779,
1180
+ "grad_norm": 121701.4921875,
1181
+ "learning_rate": 4.076711070280202e-08,
1182
+ "loss": 0.4133,
1183
+ "step": 142
1184
+ },
1185
+ {
1186
+ "epoch": 15.88888888888889,
1187
+ "grad_norm": 224911.0,
1188
+ "learning_rate": 4.1054203031694994e-08,
1189
+ "loss": 0.3784,
1190
+ "step": 143
1191
+ },
1192
+ {
1193
+ "epoch": 16.0,
1194
+ "grad_norm": 88522.6171875,
1195
+ "learning_rate": 4.134129536058797e-08,
1196
+ "loss": 0.4024,
1197
+ "step": 144
1198
+ },
1199
+ {
1200
+ "epoch": 16.0,
1201
+ "eval_accuracy": 0.8148828663298117,
1202
+ "eval_f1": 0.8029339853300733,
1203
+ "eval_loss": 0.40494343638420105,
1204
+ "eval_precision": 0.8165091994032819,
1205
+ "eval_recall": 0.7898027898027898,
1206
+ "eval_runtime": 4.3334,
1207
+ "eval_samples_per_second": 1004.752,
1208
+ "eval_steps_per_second": 0.692,
1209
+ "step": 144
1210
+ },
1211
+ {
1212
+ "epoch": 16.11111111111111,
1213
+ "grad_norm": 111251.046875,
1214
+ "learning_rate": 4.162838768948094e-08,
1215
+ "loss": 0.382,
1216
+ "step": 145
1217
+ },
1218
+ {
1219
+ "epoch": 16.22222222222222,
1220
+ "grad_norm": 103675.90625,
1221
+ "learning_rate": 4.1915480018373914e-08,
1222
+ "loss": 0.4145,
1223
+ "step": 146
1224
+ },
1225
+ {
1226
+ "epoch": 16.333333333333332,
1227
+ "grad_norm": 58449.1328125,
1228
+ "learning_rate": 4.2202572347266885e-08,
1229
+ "loss": 0.3989,
1230
+ "step": 147
1231
+ },
1232
+ {
1233
+ "epoch": 16.444444444444443,
1234
+ "grad_norm": 321891.53125,
1235
+ "learning_rate": 4.248966467615986e-08,
1236
+ "loss": 0.4029,
1237
+ "step": 148
1238
+ },
1239
+ {
1240
+ "epoch": 16.555555555555557,
1241
+ "grad_norm": 203043.90625,
1242
+ "learning_rate": 4.277675700505283e-08,
1243
+ "loss": 0.4039,
1244
+ "step": 149
1245
+ },
1246
+ {
1247
+ "epoch": 16.666666666666668,
1248
+ "grad_norm": 162260.46875,
1249
+ "learning_rate": 4.30638493339458e-08,
1250
+ "loss": 0.3799,
1251
+ "step": 150
1252
+ },
1253
+ {
1254
+ "epoch": 16.77777777777778,
1255
+ "grad_norm": 75518.8359375,
1256
+ "learning_rate": 4.335094166283877e-08,
1257
+ "loss": 0.415,
1258
+ "step": 151
1259
+ },
1260
+ {
1261
+ "epoch": 16.88888888888889,
1262
+ "grad_norm": 377702.125,
1263
+ "learning_rate": 4.363803399173174e-08,
1264
+ "loss": 0.4015,
1265
+ "step": 152
1266
+ },
1267
+ {
1268
+ "epoch": 17.0,
1269
+ "grad_norm": 183517.390625,
1270
+ "learning_rate": 4.392512632062471e-08,
1271
+ "loss": 0.4073,
1272
+ "step": 153
1273
+ },
1274
+ {
1275
+ "epoch": 17.0,
1276
+ "eval_accuracy": 0.8146531924666973,
1277
+ "eval_f1": 0.802544653780279,
1278
+ "eval_loss": 0.40483081340789795,
1279
+ "eval_precision": 0.8167330677290837,
1280
+ "eval_recall": 0.7888407888407888,
1281
+ "eval_runtime": 3.7671,
1282
+ "eval_samples_per_second": 1155.81,
1283
+ "eval_steps_per_second": 0.796,
1284
+ "step": 153
1285
+ },
1286
+ {
1287
+ "epoch": 17.11111111111111,
1288
+ "grad_norm": 58819.46484375,
1289
+ "learning_rate": 4.421221864951769e-08,
1290
+ "loss": 0.3848,
1291
+ "step": 154
1292
+ },
1293
+ {
1294
+ "epoch": 17.22222222222222,
1295
+ "grad_norm": 139786.34375,
1296
+ "learning_rate": 4.449931097841066e-08,
1297
+ "loss": 0.403,
1298
+ "step": 155
1299
+ },
1300
+ {
1301
+ "epoch": 17.333333333333332,
1302
+ "grad_norm": 247516.828125,
1303
+ "learning_rate": 4.4786403307303634e-08,
1304
+ "loss": 0.4033,
1305
+ "step": 156
1306
+ },
1307
+ {
1308
+ "epoch": 17.444444444444443,
1309
+ "grad_norm": 97322.15625,
1310
+ "learning_rate": 4.5073495636196605e-08,
1311
+ "loss": 0.3917,
1312
+ "step": 157
1313
+ },
1314
+ {
1315
+ "epoch": 17.555555555555557,
1316
+ "grad_norm": 123633.953125,
1317
+ "learning_rate": 4.536058796508958e-08,
1318
+ "loss": 0.4085,
1319
+ "step": 158
1320
+ },
1321
+ {
1322
+ "epoch": 17.666666666666668,
1323
+ "grad_norm": 226355.78125,
1324
+ "learning_rate": 4.564768029398255e-08,
1325
+ "loss": 0.4028,
1326
+ "step": 159
1327
+ },
1328
+ {
1329
+ "epoch": 17.77777777777778,
1330
+ "grad_norm": 55226.03515625,
1331
+ "learning_rate": 4.593477262287552e-08,
1332
+ "loss": 0.408,
1333
+ "step": 160
1334
+ },
1335
+ {
1336
+ "epoch": 17.88888888888889,
1337
+ "grad_norm": 114759.9296875,
1338
+ "learning_rate": 4.622186495176849e-08,
1339
+ "loss": 0.3813,
1340
+ "step": 161
1341
+ },
1342
+ {
1343
+ "epoch": 18.0,
1344
+ "grad_norm": 157256.296875,
1345
+ "learning_rate": 4.650895728066146e-08,
1346
+ "loss": 0.3884,
1347
+ "step": 162
1348
+ },
1349
+ {
1350
+ "epoch": 18.0,
1351
+ "eval_accuracy": 0.8146531924666973,
1352
+ "eval_f1": 0.8027377169396236,
1353
+ "eval_loss": 0.40473371744155884,
1354
+ "eval_precision": 0.81610337972167,
1355
+ "eval_recall": 0.7898027898027898,
1356
+ "eval_runtime": 3.8201,
1357
+ "eval_samples_per_second": 1139.77,
1358
+ "eval_steps_per_second": 0.785,
1359
+ "step": 162
1360
+ },
1361
+ {
1362
+ "epoch": 18.11111111111111,
1363
+ "grad_norm": 103914.2421875,
1364
+ "learning_rate": 4.679604960955444e-08,
1365
+ "loss": 0.3989,
1366
+ "step": 163
1367
+ },
1368
+ {
1369
+ "epoch": 18.22222222222222,
1370
+ "grad_norm": 101009.2421875,
1371
+ "learning_rate": 4.708314193844741e-08,
1372
+ "loss": 0.3977,
1373
+ "step": 164
1374
+ },
1375
+ {
1376
+ "epoch": 18.333333333333332,
1377
+ "grad_norm": 161130.59375,
1378
+ "learning_rate": 4.737023426734038e-08,
1379
+ "loss": 0.4009,
1380
+ "step": 165
1381
+ },
1382
+ {
1383
+ "epoch": 18.444444444444443,
1384
+ "grad_norm": 137444.46875,
1385
+ "learning_rate": 4.765732659623335e-08,
1386
+ "loss": 0.3911,
1387
+ "step": 166
1388
+ },
1389
+ {
1390
+ "epoch": 18.555555555555557,
1391
+ "grad_norm": 178129.796875,
1392
+ "learning_rate": 4.7944418925126324e-08,
1393
+ "loss": 0.4103,
1394
+ "step": 167
1395
+ },
1396
+ {
1397
+ "epoch": 18.666666666666668,
1398
+ "grad_norm": 150367.984375,
1399
+ "learning_rate": 4.82315112540193e-08,
1400
+ "loss": 0.428,
1401
+ "step": 168
1402
+ },
1403
+ {
1404
+ "epoch": 18.77777777777778,
1405
+ "grad_norm": 108212.7265625,
1406
+ "learning_rate": 4.851860358291227e-08,
1407
+ "loss": 0.4053,
1408
+ "step": 169
1409
+ },
1410
+ {
1411
+ "epoch": 18.88888888888889,
1412
+ "grad_norm": 92431.296875,
1413
+ "learning_rate": 4.880569591180524e-08,
1414
+ "loss": 0.3938,
1415
+ "step": 170
1416
+ },
1417
+ {
1418
+ "epoch": 19.0,
1419
+ "grad_norm": 98930.1328125,
1420
+ "learning_rate": 4.909278824069821e-08,
1421
+ "loss": 0.3901,
1422
+ "step": 171
1423
+ },
1424
+ {
1425
+ "epoch": 19.0,
1426
+ "eval_accuracy": 0.8148828663298117,
1427
+ "eval_f1": 0.80322265625,
1428
+ "eval_loss": 0.404643177986145,
1429
+ "eval_precision": 0.8155676747645018,
1430
+ "eval_recall": 0.7912457912457912,
1431
+ "eval_runtime": 3.8213,
1432
+ "eval_samples_per_second": 1139.417,
1433
+ "eval_steps_per_second": 0.785,
1434
+ "step": 171
1435
+ }
1436
+ ],
1437
+ "logging_steps": 1,
1438
+ "max_steps": 180,
1439
+ "num_input_tokens_seen": 0,
1440
+ "num_train_epochs": 20,
1441
+ "save_steps": 500,
1442
+ "stateful_callbacks": {
1443
+ "TrainerControl": {
1444
+ "args": {
1445
+ "should_epoch_stop": false,
1446
+ "should_evaluate": false,
1447
+ "should_log": false,
1448
+ "should_save": true,
1449
+ "should_training_stop": false
1450
+ },
1451
+ "attributes": {}
1452
+ }
1453
+ },
1454
+ "total_flos": 1.3045517279133696e+16,
1455
+ "train_batch_size": 2048,
1456
+ "trial_name": null,
1457
+ "trial_params": null
1458
+ }
checkpoint-171/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7a6e522bc59163d2b15697b64887320385f6e05bcc931836ff29f5693f7d4f
3
+ size 5368
config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "GenetikaPlus/binary_classification_model_v3.1.3_spines",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 256,
11
+ "image_size": 32,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 512,
14
+ "layer_norm_eps": 1e-12,
15
+ "model_type": "vit",
16
+ "num_attention_heads": 8,
17
+ "num_channels": 3,
18
+ "num_hidden_layers": 4,
19
+ "patch_size": 4,
20
+ "qkv_bias": true,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.46.2"
23
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aebf6830fe10538fdd5841cfa8bc3a7de059c6edd64c4cbbbbe6f2bd94c9fc23
3
+ size 8563512
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": false,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 32,
20
+ "width": 32
21
+ }
22
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7a6e522bc59163d2b15697b64887320385f6e05bcc931836ff29f5693f7d4f
3
+ size 5368