wilsondt committed on
Commit
fb965d3
·
verified ·
1 Parent(s): 0ed623d

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -55,6 +55,7 @@
55
  3,
56
  6
57
  ],
 
58
  "reshape_hidden_states": true,
59
  "semantic_loss_ignore_index": 255,
60
  "stage_names": [
 
55
  3,
56
  6
57
  ],
58
+ "problem_type": "single_label_classification",
59
  "reshape_hidden_states": true,
60
  "semantic_loss_ignore_index": 255,
61
  "stage_names": [
logs/events.out.tfevents.1740283925.ca6c489f7bbc.211.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a3552fb4cd66c14f5fae71427cd63fa0add1088e1bd4f48172fa49d3b80301d
3
- size 88
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32954df55bade1fee3deeff8b772e548a50886daab72f7b113ff5604bb3de16b
3
+ size 7019
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efa9afb7089f2aa3b84b697dbe29fea573bfcbadb182863f9bc6cda186ef29ff
3
  size 343098784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec57a3f7dabbd92bcab71d9565d59359e5ab6a20ad1d28d7d1b25278093c81b
3
  size 343098784
trainer_state.json CHANGED
@@ -1,263 +1,13 @@
1
  {
2
- "best_metric": 0.3260420858860016,
3
- "best_model_checkpoint": "./drive/Shareddrives/CS198-Drones/training_output/beit-base-patch16-224_rice-leaf-disease-augmented-v2_fft/checkpoint-375",
4
- "epoch": 15.0,
5
  "eval_steps": 500,
6
- "global_step": 1875,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "grad_norm": 9.300188064575195,
14
- "learning_rate": 6.524064171122996e-06,
15
- "loss": 1.5633,
16
- "step": 125
17
- },
18
- {
19
- "epoch": 1.0,
20
- "eval_accuracy": 0.7797619047619048,
21
- "eval_loss": 0.6928136944770813,
22
- "eval_runtime": 12.0343,
23
- "eval_samples_per_second": 27.92,
24
- "eval_steps_per_second": 0.499,
25
- "step": 125
26
- },
27
- {
28
- "epoch": 2.0,
29
- "grad_norm": 10.518802642822266,
30
- "learning_rate": 9.883929099922349e-06,
31
- "loss": 0.4092,
32
- "step": 250
33
- },
34
- {
35
- "epoch": 2.0,
36
- "eval_accuracy": 0.8690476190476191,
37
- "eval_loss": 0.3738097846508026,
38
- "eval_runtime": 12.1178,
39
- "eval_samples_per_second": 27.728,
40
- "eval_steps_per_second": 0.495,
41
- "step": 250
42
- },
43
- {
44
- "epoch": 3.0,
45
- "grad_norm": 5.8602752685546875,
46
- "learning_rate": 8.884167663453091e-06,
47
- "loss": 0.1327,
48
- "step": 375
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_accuracy": 0.9047619047619048,
53
- "eval_loss": 0.3260420858860016,
54
- "eval_runtime": 12.0126,
55
- "eval_samples_per_second": 27.971,
56
- "eval_steps_per_second": 0.499,
57
- "step": 375
58
- },
59
- {
60
- "epoch": 4.0,
61
- "grad_norm": 4.471620082855225,
62
- "learning_rate": 7.0585894873794514e-06,
63
- "loss": 0.0433,
64
- "step": 500
65
- },
66
- {
67
- "epoch": 4.0,
68
- "eval_accuracy": 0.9166666666666666,
69
- "eval_loss": 0.34726911783218384,
70
- "eval_runtime": 11.5003,
71
- "eval_samples_per_second": 29.217,
72
- "eval_steps_per_second": 0.522,
73
- "step": 500
74
- },
75
- {
76
- "epoch": 5.0,
77
- "grad_norm": 0.4554196298122406,
78
- "learning_rate": 4.7953325584941465e-06,
79
- "loss": 0.019,
80
- "step": 625
81
- },
82
- {
83
- "epoch": 5.0,
84
- "eval_accuracy": 0.9196428571428571,
85
- "eval_loss": 0.3815436065196991,
86
- "eval_runtime": 12.0802,
87
- "eval_samples_per_second": 27.814,
88
- "eval_steps_per_second": 0.497,
89
- "step": 625
90
- },
91
- {
92
- "epoch": 6.0,
93
- "grad_norm": 0.5961848497390747,
94
- "learning_rate": 2.7317059402422887e-06,
95
- "loss": 0.0116,
96
- "step": 750
97
- },
98
- {
99
- "epoch": 6.0,
100
- "eval_accuracy": 0.9166666666666666,
101
- "eval_loss": 0.4082700312137604,
102
- "eval_runtime": 10.577,
103
- "eval_samples_per_second": 31.767,
104
- "eval_steps_per_second": 0.567,
105
- "step": 750
106
- },
107
- {
108
- "epoch": 7.0,
109
- "grad_norm": 0.2265198975801468,
110
- "learning_rate": 9.989992910524462e-06,
111
- "loss": 0.0072,
112
- "step": 875
113
- },
114
- {
115
- "epoch": 7.0,
116
- "eval_accuracy": 0.9166666666666666,
117
- "eval_loss": 0.44227680563926697,
118
- "eval_runtime": 10.7145,
119
- "eval_samples_per_second": 31.359,
120
- "eval_steps_per_second": 0.56,
121
- "step": 875
122
- },
123
- {
124
- "epoch": 8.0,
125
- "grad_norm": 0.09561679512262344,
126
- "learning_rate": 6.687734491723899e-06,
127
- "loss": 0.0078,
128
- "step": 1000
129
- },
130
- {
131
- "epoch": 8.0,
132
- "eval_accuracy": 0.9226190476190477,
133
- "eval_loss": 0.48087066411972046,
134
- "eval_runtime": 10.6527,
135
- "eval_samples_per_second": 31.541,
136
- "eval_steps_per_second": 0.563,
137
- "step": 1000
138
- },
139
- {
140
- "epoch": 9.0,
141
- "grad_norm": 0.06357914209365845,
142
- "learning_rate": 1.3799679245093633e-06,
143
- "loss": 0.007,
144
- "step": 1125
145
- },
146
- {
147
- "epoch": 9.0,
148
- "eval_accuracy": 0.9196428571428571,
149
- "eval_loss": 0.4225828945636749,
150
- "eval_runtime": 9.622,
151
- "eval_samples_per_second": 34.92,
152
- "eval_steps_per_second": 0.624,
153
- "step": 1125
154
- },
155
- {
156
- "epoch": 10.0,
157
- "grad_norm": 1.7639108896255493,
158
- "learning_rate": 9.601392291702693e-06,
159
- "loss": 0.0053,
160
- "step": 1250
161
- },
162
- {
163
- "epoch": 10.0,
164
- "eval_accuracy": 0.9047619047619048,
165
- "eval_loss": 0.5498608946800232,
166
- "eval_runtime": 11.0412,
167
- "eval_samples_per_second": 30.431,
168
- "eval_steps_per_second": 0.543,
169
- "step": 1250
170
- },
171
- {
172
- "epoch": 11.0,
173
- "grad_norm": 0.150970920920372,
174
- "learning_rate": 5.074442573016012e-06,
175
- "loss": 0.0064,
176
- "step": 1375
177
- },
178
- {
179
- "epoch": 11.0,
180
- "eval_accuracy": 0.9077380952380952,
181
- "eval_loss": 0.5284379720687866,
182
- "eval_runtime": 10.8395,
183
- "eval_samples_per_second": 30.998,
184
- "eval_steps_per_second": 0.554,
185
- "step": 1375
186
- },
187
- {
188
- "epoch": 12.0,
189
- "grad_norm": 0.03461284562945366,
190
- "learning_rate": 4.396198449987288e-07,
191
- "loss": 0.0037,
192
- "step": 1500
193
- },
194
- {
195
- "epoch": 12.0,
196
- "eval_accuracy": 0.9166666666666666,
197
- "eval_loss": 0.5137431621551514,
198
- "eval_runtime": 10.6292,
199
- "eval_samples_per_second": 31.611,
200
- "eval_steps_per_second": 0.564,
201
- "step": 1500
202
- },
203
- {
204
- "epoch": 13.0,
205
- "grad_norm": 1.6548749208450317,
206
- "learning_rate": 8.689877388713784e-06,
207
- "loss": 0.0056,
208
- "step": 1625
209
- },
210
- {
211
- "epoch": 13.0,
212
- "eval_accuracy": 0.9136904761904762,
213
- "eval_loss": 0.555907666683197,
214
- "eval_runtime": 10.5418,
215
- "eval_samples_per_second": 31.873,
216
- "eval_steps_per_second": 0.569,
217
- "step": 1625
218
- },
219
- {
220
- "epoch": 14.0,
221
- "grad_norm": 0.4239642322063446,
222
- "learning_rate": 3.453145092452331e-06,
223
- "loss": 0.0077,
224
- "step": 1750
225
- },
226
- {
227
- "epoch": 14.0,
228
- "eval_accuracy": 0.9166666666666666,
229
- "eval_loss": 0.5193145275115967,
230
- "eval_runtime": 10.5774,
231
- "eval_samples_per_second": 31.766,
232
- "eval_steps_per_second": 0.567,
233
- "step": 1750
234
- },
235
- {
236
- "epoch": 15.0,
237
- "grad_norm": 0.052128296345472336,
238
- "learning_rate": 2.1633234416539305e-08,
239
- "loss": 0.0037,
240
- "step": 1875
241
- },
242
- {
243
- "epoch": 15.0,
244
- "eval_accuracy": 0.9136904761904762,
245
- "eval_loss": 0.5215830206871033,
246
- "eval_runtime": 21.3118,
247
- "eval_samples_per_second": 15.766,
248
- "eval_steps_per_second": 0.282,
249
- "step": 1875
250
- },
251
- {
252
- "epoch": 15.0,
253
- "step": 1875,
254
- "total_flos": 9.29556430258176e+18,
255
- "train_loss": 0.0018048280239105225,
256
- "train_runtime": 1746.5831,
257
- "train_samples_per_second": 68.706,
258
- "train_steps_per_second": 1.074
259
- }
260
- ],
261
  "logging_steps": 500,
262
  "max_steps": 1875,
263
  "num_input_tokens_seen": 0,
@@ -269,13 +19,13 @@
269
  "should_epoch_stop": false,
270
  "should_evaluate": false,
271
  "should_log": false,
272
- "should_save": true,
273
- "should_training_stop": true
274
  },
275
  "attributes": {}
276
  }
277
  },
278
- "total_flos": 9.29556430258176e+18,
279
  "train_batch_size": 64,
280
  "trial_name": null,
281
  "trial_params": null
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0,
5
  "eval_steps": 500,
6
+ "global_step": 0,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
+ "log_history": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "logging_steps": 500,
12
  "max_steps": 1875,
13
  "num_input_tokens_seen": 0,
 
19
  "should_epoch_stop": false,
20
  "should_evaluate": false,
21
  "should_log": false,
22
+ "should_save": false,
23
+ "should_training_stop": false
24
  },
25
  "attributes": {}
26
  }
27
  },
28
+ "total_flos": 0,
29
  "train_batch_size": 64,
30
  "trial_name": null,
31
  "trial_params": null