brixeus committed on
Commit
030d8e0
·
verified ·
1 Parent(s): 6deb5a7

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82b5aa30e1a5893f0eb056684ec50b6dee95e4817afcc22b677c62cc1b9818e1
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd40c75455960cc69b93c8a0949e9f0a87de121a1ac9c5bf36a70f9902fe6aaa
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25ee3aca9dc1b20671dc619e34af7758679b970840b0f57dd86e89b672942e5f
3
- size 51613348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06e76ac9e7b5a85a54d496a38c698975c341f44e899145d37f311bcd9912144f
3
+ size 51613668
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:deb36ff945ed06d7cf988ad14ccb571fbaad9c693f2bb9d17cde1e0ec3bb3f5f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019cea10a2ea963ce36b980aa95f96cb1364758a9b5a5a0a5acdc9b99d5f64ec
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:596785cc644037bdf9b1374ba5340995054de5f4bde563878d8bc4f03a7aa10e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.3954179286956787,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.015467505349178933,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,135 @@
144
  "eval_samples_per_second": 48.856,
145
  "eval_steps_per_second": 12.215,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +301,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 3.1409380276568064e+16,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.2416651248931885,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.030935010698357867,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 48.856,
145
  "eval_steps_per_second": 12.215,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.01649867237245753,
150
+ "grad_norm": 0.597623348236084,
151
+ "learning_rate": 0.0001697631521134985,
152
+ "loss": 1.8683,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.017529839395736123,
157
+ "grad_norm": 1.2859327793121338,
158
+ "learning_rate": 0.00016585113790650388,
159
+ "loss": 2.1469,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.01856100641901472,
164
+ "grad_norm": 1.5065349340438843,
165
+ "learning_rate": 0.0001617524614946192,
166
+ "loss": 2.5904,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.019592173442293314,
171
+ "grad_norm": 2.6285014152526855,
172
+ "learning_rate": 0.0001574787410214407,
173
+ "loss": 2.2524,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.02062334046557191,
178
+ "grad_norm": 4.162689685821533,
179
+ "learning_rate": 0.00015304209081197425,
180
+ "loss": 2.2073,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.02062334046557191,
185
+ "eval_loss": 2.3460395336151123,
186
+ "eval_runtime": 334.257,
187
+ "eval_samples_per_second": 48.867,
188
+ "eval_steps_per_second": 12.218,
189
+ "step": 200
190
+ },
191
+ {
192
+ "epoch": 0.021654507488850505,
193
+ "grad_norm": 0.6041058301925659,
194
+ "learning_rate": 0.00014845508703326504,
195
+ "loss": 1.8224,
196
+ "step": 210
197
+ },
198
+ {
199
+ "epoch": 0.022685674512129102,
200
+ "grad_norm": 1.3182774782180786,
201
+ "learning_rate": 0.00014373073204588556,
202
+ "loss": 2.2141,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 0.023716841535407696,
207
+ "grad_norm": 1.494461178779602,
208
+ "learning_rate": 0.00013888241754733208,
209
+ "loss": 2.7371,
210
+ "step": 230
211
+ },
212
+ {
213
+ "epoch": 0.024748008558686294,
214
+ "grad_norm": 2.414562225341797,
215
+ "learning_rate": 0.00013392388661180303,
216
+ "loss": 2.2599,
217
+ "step": 240
218
+ },
219
+ {
220
+ "epoch": 0.025779175581964887,
221
+ "grad_norm": 3.806607484817505,
222
+ "learning_rate": 0.0001288691947339621,
223
+ "loss": 2.1804,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 0.025779175581964887,
228
+ "eval_loss": 2.274019956588745,
229
+ "eval_runtime": 333.7217,
230
+ "eval_samples_per_second": 48.945,
231
+ "eval_steps_per_second": 12.238,
232
+ "step": 250
233
+ },
234
+ {
235
+ "epoch": 0.026810342605243485,
236
+ "grad_norm": 0.5745736360549927,
237
+ "learning_rate": 0.0001237326699871115,
238
+ "loss": 1.7065,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 0.02784150962852208,
243
+ "grad_norm": 1.216685175895691,
244
+ "learning_rate": 0.00011852887240871145,
245
+ "loss": 2.2073,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 0.028872676651800676,
250
+ "grad_norm": 1.3834314346313477,
251
+ "learning_rate": 0.00011327255272837221,
252
+ "loss": 2.6267,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 0.02990384367507927,
257
+ "grad_norm": 2.0821757316589355,
258
+ "learning_rate": 0.00010797861055530831,
259
+ "loss": 2.2572,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 0.030935010698357867,
264
+ "grad_norm": 4.278872489929199,
265
+ "learning_rate": 0.00010266205214377748,
266
+ "loss": 2.1662,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 0.030935010698357867,
271
+ "eval_loss": 2.2416651248931885,
272
+ "eval_runtime": 333.1876,
273
+ "eval_samples_per_second": 49.023,
274
+ "eval_steps_per_second": 12.257,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
301
  "attributes": {}
302
  }
303
  },
304
+ "total_flos": 6.281876055313613e+16,
305
  "train_batch_size": 8,
306
  "trial_name": null,
307
  "trial_params": null