ardaspear commited on
Commit
f103d03
·
verified ·
1 Parent(s): 0cc996d

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2b85338669f928da5b776e614d0f413046ac68f44c749ec58ed1aa0e9397c0b
3
  size 1216072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280d1d25033f925638c3f28d6b9812f50a55cb69335b304f346da90f600194a9
3
  size 1216072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3ab381f67f145232e683d8b7a8ece596713bc8223ed1feea18d321775905b72
3
  size 1294548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad47fe346fbda1cf3c7be4acba0af85deb9cedfdc43c59bbf73dd168568ec9b4
3
  size 1294548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:459c3f406a0936fc386ad18c7987b6ec35fd89400ca940c25931be4409d216e4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b26889fac2d40fecb38ddb511932a340b9eca68dacd2959e6fb794722ce2bdbc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e994acda9463ad5e79f11759cee0746e6d525c82215e6ea2f53a57491ac0869b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b536b0fa0a634e1c6dfafee7987ec2b47c88eb25b052693577be75945f4ed90
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 9.663827896118164,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.176522506619594,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,135 @@
144
  "eval_samples_per_second": 460.483,
145
  "eval_steps_per_second": 115.121,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +301,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 32150414426112.0,
176
  "train_batch_size": 8,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 9.273290634155273,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
+ "epoch": 0.353045013239188,
5
  "eval_steps": 50,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 460.483,
145
  "eval_steps_per_second": 115.121,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.18829067372756694,
150
+ "grad_norm": 0.5674024224281311,
151
+ "learning_rate": 0.0001697631521134985,
152
+ "loss": 9.6526,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.20005884083553988,
157
+ "grad_norm": 0.5667808651924133,
158
+ "learning_rate": 0.00016585113790650388,
159
+ "loss": 9.613,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.2118270079435128,
164
+ "grad_norm": 0.5394534468650818,
165
+ "learning_rate": 0.0001617524614946192,
166
+ "loss": 9.5741,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.22359517505148574,
171
+ "grad_norm": 0.5556952357292175,
172
+ "learning_rate": 0.0001574787410214407,
173
+ "loss": 9.5348,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.23536334215945867,
178
+ "grad_norm": 0.5671817064285278,
179
+ "learning_rate": 0.00015304209081197425,
180
+ "loss": 9.4987,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.23536334215945867,
185
+ "eval_loss": 9.482871055603027,
186
+ "eval_runtime": 3.1933,
187
+ "eval_samples_per_second": 448.444,
188
+ "eval_steps_per_second": 112.111,
189
+ "step": 200
190
+ },
191
+ {
192
+ "epoch": 0.2471315092674316,
193
+ "grad_norm": 0.7693024277687073,
194
+ "learning_rate": 0.00014845508703326504,
195
+ "loss": 9.4781,
196
+ "step": 210
197
+ },
198
+ {
199
+ "epoch": 0.2588996763754045,
200
+ "grad_norm": 3.6308693885803223,
201
+ "learning_rate": 0.00014373073204588556,
202
+ "loss": 9.4655,
203
+ "step": 220
204
+ },
205
+ {
206
+ "epoch": 0.27066784348337747,
207
+ "grad_norm": 0.5690402388572693,
208
+ "learning_rate": 0.00013888241754733208,
209
+ "loss": 9.4397,
210
+ "step": 230
211
+ },
212
+ {
213
+ "epoch": 0.2824360105913504,
214
+ "grad_norm": 0.5628238916397095,
215
+ "learning_rate": 0.00013392388661180303,
216
+ "loss": 9.4196,
217
+ "step": 240
218
+ },
219
+ {
220
+ "epoch": 0.29420417769932333,
221
+ "grad_norm": 0.5627617835998535,
222
+ "learning_rate": 0.0001288691947339621,
223
+ "loss": 9.3916,
224
+ "step": 250
225
+ },
226
+ {
227
+ "epoch": 0.29420417769932333,
228
+ "eval_loss": 9.378963470458984,
229
+ "eval_runtime": 3.1537,
230
+ "eval_samples_per_second": 454.067,
231
+ "eval_steps_per_second": 113.517,
232
+ "step": 250
233
+ },
234
+ {
235
+ "epoch": 0.30597234480729624,
236
+ "grad_norm": 0.5669119954109192,
237
+ "learning_rate": 0.0001237326699871115,
238
+ "loss": 9.3743,
239
+ "step": 260
240
+ },
241
+ {
242
+ "epoch": 0.3177405119152692,
243
+ "grad_norm": 0.5632140040397644,
244
+ "learning_rate": 0.00011852887240871145,
245
+ "loss": 9.3444,
246
+ "step": 270
247
+ },
248
+ {
249
+ "epoch": 0.32950867902324216,
250
+ "grad_norm": 0.5589627623558044,
251
+ "learning_rate": 0.00011327255272837221,
252
+ "loss": 9.3176,
253
+ "step": 280
254
+ },
255
+ {
256
+ "epoch": 0.34127684613121506,
257
+ "grad_norm": 0.5277190208435059,
258
+ "learning_rate": 0.00010797861055530831,
259
+ "loss": 9.2899,
260
+ "step": 290
261
+ },
262
+ {
263
+ "epoch": 0.353045013239188,
264
+ "grad_norm": 0.5792800784111023,
265
+ "learning_rate": 0.00010266205214377748,
266
+ "loss": 9.2735,
267
+ "step": 300
268
+ },
269
+ {
270
+ "epoch": 0.353045013239188,
271
+ "eval_loss": 9.273290634155273,
272
+ "eval_runtime": 3.1419,
273
+ "eval_samples_per_second": 455.778,
274
+ "eval_steps_per_second": 113.945,
275
+ "step": 300
276
  }
277
  ],
278
  "logging_steps": 10,
 
301
  "attributes": {}
302
  }
303
  },
304
+ "total_flos": 64247775363072.0,
305
  "train_batch_size": 8,
306
  "trial_name": null,
307
  "trial_params": null