lesso13 commited on
Commit
e05c535
·
verified ·
1 Parent(s): 33f593e

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e8b59a814c9a088f6b9062440659bccc8fc289625fd1ff2e5b3c52147998cd
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acf8dcdcddeb62b78640b8365bd45d762ea5c3866998aa0be50add19d2809
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8889c11567294a3dfe42417a85989fcd84a90c7cb2632f1841d04ba92d392ee0
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec65fbdf54f303f0bbfc7f6e2f24f4ad7574352f8d75e7c896ac4762adb45837
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b18fd526770faca31d6b68eaac69de0bf5165319660d891f391c0303ed73e314
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f704cbed04a35a8d02bf4d57aa505157b25d4a6cec9754224cd1a7fed92ae36
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0585a94ca770852d904d0a057ee7d0f13731dc026d439d9add35f155aff77fb2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a29a98d7724cf179d3beb2d49a34c568e23cf47a86cb77f3cd39efdcdbcc2de
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.639454960823059,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 0.07286859363614283,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 16.753,
145
  "eval_steps_per_second": 4.193,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -158,7 +201,7 @@
158
  "early_stopping_threshold": 0.0
159
  },
160
  "attributes": {
161
- "early_stopping_patience_counter": 2
162
  }
163
  },
164
  "TrainerControl": {
@@ -167,12 +210,12 @@
167
  "should_evaluate": false,
168
  "should_log": false,
169
  "should_save": true,
170
- "should_training_stop": false
171
  },
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 2.68312126685184e+16,
176
  "train_batch_size": 4,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
  "best_metric": 1.639454960823059,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 0.09715812484819043,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 16.753,
145
  "eval_steps_per_second": 4.193,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.07772649987855235,
150
+ "grad_norm": 15.678354263305664,
151
+ "learning_rate": 0.00018310968873606635,
152
+ "loss": 3.1187,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.08258440612096186,
157
+ "grad_norm": 27.940187454223633,
158
+ "learning_rate": 0.0001777624095772184,
159
+ "loss": 2.4375,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.08744231236337138,
164
+ "grad_norm": 22.329130172729492,
165
+ "learning_rate": 0.0001720679471221826,
166
+ "loss": 3.5057,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.09230021860578091,
171
+ "grad_norm": 21.67806053161621,
172
+ "learning_rate": 0.00016605404421963453,
173
+ "loss": 3.6538,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.09715812484819043,
178
+ "grad_norm": 19.599658966064453,
179
+ "learning_rate": 0.00015975,
180
+ "loss": 3.1059,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.09715812484819043,
185
+ "eval_loss": 1.7558817863464355,
186
+ "eval_runtime": 51.7429,
187
+ "eval_samples_per_second": 16.756,
188
+ "eval_steps_per_second": 4.194,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
201
  "early_stopping_threshold": 0.0
202
  },
203
  "attributes": {
204
+ "early_stopping_patience_counter": 3
205
  }
206
  },
207
  "TrainerControl": {
 
210
  "should_evaluate": false,
211
  "should_log": false,
212
  "should_save": true,
213
+ "should_training_stop": true
214
  },
215
  "attributes": {}
216
  }
217
  },
218
+ "total_flos": 3.57749502246912e+16,
219
  "train_batch_size": 4,
220
  "trial_name": null,
221
  "trial_params": null