ngwgsang committed
Commit 3d8ef10 · verified · 1 Parent(s): e257a65

Training in progress, epoch 3, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75562cd31a0f9b43a875d53eafeae39ff9f4884568c984afec548fb0b3a18bea
+oid sha256:44e654e4d50bd4c08e40f1c40359055b24af92e519f539420a2ae3729b5bff38
 size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4737340e44de8f447a156c00d13c226c8d13eaaa3479d55229dea45aefbcbd95
+oid sha256:a152b235056b4fcbaeb415b6d13c581c062ea5ef61192c30dcf14433ed941558
 size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc930492c5e0375b00eb1faa8503ca1a4cd6495e47aeaa009df65f9bce5b16e3
+oid sha256:76b3b8471ca0351c811d90c5b574a45dac1c24b25ffcf13ec6b85586685c4c47
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9707f7f99f72a12c2631595b1bcc8638efdeda09ae580feffc3a464b56550f1
+oid sha256:8143cf4c2b0cbae224cb6ee44d414097e32f9582a6067851f3fe7a3ab225aca6
 size 1064
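
Each binary above is tracked as a Git LFS pointer: only the oid (the SHA-256 of the underlying blob) changes with the new epoch-3 state, while the byte sizes stay identical. A minimal sketch for checking a local copy against the new pointers, assuming the repo has been cloned with the LFS objects pulled and paths are relative to the repo root:

```python
# Sketch (not part of the repo): recompute the LFS oid/size for one of the
# files above and compare with the new pointer recorded in this commit.
import hashlib
from pathlib import Path

def lfs_digest(path: Path) -> tuple[str, int]:
    """Return (sha256 hex digest, size in bytes), streaming so large blobs fit in memory."""
    h, size = hashlib.sha256(), 0
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest(), size

oid, size = lfs_digest(Path("last-checkpoint/model.safetensors"))
assert oid == "44e654e4d50bd4c08e40f1c40359055b24af92e519f539420a2ae3729b5bff38"  # new oid above
assert size == 442668636                                                           # size unchanged
```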
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 6.929315567016602,
-  "best_model_checkpoint": "./results/checkpoint-1832",
-  "epoch": 2.0,
+  "best_metric": 6.088820139567058,
+  "best_model_checkpoint": "./results/checkpoint-2748",
+  "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 1832,
+  "global_step": 2748,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -157,6 +157,81 @@
       "eval_samples_per_second": 272.065,
       "eval_steps_per_second": 8.503,
       "step": 1832
+    },
+    {
+      "epoch": 2.074235807860262,
+      "grad_norm": 40.95843505859375,
+      "learning_rate": 2.2221615720524018e-05,
+      "loss": 6.1465,
+      "step": 1900
+    },
+    {
+      "epoch": 2.183406113537118,
+      "grad_norm": 26.046171188354492,
+      "learning_rate": 2.181222707423581e-05,
+      "loss": 5.9925,
+      "step": 2000
+    },
+    {
+      "epoch": 2.2925764192139737,
+      "grad_norm": 36.05866622924805,
+      "learning_rate": 2.1402838427947596e-05,
+      "loss": 5.8884,
+      "step": 2100
+    },
+    {
+      "epoch": 2.4017467248908297,
+      "grad_norm": 23.126216888427734,
+      "learning_rate": 2.099344978165939e-05,
+      "loss": 5.9357,
+      "step": 2200
+    },
+    {
+      "epoch": 2.5109170305676853,
+      "grad_norm": 29.862232208251953,
+      "learning_rate": 2.058406113537118e-05,
+      "loss": 5.8846,
+      "step": 2300
+    },
+    {
+      "epoch": 2.6200873362445414,
+      "grad_norm": 30.4029541015625,
+      "learning_rate": 2.0174672489082972e-05,
+      "loss": 5.8334,
+      "step": 2400
+    },
+    {
+      "epoch": 2.7292576419213974,
+      "grad_norm": 30.72637367248535,
+      "learning_rate": 1.976528384279476e-05,
+      "loss": 5.8922,
+      "step": 2500
+    },
+    {
+      "epoch": 2.8384279475982535,
+      "grad_norm": 24.41779136657715,
+      "learning_rate": 1.935589519650655e-05,
+      "loss": 5.912,
+      "step": 2600
+    },
+    {
+      "epoch": 2.947598253275109,
+      "grad_norm": 27.00792121887207,
+      "learning_rate": 1.894650655021834e-05,
+      "loss": 5.655,
+      "step": 2700
+    },
+    {
+      "epoch": 3.0,
+      "eval_avg_mae": 6.088820139567058,
+      "eval_loss": 6.088819980621338,
+      "eval_mae_lex": 5.295498847961426,
+      "eval_mae_sem": 4.145097255706787,
+      "eval_mae_syn": 8.8258638381958,
+      "eval_runtime": 26.9401,
+      "eval_samples_per_second": 271.974,
+      "eval_steps_per_second": 8.5,
+      "step": 2748
     }
   ],
   "logging_steps": 100,
@@ -176,7 +251,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3855532466677248.0,
+  "total_flos": 5783298700015872.0,
   "train_batch_size": 32,
   "trial_name": null,
  "trial_params": null