ngwgsang committed on
Commit 69a32d5 · verified · 1 Parent(s): ca07bc2

Training in progress, epoch 3, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e2732a6d21b842687bf63e8ed4677683d4c127cb8c9417afe437147b86b7919
+oid sha256:7381c11af32f450ef90a1f41be45370df688105c59000f73049a6f3d855bf5bf
 size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cdce2c84944c39c9782d1ca61591a7c11485d3a4ca5f9456c59f7d8b41bf5d5f
+oid sha256:1fa5f76c2cf56d06474142e07bd2538df2e0f93a495907116066ebde69a2488c
 size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc930492c5e0375b00eb1faa8503ca1a4cd6495e47aeaa009df65f9bce5b16e3
+oid sha256:76b3b8471ca0351c811d90c5b574a45dac1c24b25ffcf13ec6b85586685c4c47
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9707f7f99f72a12c2631595b1bcc8638efdeda09ae580feffc3a464b56550f1
+oid sha256:8143cf4c2b0cbae224cb6ee44d414097e32f9582a6067851f3fe7a3ab225aca6
 size 1064
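
Each of the four changed checkpoint files above is a Git LFS pointer rather than the binary itself: the pointer records only the object's sha256 oid and its byte size, and this commit swaps the oids while every size stays the same. Below is a minimal sketch of how a pulled copy could be checked against its pointer; the `verify_lfs_object` helper and the chunk size are illustrative, not part of any repository tooling, and only the path, oid, and size of the new model.safetensors from this diff are reused.

```python
# Illustrative check of a pulled LFS object against its pointer metadata.
import hashlib

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Stream the file and compare its sha256 digest and size to the pointer."""
    digest = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)
            size += len(chunk)
    return size == expected_size and digest.hexdigest() == expected_oid

# Values taken from the new model.safetensors pointer in this commit.
print(verify_lfs_object(
    "last-checkpoint/model.safetensors",
    "7381c11af32f450ef90a1f41be45370df688105c59000f73049a6f3d855bf5bf",
    442668636,
))
```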
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 6.067600250244141,
-  "best_model_checkpoint": "./results/checkpoint-1832",
-  "epoch": 2.0,
+  "best_metric": 5.645811716715495,
+  "best_model_checkpoint": "./results/checkpoint-2748",
+  "epoch": 3.0,
   "eval_steps": 500,
-  "global_step": 1832,
+  "global_step": 2748,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -157,6 +157,81 @@
       "eval_samples_per_second": 269.184,
       "eval_steps_per_second": 8.413,
       "step": 1832
+    },
+    {
+      "epoch": 2.074235807860262,
+      "grad_norm": 36.847415924072266,
+      "learning_rate": 2.2221615720524018e-05,
+      "loss": 5.9928,
+      "step": 1900
+    },
+    {
+      "epoch": 2.183406113537118,
+      "grad_norm": 37.08506393432617,
+      "learning_rate": 2.181222707423581e-05,
+      "loss": 5.9648,
+      "step": 2000
+    },
+    {
+      "epoch": 2.2925764192139737,
+      "grad_norm": 35.595909118652344,
+      "learning_rate": 2.1402838427947596e-05,
+      "loss": 5.8648,
+      "step": 2100
+    },
+    {
+      "epoch": 2.4017467248908297,
+      "grad_norm": 23.82405662536621,
+      "learning_rate": 2.099344978165939e-05,
+      "loss": 5.9043,
+      "step": 2200
+    },
+    {
+      "epoch": 2.5109170305676853,
+      "grad_norm": 30.872852325439453,
+      "learning_rate": 2.058406113537118e-05,
+      "loss": 5.8428,
+      "step": 2300
+    },
+    {
+      "epoch": 2.6200873362445414,
+      "grad_norm": 42.079261779785156,
+      "learning_rate": 2.0174672489082972e-05,
+      "loss": 5.8529,
+      "step": 2400
+    },
+    {
+      "epoch": 2.7292576419213974,
+      "grad_norm": 23.549190521240234,
+      "learning_rate": 1.976528384279476e-05,
+      "loss": 5.8328,
+      "step": 2500
+    },
+    {
+      "epoch": 2.8384279475982535,
+      "grad_norm": 32.223079681396484,
+      "learning_rate": 1.935589519650655e-05,
+      "loss": 5.8484,
+      "step": 2600
+    },
+    {
+      "epoch": 2.947598253275109,
+      "grad_norm": 25.67125129699707,
+      "learning_rate": 1.894650655021834e-05,
+      "loss": 5.5861,
+      "step": 2700
+    },
+    {
+      "epoch": 3.0,
+      "eval_avg_mae": 5.645811716715495,
+      "eval_loss": 5.645811080932617,
+      "eval_mae_lex": 4.994715213775635,
+      "eval_mae_sem": 3.6993861198425293,
+      "eval_mae_syn": 8.24333381652832,
+      "eval_runtime": 27.1472,
+      "eval_samples_per_second": 269.899,
+      "eval_steps_per_second": 8.435,
+      "step": 2748
     }
   ],
   "logging_steps": 100,
@@ -176,7 +251,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3855532466677248.0,
+  "total_flos": 5783298700015872.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null