ngwgsang commited on
Commit
24e8382
·
verified ·
1 Parent(s): 4da57be

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:737bff9e53771f919fd8e65ceb4d4c2d6f17ab4d48951f73831dec9db5e974f5
3
  size 442668636
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e2732a6d21b842687bf63e8ed4677683d4c127cb8c9417afe437147b86b7919
3
  size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24c15aed075594926bdfb8b0a3fd65422193e443f12843c149dfd9d5c185c0f0
3
  size 885457146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdce2c84944c39c9782d1ca61591a7c11485d3a4ca5f9456c59f7d8b41bf5d5f
3
  size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2477d3715e68f2549c9ecd6a18f4a17a0bfb0a625f50ce4fafa0aa2652affb1c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc930492c5e0375b00eb1faa8503ca1a4cd6495e47aeaa009df65f9bce5b16e3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00b7251b468e4d3cb44eba0757f056754012975e532dc253bb53666972923e5b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9707f7f99f72a12c2631595b1bcc8638efdeda09ae580feffc3a464b56550f1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 7.529487609863281,
3
- "best_model_checkpoint": "./results/checkpoint-916",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 916,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -82,6 +82,81 @@
82
  "eval_samples_per_second": 269.609,
83
  "eval_steps_per_second": 8.426,
84
  "step": 916
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  }
86
  ],
87
  "logging_steps": 100,
@@ -101,7 +176,7 @@
101
  "attributes": {}
102
  }
103
  },
104
- "total_flos": 1927766233338624.0,
105
  "train_batch_size": 32,
106
  "trial_name": null,
107
  "trial_params": null
 
1
  {
2
+ "best_metric": 6.067600250244141,
3
+ "best_model_checkpoint": "./results/checkpoint-1832",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 1832,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
82
  "eval_samples_per_second": 269.609,
83
  "eval_steps_per_second": 8.426,
84
  "step": 916
85
+ },
86
+ {
87
+ "epoch": 1.091703056768559,
88
+ "grad_norm": 26.391666412353516,
89
+ "learning_rate": 2.5906113537117905e-05,
90
+ "loss": 6.7699,
91
+ "step": 1000
92
+ },
93
+ {
94
+ "epoch": 1.2008733624454149,
95
+ "grad_norm": 22.994029998779297,
96
+ "learning_rate": 2.5496724890829696e-05,
97
+ "loss": 6.5552,
98
+ "step": 1100
99
+ },
100
+ {
101
+ "epoch": 1.3100436681222707,
102
+ "grad_norm": 20.722883224487305,
103
+ "learning_rate": 2.5087336244541486e-05,
104
+ "loss": 6.5897,
105
+ "step": 1200
106
+ },
107
+ {
108
+ "epoch": 1.4192139737991267,
109
+ "grad_norm": 32.02668380737305,
110
+ "learning_rate": 2.4677947598253277e-05,
111
+ "loss": 6.5073,
112
+ "step": 1300
113
+ },
114
+ {
115
+ "epoch": 1.5283842794759825,
116
+ "grad_norm": 32.40359115600586,
117
+ "learning_rate": 2.4268558951965064e-05,
118
+ "loss": 6.4684,
119
+ "step": 1400
120
+ },
121
+ {
122
+ "epoch": 1.6375545851528384,
123
+ "grad_norm": 47.73025131225586,
124
+ "learning_rate": 2.3859170305676855e-05,
125
+ "loss": 6.3165,
126
+ "step": 1500
127
+ },
128
+ {
129
+ "epoch": 1.7467248908296944,
130
+ "grad_norm": 47.35511016845703,
131
+ "learning_rate": 2.344978165938865e-05,
132
+ "loss": 6.2866,
133
+ "step": 1600
134
+ },
135
+ {
136
+ "epoch": 1.8558951965065502,
137
+ "grad_norm": 44.51765441894531,
138
+ "learning_rate": 2.3040393013100437e-05,
139
+ "loss": 6.3404,
140
+ "step": 1700
141
+ },
142
+ {
143
+ "epoch": 1.965065502183406,
144
+ "grad_norm": 26.496959686279297,
145
+ "learning_rate": 2.2631004366812227e-05,
146
+ "loss": 6.1681,
147
+ "step": 1800
148
+ },
149
+ {
150
+ "epoch": 2.0,
151
+ "eval_avg_mae": 6.067600250244141,
152
+ "eval_loss": 6.067600250244141,
153
+ "eval_mae_lex": 5.595421314239502,
154
+ "eval_mae_sem": 4.1164045333862305,
155
+ "eval_mae_syn": 8.490975379943848,
156
+ "eval_runtime": 27.2193,
157
+ "eval_samples_per_second": 269.184,
158
+ "eval_steps_per_second": 8.413,
159
+ "step": 1832
160
  }
161
  ],
162
  "logging_steps": 100,
 
176
  "attributes": {}
177
  }
178
  },
179
+ "total_flos": 3855532466677248.0,
180
  "train_batch_size": 32,
181
  "trial_name": null,
182
  "trial_params": null