kapilkd13 commited on
Commit
8caf192
·
1 Parent(s): a00d257

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +14 -0
  2. eval_results.json +9 -0
  3. train_results.json +8 -0
  4. trainer_state.json +301 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.12,
3
+ "eval_loss": 0.7805310487747192,
4
+ "eval_runtime": 81.6634,
5
+ "eval_samples": 2095,
6
+ "eval_samples_per_second": 25.654,
7
+ "eval_steps_per_second": 3.208,
8
+ "eval_wer": 0.4340145266527145,
9
+ "train_loss": 0.48406150817871096,
10
+ "train_runtime": 11109.9321,
11
+ "train_samples": 4711,
12
+ "train_samples_per_second": 11.521,
13
+ "train_steps_per_second": 0.72
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.12,
3
+ "eval_loss": 0.7805310487747192,
4
+ "eval_runtime": 81.6634,
5
+ "eval_samples": 2095,
6
+ "eval_samples_per_second": 25.654,
7
+ "eval_steps_per_second": 3.208,
8
+ "eval_wer": 0.4340145266527145
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 27.12,
3
+ "train_loss": 0.48406150817871096,
4
+ "train_runtime": 11109.9321,
5
+ "train_samples": 4711,
6
+ "train_samples_per_second": 11.521,
7
+ "train_steps_per_second": 0.72
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 27.11864406779661,
5
+ "global_step": 8000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.36,
12
+ "eval_loss": 1.9129917621612549,
13
+ "eval_runtime": 85.8103,
14
+ "eval_samples_per_second": 24.414,
15
+ "eval_steps_per_second": 3.053,
16
+ "eval_wer": 0.9244121629939678,
17
+ "step": 400
18
+ },
19
+ {
20
+ "epoch": 1.69,
21
+ "learning_rate": 0.0002982,
22
+ "loss": 5.0013,
23
+ "step": 500
24
+ },
25
+ {
26
+ "epoch": 2.71,
27
+ "eval_loss": 0.7788760662078857,
28
+ "eval_runtime": 85.5378,
29
+ "eval_samples_per_second": 24.492,
30
+ "eval_steps_per_second": 3.063,
31
+ "eval_wer": 0.5943616890311462,
32
+ "step": 800
33
+ },
34
+ {
35
+ "epoch": 3.39,
36
+ "learning_rate": 0.00028011999999999997,
37
+ "loss": 0.6544,
38
+ "step": 1000
39
+ },
40
+ {
41
+ "epoch": 4.07,
42
+ "eval_loss": 0.7297950983047485,
43
+ "eval_runtime": 84.8704,
44
+ "eval_samples_per_second": 24.685,
45
+ "eval_steps_per_second": 3.087,
46
+ "eval_wer": 0.585190200664779,
47
+ "step": 1200
48
+ },
49
+ {
50
+ "epoch": 5.08,
51
+ "learning_rate": 0.00026011999999999997,
52
+ "loss": 0.4021,
53
+ "step": 1500
54
+ },
55
+ {
56
+ "epoch": 5.42,
57
+ "eval_loss": 0.697790265083313,
58
+ "eval_runtime": 84.1609,
59
+ "eval_samples_per_second": 24.893,
60
+ "eval_steps_per_second": 3.113,
61
+ "eval_wer": 0.5667241167056506,
62
+ "step": 1600
63
+ },
64
+ {
65
+ "epoch": 6.78,
66
+ "learning_rate": 0.00024011999999999997,
67
+ "loss": 0.3003,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 6.78,
72
+ "eval_loss": 0.6764048337936401,
73
+ "eval_runtime": 85.0271,
74
+ "eval_samples_per_second": 24.639,
75
+ "eval_steps_per_second": 3.081,
76
+ "eval_wer": 0.5382247937953958,
77
+ "step": 2000
78
+ },
79
+ {
80
+ "epoch": 8.14,
81
+ "eval_loss": 0.72489994764328,
82
+ "eval_runtime": 83.797,
83
+ "eval_samples_per_second": 25.001,
84
+ "eval_steps_per_second": 3.127,
85
+ "eval_wer": 0.5462883171242152,
86
+ "step": 2400
87
+ },
88
+ {
89
+ "epoch": 8.47,
90
+ "learning_rate": 0.00022011999999999997,
91
+ "loss": 0.2345,
92
+ "step": 2500
93
+ },
94
+ {
95
+ "epoch": 9.49,
96
+ "eval_loss": 0.7279900908470154,
97
+ "eval_runtime": 81.6862,
98
+ "eval_samples_per_second": 25.647,
99
+ "eval_steps_per_second": 3.207,
100
+ "eval_wer": 0.5124338298658131,
101
+ "step": 2800
102
+ },
103
+ {
104
+ "epoch": 10.17,
105
+ "learning_rate": 0.00020012,
106
+ "loss": 0.1993,
107
+ "step": 3000
108
+ },
109
+ {
110
+ "epoch": 10.85,
111
+ "eval_loss": 0.7288674712181091,
112
+ "eval_runtime": 81.4638,
113
+ "eval_samples_per_second": 25.717,
114
+ "eval_steps_per_second": 3.216,
115
+ "eval_wer": 0.4690385325618614,
116
+ "step": 3200
117
+ },
118
+ {
119
+ "epoch": 11.86,
120
+ "learning_rate": 0.00018012,
121
+ "loss": 0.1617,
122
+ "step": 3500
123
+ },
124
+ {
125
+ "epoch": 12.2,
126
+ "eval_loss": 0.7430842518806458,
127
+ "eval_runtime": 81.3254,
128
+ "eval_samples_per_second": 25.761,
129
+ "eval_steps_per_second": 3.222,
130
+ "eval_wer": 0.4732857318724609,
131
+ "step": 3600
132
+ },
133
+ {
134
+ "epoch": 13.56,
135
+ "learning_rate": 0.00016011999999999998,
136
+ "loss": 0.1432,
137
+ "step": 4000
138
+ },
139
+ {
140
+ "epoch": 13.56,
141
+ "eval_loss": 0.7448425889015198,
142
+ "eval_runtime": 83.7392,
143
+ "eval_samples_per_second": 25.018,
144
+ "eval_steps_per_second": 3.129,
145
+ "eval_wer": 0.4732857318724609,
146
+ "step": 4000
147
+ },
148
+ {
149
+ "epoch": 14.92,
150
+ "eval_loss": 0.7745729088783264,
151
+ "eval_runtime": 83.3791,
152
+ "eval_samples_per_second": 25.126,
153
+ "eval_steps_per_second": 3.142,
154
+ "eval_wer": 0.4484796257540318,
155
+ "step": 4400
156
+ },
157
+ {
158
+ "epoch": 15.25,
159
+ "learning_rate": 0.00014012,
160
+ "loss": 0.1172,
161
+ "step": 4500
162
+ },
163
+ {
164
+ "epoch": 16.27,
165
+ "eval_loss": 0.7588675022125244,
166
+ "eval_runtime": 83.1746,
167
+ "eval_samples_per_second": 25.188,
168
+ "eval_steps_per_second": 3.15,
169
+ "eval_wer": 0.4742090360704173,
170
+ "step": 4800
171
+ },
172
+ {
173
+ "epoch": 16.95,
174
+ "learning_rate": 0.00012011999999999998,
175
+ "loss": 0.1035,
176
+ "step": 5000
177
+ },
178
+ {
179
+ "epoch": 17.63,
180
+ "eval_loss": 0.7539412975311279,
181
+ "eval_runtime": 83.8808,
182
+ "eval_samples_per_second": 24.976,
183
+ "eval_steps_per_second": 3.123,
184
+ "eval_wer": 0.4353071525298535,
185
+ "step": 5200
186
+ },
187
+ {
188
+ "epoch": 18.64,
189
+ "learning_rate": 0.00010011999999999998,
190
+ "loss": 0.0956,
191
+ "step": 5500
192
+ },
193
+ {
194
+ "epoch": 18.98,
195
+ "eval_loss": 0.7648215293884277,
196
+ "eval_runtime": 83.2596,
197
+ "eval_samples_per_second": 25.162,
198
+ "eval_steps_per_second": 3.147,
199
+ "eval_wer": 0.44946448356518526,
200
+ "step": 5600
201
+ },
202
+ {
203
+ "epoch": 20.34,
204
+ "learning_rate": 8.012e-05,
205
+ "loss": 0.0845,
206
+ "step": 6000
207
+ },
208
+ {
209
+ "epoch": 20.34,
210
+ "eval_loss": 0.7876783013343811,
211
+ "eval_runtime": 84.6623,
212
+ "eval_samples_per_second": 24.745,
213
+ "eval_steps_per_second": 3.095,
214
+ "eval_wer": 0.4718699987689277,
215
+ "step": 6000
216
+ },
217
+ {
218
+ "epoch": 21.69,
219
+ "eval_loss": 0.7883597016334534,
220
+ "eval_runtime": 84.2577,
221
+ "eval_samples_per_second": 24.864,
222
+ "eval_steps_per_second": 3.11,
223
+ "eval_wer": 0.44343222947187,
224
+ "step": 6400
225
+ },
226
+ {
227
+ "epoch": 22.03,
228
+ "learning_rate": 6.0119999999999994e-05,
229
+ "loss": 0.0761,
230
+ "step": 6500
231
+ },
232
+ {
233
+ "epoch": 23.05,
234
+ "eval_loss": 0.7796189188957214,
235
+ "eval_runtime": 81.0587,
236
+ "eval_samples_per_second": 25.845,
237
+ "eval_steps_per_second": 3.232,
238
+ "eval_wer": 0.4385694940292995,
239
+ "step": 6800
240
+ },
241
+ {
242
+ "epoch": 23.73,
243
+ "learning_rate": 4.012e-05,
244
+ "loss": 0.0634,
245
+ "step": 7000
246
+ },
247
+ {
248
+ "epoch": 24.41,
249
+ "eval_loss": 0.7729123830795288,
250
+ "eval_runtime": 81.4812,
251
+ "eval_samples_per_second": 25.711,
252
+ "eval_steps_per_second": 3.215,
253
+ "eval_wer": 0.4306290779268743,
254
+ "step": 7200
255
+ },
256
+ {
257
+ "epoch": 25.42,
258
+ "learning_rate": 2.0159999999999997e-05,
259
+ "loss": 0.0571,
260
+ "step": 7500
261
+ },
262
+ {
263
+ "epoch": 25.76,
264
+ "eval_loss": 0.7826283574104309,
265
+ "eval_runtime": 81.9341,
266
+ "eval_samples_per_second": 25.569,
267
+ "eval_steps_per_second": 3.198,
268
+ "eval_wer": 0.4298288809553121,
269
+ "step": 7600
270
+ },
271
+ {
272
+ "epoch": 27.12,
273
+ "learning_rate": 1.6e-07,
274
+ "loss": 0.0508,
275
+ "step": 8000
276
+ },
277
+ {
278
+ "epoch": 27.12,
279
+ "eval_loss": 0.7805310487747192,
280
+ "eval_runtime": 81.3643,
281
+ "eval_samples_per_second": 25.748,
282
+ "eval_steps_per_second": 3.22,
283
+ "eval_wer": 0.4340145266527145,
284
+ "step": 8000
285
+ },
286
+ {
287
+ "epoch": 27.12,
288
+ "step": 8000,
289
+ "total_flos": 1.5928886672052732e+19,
290
+ "train_loss": 0.48406150817871096,
291
+ "train_runtime": 11109.9321,
292
+ "train_samples_per_second": 11.521,
293
+ "train_steps_per_second": 0.72
294
+ }
295
+ ],
296
+ "max_steps": 8000,
297
+ "num_train_epochs": 28,
298
+ "total_flos": 1.5928886672052732e+19,
299
+ "trial_name": null,
300
+ "trial_params": null
301
+ }