bagasshw committed on
Commit
5eedea7
·
verified ·
1 Parent(s): b6e4414

Model save

Browse files
all_results.json CHANGED
@@ -1,8 +1,21 @@
1
  {
 
 
 
 
 
 
 
2
  "pretrained_eval_loss": 5.0976738929748535,
3
  "pretrained_eval_model_preparation_time": 0.0025,
4
  "pretrained_eval_runtime": 5500.5064,
5
  "pretrained_eval_samples_per_second": 3.364,
6
  "pretrained_eval_steps_per_second": 0.21,
7
- "pretrained_eval_wer": 1.3043423861619188
 
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 4.9922212618841835,
3
+ "eval_loss": 0.5638014078140259,
4
+ "eval_runtime": 4176.5631,
5
+ "eval_samples": 18504,
6
+ "eval_samples_per_second": 4.43,
7
+ "eval_steps_per_second": 0.277,
8
+ "eval_wer": 0.4824482518630893,
9
  "pretrained_eval_loss": 5.0976738929748535,
10
  "pretrained_eval_model_preparation_time": 0.0025,
11
  "pretrained_eval_runtime": 5500.5064,
12
  "pretrained_eval_samples_per_second": 3.364,
13
  "pretrained_eval_steps_per_second": 0.21,
14
+ "pretrained_eval_wer": 1.3043423861619188,
15
+ "total_flos": 1.819709817421824e+19,
16
+ "train_loss": 0.5687196460974671,
17
+ "train_runtime": 175635.8061,
18
+ "train_samples": 148052,
19
+ "train_samples_per_second": 4.215,
20
+ "train_steps_per_second": 0.016
21
  }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.9922212618841835,
3
+ "eval_loss": 0.5638014078140259,
4
+ "eval_runtime": 4176.5631,
5
+ "eval_samples": 18504,
6
+ "eval_samples_per_second": 4.43,
7
+ "eval_steps_per_second": 0.277,
8
+ "eval_wer": 0.4824482518630893
9
+ }
runs/Mar09_03-41-00_dgx-a100/events.out.tfevents.1741646309.dgx-a100.2307703.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1544e12bf804d4d635b6287b0e6af5d033b00e7d6eeae59890e7cfc593fe0ea0
3
+ size 406
runs/Mar12_00-38-44_dgx-a100/events.out.tfevents.1741714743.dgx-a100.2122149.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b774d7e22c7b4412a913395ebb5bc988f69e1cfebfcf118a42aedaae041173
3
+ size 7336
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 4.9922212618841835,
3
+ "total_flos": 1.819709817421824e+19,
4
+ "train_loss": 0.5687196460974671,
5
+ "train_runtime": 175635.8061,
6
+ "train_samples": 148052,
7
+ "train_samples_per_second": 4.215,
8
+ "train_steps_per_second": 0.016
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4824482518630893,
3
+ "best_model_checkpoint": "/raid/p-storage/slp01_565a7357/bagas-fine-tune-whisper/whisper-tiny-javanese-openslr-v2/checkpoint-500",
4
+ "epoch": 4.9922212618841835,
5
+ "eval_steps": 500,
6
+ "global_step": 2890,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.17286084701815038,
13
+ "grad_norm": 6.286952018737793,
14
+ "learning_rate": 3.3910034602076125e-06,
15
+ "loss": 3.0341,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.34572169403630076,
20
+ "grad_norm": 4.7146077156066895,
21
+ "learning_rate": 6.85121107266436e-06,
22
+ "loss": 1.3208,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.5185825410544511,
27
+ "grad_norm": 4.3300933837890625,
28
+ "learning_rate": 9.965397923875434e-06,
29
+ "loss": 0.9403,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.6914433880726015,
34
+ "grad_norm": 4.19991397857666,
35
+ "learning_rate": 9.580930411380239e-06,
36
+ "loss": 0.7674,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.8643042350907519,
41
+ "grad_norm": 3.640639066696167,
42
+ "learning_rate": 9.196462898885045e-06,
43
+ "loss": 0.6676,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.8643042350907519,
48
+ "eval_loss": 0.5638014078140259,
49
+ "eval_runtime": 6063.2097,
50
+ "eval_samples_per_second": 3.052,
51
+ "eval_steps_per_second": 0.191,
52
+ "eval_wer": 0.4824482518630893,
53
+ "step": 500
54
+ },
55
+ {
56
+ "epoch": 1.0363007778738116,
57
+ "grad_norm": 3.6083388328552246,
58
+ "learning_rate": 8.811995386389852e-06,
59
+ "loss": 0.6009,
60
+ "step": 600
61
+ },
62
+ {
63
+ "epoch": 1.2091616248919619,
64
+ "grad_norm": 3.529407024383545,
65
+ "learning_rate": 8.427527873894657e-06,
66
+ "loss": 0.5501,
67
+ "step": 700
68
+ },
69
+ {
70
+ "epoch": 1.3820224719101124,
71
+ "grad_norm": 3.4671249389648438,
72
+ "learning_rate": 8.043060361399463e-06,
73
+ "loss": 0.523,
74
+ "step": 800
75
+ },
76
+ {
77
+ "epoch": 1.5548833189282627,
78
+ "grad_norm": 3.357516288757324,
79
+ "learning_rate": 7.658592848904268e-06,
80
+ "loss": 0.4992,
81
+ "step": 900
82
+ },
83
+ {
84
+ "epoch": 1.727744165946413,
85
+ "grad_norm": 3.260972738265991,
86
+ "learning_rate": 7.274125336409074e-06,
87
+ "loss": 0.4792,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 1.727744165946413,
92
+ "eval_loss": 0.42836251854896545,
93
+ "eval_runtime": 5157.0546,
94
+ "eval_samples_per_second": 3.588,
95
+ "eval_steps_per_second": 0.224,
96
+ "eval_wer": 0.5330351347758427,
97
+ "step": 1000
98
+ },
99
+ {
100
+ "epoch": 1.9006050129645635,
101
+ "grad_norm": 3.132084608078003,
102
+ "learning_rate": 6.88965782391388e-06,
103
+ "loss": 0.4654,
104
+ "step": 1100
105
+ },
106
+ {
107
+ "epoch": 2.072601555747623,
108
+ "grad_norm": 3.0205070972442627,
109
+ "learning_rate": 6.505190311418685e-06,
110
+ "loss": 0.4387,
111
+ "step": 1200
112
+ },
113
+ {
114
+ "epoch": 2.2454624027657735,
115
+ "grad_norm": 3.090315103530884,
116
+ "learning_rate": 6.120722798923492e-06,
117
+ "loss": 0.4201,
118
+ "step": 1300
119
+ },
120
+ {
121
+ "epoch": 2.4183232497839238,
122
+ "grad_norm": 3.003020763397217,
123
+ "learning_rate": 5.736255286428297e-06,
124
+ "loss": 0.4111,
125
+ "step": 1400
126
+ },
127
+ {
128
+ "epoch": 2.591184096802074,
129
+ "grad_norm": 2.9050354957580566,
130
+ "learning_rate": 5.351787773933102e-06,
131
+ "loss": 0.3988,
132
+ "step": 1500
133
+ },
134
+ {
135
+ "epoch": 2.591184096802074,
136
+ "eval_loss": 0.3772418200969696,
137
+ "eval_runtime": 3820.8123,
138
+ "eval_samples_per_second": 4.843,
139
+ "eval_steps_per_second": 0.303,
140
+ "eval_wer": 0.5687485826445312,
141
+ "step": 1500
142
+ },
143
+ {
144
+ "epoch": 2.764044943820225,
145
+ "grad_norm": 2.983358383178711,
146
+ "learning_rate": 4.967320261437909e-06,
147
+ "loss": 0.392,
148
+ "step": 1600
149
+ },
150
+ {
151
+ "epoch": 2.936905790838375,
152
+ "grad_norm": 2.932623863220215,
153
+ "learning_rate": 4.5828527489427145e-06,
154
+ "loss": 0.3882,
155
+ "step": 1700
156
+ },
157
+ {
158
+ "epoch": 3.1089023336214345,
159
+ "grad_norm": 2.8963847160339355,
160
+ "learning_rate": 4.19838523644752e-06,
161
+ "loss": 0.3684,
162
+ "step": 1800
163
+ },
164
+ {
165
+ "epoch": 3.2817631806395853,
166
+ "grad_norm": 3.005558967590332,
167
+ "learning_rate": 3.8139177239523264e-06,
168
+ "loss": 0.3638,
169
+ "step": 1900
170
+ },
171
+ {
172
+ "epoch": 3.4546240276577356,
173
+ "grad_norm": 2.9087977409362793,
174
+ "learning_rate": 3.429450211457132e-06,
175
+ "loss": 0.3565,
176
+ "step": 2000
177
+ },
178
+ {
179
+ "epoch": 3.4546240276577356,
180
+ "eval_loss": 0.35282832384109497,
181
+ "eval_runtime": 3674.1258,
182
+ "eval_samples_per_second": 5.036,
183
+ "eval_steps_per_second": 0.315,
184
+ "eval_wer": 0.6203598714409715,
185
+ "step": 2000
186
+ },
187
+ {
188
+ "epoch": 3.627484874675886,
189
+ "grad_norm": 2.949126958847046,
190
+ "learning_rate": 3.044982698961938e-06,
191
+ "loss": 0.3582,
192
+ "step": 2100
193
+ },
194
+ {
195
+ "epoch": 3.800345721694036,
196
+ "grad_norm": 2.861595869064331,
197
+ "learning_rate": 2.660515186466744e-06,
198
+ "loss": 0.3549,
199
+ "step": 2200
200
+ },
201
+ {
202
+ "epoch": 3.973206568712187,
203
+ "grad_norm": 2.6879312992095947,
204
+ "learning_rate": 2.2760476739715497e-06,
205
+ "loss": 0.351,
206
+ "step": 2300
207
+ },
208
+ {
209
+ "epoch": 4.145203111495246,
210
+ "grad_norm": 3.0249099731445312,
211
+ "learning_rate": 1.8915801614763554e-06,
212
+ "loss": 0.339,
213
+ "step": 2400
214
+ },
215
+ {
216
+ "epoch": 4.318063958513397,
217
+ "grad_norm": 2.900782585144043,
218
+ "learning_rate": 1.5071126489811613e-06,
219
+ "loss": 0.3386,
220
+ "step": 2500
221
+ },
222
+ {
223
+ "epoch": 4.318063958513397,
224
+ "eval_loss": 0.34060564637184143,
225
+ "eval_runtime": 4820.2463,
226
+ "eval_samples_per_second": 3.839,
227
+ "eval_steps_per_second": 0.24,
228
+ "eval_wer": 0.6603038810125197,
229
+ "step": 2500
230
+ },
231
+ {
232
+ "epoch": 4.490924805531547,
233
+ "grad_norm": 2.7883245944976807,
234
+ "learning_rate": 1.122645136485967e-06,
235
+ "loss": 0.3351,
236
+ "step": 2600
237
+ },
238
+ {
239
+ "epoch": 4.663785652549698,
240
+ "grad_norm": 2.759507894515991,
241
+ "learning_rate": 7.381776239907729e-07,
242
+ "loss": 0.3364,
243
+ "step": 2700
244
+ },
245
+ {
246
+ "epoch": 4.8366464995678475,
247
+ "grad_norm": 2.8885769844055176,
248
+ "learning_rate": 3.5371011149557864e-07,
249
+ "loss": 0.334,
250
+ "step": 2800
251
+ },
252
+ {
253
+ "epoch": 4.9922212618841835,
254
+ "step": 2890,
255
+ "total_flos": 1.819709817421824e+19,
256
+ "train_loss": 0.5687196460974671,
257
+ "train_runtime": 175635.8061,
258
+ "train_samples_per_second": 4.215,
259
+ "train_steps_per_second": 0.016
260
+ }
261
+ ],
262
+ "logging_steps": 100,
263
+ "max_steps": 2890,
264
+ "num_input_tokens_seen": 0,
265
+ "num_train_epochs": 5,
266
+ "save_steps": 500,
267
+ "stateful_callbacks": {
268
+ "TrainerControl": {
269
+ "args": {
270
+ "should_epoch_stop": false,
271
+ "should_evaluate": false,
272
+ "should_log": false,
273
+ "should_save": true,
274
+ "should_training_stop": true
275
+ },
276
+ "attributes": {}
277
+ }
278
+ },
279
+ "total_flos": 1.819709817421824e+19,
280
+ "train_batch_size": 64,
281
+ "trial_name": null,
282
+ "trial_params": null
283
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8832c7ab5c94699e8a114b5e67fa5ca5cbce301283cf455ccd5ee01c30192f3a
3
  size 5688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bf01ebf38d42df0215b28382ea14e2974aecb25650ca6663d0f767f11530762
3
  size 5688