iqbalasrif committed on
Commit
dfe28eb
·
verified ·
1 Parent(s): eefc7a5

Upload 17 files

Browse files
README.md CHANGED
@@ -9,7 +9,7 @@ datasets:
9
  metrics:
10
  - wer
11
  model-index:
12
- - name: finetune
13
  results:
14
  - task:
15
  name: Automatic Speech Recognition
@@ -23,19 +23,19 @@ model-index:
23
  metrics:
24
  - name: Wer
25
  type: wer
26
- value: 0.7835602493955974
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
  should probably proofread and complete it, then remove this comment. -->
31
 
32
- # finetune
33
 
34
  This model is a fine-tuned version of [openai/whisper-tiny.en](https://huggingface.co/openai/whisper-tiny.en) on the lalipa/jv_id_asr_split jv_id_asr_source dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 1.7784
37
- - Wer: 0.7836
38
- - Cer: 0.2535
39
 
40
  ## Model description
41
 
@@ -55,25 +55,30 @@ More information needed
55
 
56
  The following hyperparameters were used during training:
57
  - learning_rate: 1e-05
58
- - train_batch_size: 32
59
  - eval_batch_size: 16
60
  - seed: 42
61
  - gradient_accumulation_steps: 2
62
- - total_train_batch_size: 64
63
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
64
  - lr_scheduler_type: linear
65
- - lr_scheduler_warmup_steps: 30
66
- - training_steps: 150
67
 
68
  ### Training results
69
 
70
  | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
71
  |:-------------:|:------:|:----:|:---------------:|:------:|:------:|
72
- | 3.6903 | 0.2041 | 30 | 2.9875 | 1.0127 | 0.4365 |
73
- | 2.533 | 0.4082 | 60 | 2.2360 | 0.8879 | 0.2921 |
74
- | 2.0604 | 0.6122 | 90 | 1.9514 | 0.8253 | 0.2670 |
75
- | 1.852 | 0.8163 | 120 | 1.8182 | 0.7949 | 0.2581 |
76
- | 1.7929 | 1.0204 | 150 | 1.7784 | 0.7836 | 0.2535 |
 
 
 
 
 
77
 
78
 
79
  ### Framework versions
 
9
  metrics:
10
  - wer
11
  model-index:
12
+ - name: hyperparameter
13
  results:
14
  - task:
15
  name: Automatic Speech Recognition
 
23
  metrics:
24
  - name: Wer
25
  type: wer
26
+ value: 0.6883827458964245
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
30
  should probably proofread and complete it, then remove this comment. -->
31
 
32
+ # hyperparameter
33
 
34
  This model is a fine-tuned version of [openai/whisper-tiny.en](https://huggingface.co/openai/whisper-tiny.en) on the lalipa/jv_id_asr_split jv_id_asr_source dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 1.4506
37
+ - Wer: 0.6884
38
+ - Cer: 0.2050
39
 
40
  ## Model description
41
 
 
55
 
56
  The following hyperparameters were used during training:
57
  - learning_rate: 1e-05
58
+ - train_batch_size: 16
59
  - eval_batch_size: 16
60
  - seed: 42
61
  - gradient_accumulation_steps: 2
62
+ - total_train_batch_size: 32
63
  - optimizer: Use adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
64
  - lr_scheduler_type: linear
65
+ - lr_scheduler_warmup_steps: 100
66
+ - training_steps: 300
67
 
68
  ### Training results
69
 
70
  | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
71
  |:-------------:|:------:|:----:|:---------------:|:------:|:------:|
72
+ | 3.9694 | 0.1020 | 30 | 3.7782 | 1.8748 | 1.0887 |
73
+ | 3.3735 | 0.2041 | 60 | 2.9598 | 1.0019 | 0.4254 |
74
+ | 2.5449 | 0.3061 | 90 | 2.1989 | 0.8820 | 0.3221 |
75
+ | 1.9987 | 0.4082 | 120 | 1.8648 | 0.8004 | 0.2606 |
76
+ | 1.7671 | 0.5102 | 150 | 1.6909 | 0.7619 | 0.2312 |
77
+ | 1.6285 | 0.6122 | 180 | 1.5863 | 0.7336 | 0.2245 |
78
+ | 1.5475 | 0.7143 | 210 | 1.5251 | 0.7216 | 0.2213 |
79
+ | 1.4793 | 0.8163 | 240 | 1.4807 | 0.6942 | 0.2035 |
80
+ | 1.5013 | 0.9184 | 270 | 1.4582 | 0.6904 | 0.2057 |
81
+ | 1.4438 | 1.0204 | 300 | 1.4506 | 0.6884 | 0.2050 |
82
 
83
 
84
  ### Framework versions
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 1.0204081632653061,
3
- "eval_cer": 0.253486835896952,
4
- "eval_loss": 1.7784144878387451,
5
- "eval_runtime": 159.8385,
6
  "eval_samples": 1136,
7
- "eval_samples_per_second": 7.107,
8
- "eval_steps_per_second": 0.444,
9
- "eval_wer": 0.7835602493955974,
10
  "total_flos": 2.3614434607104e+17,
11
- "train_loss": 2.385703277587891,
12
- "train_runtime": 5094.7107,
13
  "train_samples": 9400,
14
- "train_samples_per_second": 1.884,
15
- "train_steps_per_second": 0.029
16
  }
 
1
  {
2
  "epoch": 1.0204081632653061,
3
+ "eval_cer": 0.20496366896291404,
4
+ "eval_loss": 1.4505608081817627,
5
+ "eval_runtime": 170.4457,
6
  "eval_samples": 1136,
7
+ "eval_samples_per_second": 6.665,
8
+ "eval_steps_per_second": 0.417,
9
+ "eval_wer": 0.6883827458964245,
10
  "total_flos": 2.3614434607104e+17,
11
+ "train_loss": 2.125396842956543,
12
+ "train_runtime": 3027.4927,
13
  "train_samples": 9400,
14
+ "train_samples_per_second": 3.171,
15
+ "train_steps_per_second": 0.099
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 1.0204081632653061,
3
- "eval_cer": 0.253486835896952,
4
- "eval_loss": 1.7784144878387451,
5
- "eval_runtime": 159.8385,
6
  "eval_samples": 1136,
7
- "eval_samples_per_second": 7.107,
8
- "eval_steps_per_second": 0.444,
9
- "eval_wer": 0.7835602493955974
10
  }
 
1
  {
2
  "epoch": 1.0204081632653061,
3
+ "eval_cer": 0.20496366896291404,
4
+ "eval_loss": 1.4505608081817627,
5
+ "eval_runtime": 170.4457,
6
  "eval_samples": 1136,
7
+ "eval_samples_per_second": 6.665,
8
+ "eval_steps_per_second": 0.417,
9
+ "eval_wer": 0.6883827458964245
10
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f00ae2f8b1baee3e68c188dd5930ce91257cfd62060f4a93fc39e101675c14
3
+ size 151060136
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0204081632653061,
3
  "total_flos": 2.3614434607104e+17,
4
- "train_loss": 2.385703277587891,
5
- "train_runtime": 5094.7107,
6
  "train_samples": 9400,
7
- "train_samples_per_second": 1.884,
8
- "train_steps_per_second": 0.029
9
  }
 
1
  {
2
  "epoch": 1.0204081632653061,
3
  "total_flos": 2.3614434607104e+17,
4
+ "train_loss": 2.125396842956543,
5
+ "train_runtime": 3027.4927,
6
  "train_samples": 9400,
7
+ "train_samples_per_second": 3.171,
8
+ "train_steps_per_second": 0.099
9
  }
trainer_state.json CHANGED
@@ -3,108 +3,193 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0204081632653061,
5
  "eval_steps": 30,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.20408163265306123,
13
- "grad_norm": 28829.30859375,
14
- "learning_rate": 1e-05,
15
- "loss": 3.6903,
16
  "step": 30
17
  },
18
  {
19
- "epoch": 0.20408163265306123,
20
- "eval_cer": 0.43646314994809854,
21
- "eval_loss": 2.987450122833252,
22
- "eval_runtime": 203.3511,
23
- "eval_samples_per_second": 5.586,
24
- "eval_steps_per_second": 0.349,
25
- "eval_wer": 1.0127242651736863,
26
  "step": 30
27
  },
28
  {
29
- "epoch": 0.40816326530612246,
30
- "grad_norm": 26780.62109375,
31
- "learning_rate": 7.500000000000001e-06,
32
- "loss": 2.533,
33
  "step": 60
34
  },
35
  {
36
- "epoch": 0.40816326530612246,
37
- "eval_cer": 0.2920826649051618,
38
- "eval_loss": 2.235991954803467,
39
- "eval_runtime": 163.1323,
40
- "eval_samples_per_second": 6.964,
41
- "eval_steps_per_second": 0.435,
42
- "eval_wer": 0.8878992238198244,
43
  "step": 60
44
  },
45
  {
46
- "epoch": 0.6122448979591837,
47
- "grad_norm": 9.42530632019043,
48
- "learning_rate": 5e-06,
49
- "loss": 2.0604,
50
  "step": 90
51
  },
52
  {
53
- "epoch": 0.6122448979591837,
54
- "eval_cer": 0.26696234783429273,
55
- "eval_loss": 1.951379656791687,
56
- "eval_runtime": 171.6219,
57
- "eval_samples_per_second": 6.619,
58
- "eval_steps_per_second": 0.414,
59
- "eval_wer": 0.8252958391652883,
60
  "step": 90
61
  },
62
  {
63
- "epoch": 0.8163265306122449,
64
- "grad_norm": 724720.0625,
65
- "learning_rate": 2.5e-06,
66
- "loss": 1.852,
67
  "step": 120
68
  },
69
  {
70
- "epoch": 0.8163265306122449,
71
- "eval_cer": 0.25809191280551097,
72
- "eval_loss": 1.8181612491607666,
73
- "eval_runtime": 180.4927,
74
- "eval_samples_per_second": 6.294,
75
- "eval_steps_per_second": 0.393,
76
- "eval_wer": 0.7948848454001781,
77
  "step": 120
78
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  {
80
  "epoch": 1.0204081632653061,
81
- "grad_norm": 9.10916519165039,
82
  "learning_rate": 0.0,
83
- "loss": 1.7929,
84
- "step": 150
85
  },
86
  {
87
  "epoch": 1.0204081632653061,
88
- "eval_cer": 0.253486835896952,
89
- "eval_loss": 1.7784144878387451,
90
- "eval_runtime": 183.691,
91
- "eval_samples_per_second": 6.184,
92
- "eval_steps_per_second": 0.387,
93
- "eval_wer": 0.7835602493955974,
94
- "step": 150
95
  },
96
  {
97
  "epoch": 1.0204081632653061,
98
- "step": 150,
99
  "total_flos": 2.3614434607104e+17,
100
- "train_loss": 2.385703277587891,
101
- "train_runtime": 5094.7107,
102
- "train_samples_per_second": 1.884,
103
- "train_steps_per_second": 0.029
104
  }
105
  ],
106
  "logging_steps": 30,
107
- "max_steps": 150,
108
  "num_input_tokens_seen": 0,
109
  "num_train_epochs": 2,
110
  "save_steps": 30,
@@ -121,7 +206,7 @@
121
  }
122
  },
123
  "total_flos": 2.3614434607104e+17,
124
- "train_batch_size": 32,
125
  "trial_name": null,
126
  "trial_params": null
127
  }
 
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0204081632653061,
5
  "eval_steps": 30,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.10204081632653061,
13
+ "grad_norm": 32.964595794677734,
14
+ "learning_rate": 3e-06,
15
+ "loss": 3.9694,
16
  "step": 30
17
  },
18
  {
19
+ "epoch": 0.10204081632653061,
20
+ "eval_cer": 1.08866660375578,
21
+ "eval_loss": 3.778170347213745,
22
+ "eval_runtime": 309.6504,
23
+ "eval_samples_per_second": 3.669,
24
+ "eval_steps_per_second": 0.229,
25
+ "eval_wer": 1.8747932306909276,
26
  "step": 30
27
  },
28
  {
29
+ "epoch": 0.20408163265306123,
30
+ "grad_norm": 17.289554595947266,
31
+ "learning_rate": 6e-06,
32
+ "loss": 3.3735,
33
  "step": 60
34
  },
35
  {
36
+ "epoch": 0.20408163265306123,
37
+ "eval_cer": 0.42536566952911203,
38
+ "eval_loss": 2.9597644805908203,
39
+ "eval_runtime": 203.9425,
40
+ "eval_samples_per_second": 5.57,
41
+ "eval_steps_per_second": 0.348,
42
+ "eval_wer": 1.0019086397760528,
43
  "step": 60
44
  },
45
  {
46
+ "epoch": 0.30612244897959184,
47
+ "grad_norm": 14.627169609069824,
48
+ "learning_rate": 9e-06,
49
+ "loss": 2.5449,
50
  "step": 90
51
  },
52
  {
53
+ "epoch": 0.30612244897959184,
54
+ "eval_cer": 0.3221289044069076,
55
+ "eval_loss": 2.198906421661377,
56
+ "eval_runtime": 188.0543,
57
+ "eval_samples_per_second": 6.041,
58
+ "eval_steps_per_second": 0.378,
59
+ "eval_wer": 0.8820460618399287,
60
  "step": 90
61
  },
62
  {
63
+ "epoch": 0.40816326530612246,
64
+ "grad_norm": 17.809280395507812,
65
+ "learning_rate": 9e-06,
66
+ "loss": 1.9987,
67
  "step": 120
68
  },
69
  {
70
+ "epoch": 0.40816326530612246,
71
+ "eval_cer": 0.2606398037180334,
72
+ "eval_loss": 1.8648453950881958,
73
+ "eval_runtime": 173.0054,
74
+ "eval_samples_per_second": 6.566,
75
+ "eval_steps_per_second": 0.41,
76
+ "eval_wer": 0.8003562794248632,
77
  "step": 120
78
  },
79
+ {
80
+ "epoch": 0.5102040816326531,
81
+ "grad_norm": 12.8538236618042,
82
+ "learning_rate": 7.500000000000001e-06,
83
+ "loss": 1.7671,
84
+ "step": 150
85
+ },
86
+ {
87
+ "epoch": 0.5102040816326531,
88
+ "eval_cer": 0.2312352552609229,
89
+ "eval_loss": 1.6909141540527344,
90
+ "eval_runtime": 166.0118,
91
+ "eval_samples_per_second": 6.843,
92
+ "eval_steps_per_second": 0.428,
93
+ "eval_wer": 0.7619289986003308,
94
+ "step": 150
95
+ },
96
+ {
97
+ "epoch": 0.6122448979591837,
98
+ "grad_norm": 1594495.5,
99
+ "learning_rate": 6e-06,
100
+ "loss": 1.6285,
101
+ "step": 180
102
+ },
103
+ {
104
+ "epoch": 0.6122448979591837,
105
+ "eval_cer": 0.22453524582428988,
106
+ "eval_loss": 1.5862839221954346,
107
+ "eval_runtime": 170.3953,
108
+ "eval_samples_per_second": 6.667,
109
+ "eval_steps_per_second": 0.417,
110
+ "eval_wer": 0.7335538872630105,
111
+ "step": 180
112
+ },
113
+ {
114
+ "epoch": 0.7142857142857143,
115
+ "grad_norm": 13.195433616638184,
116
+ "learning_rate": 4.5e-06,
117
+ "loss": 1.5475,
118
+ "step": 210
119
+ },
120
+ {
121
+ "epoch": 0.7142857142857143,
122
+ "eval_cer": 0.2212512975370388,
123
+ "eval_loss": 1.525095820426941,
124
+ "eval_runtime": 176.0288,
125
+ "eval_samples_per_second": 6.453,
126
+ "eval_steps_per_second": 0.403,
127
+ "eval_wer": 0.7215930779997455,
128
+ "step": 210
129
+ },
130
+ {
131
+ "epoch": 0.8163265306122449,
132
+ "grad_norm": 11.149357795715332,
133
+ "learning_rate": 3e-06,
134
+ "loss": 1.4793,
135
+ "step": 240
136
+ },
137
+ {
138
+ "epoch": 0.8163265306122449,
139
+ "eval_cer": 0.20349155421345663,
140
+ "eval_loss": 1.4806641340255737,
141
+ "eval_runtime": 165.379,
142
+ "eval_samples_per_second": 6.869,
143
+ "eval_steps_per_second": 0.429,
144
+ "eval_wer": 0.6942359078763202,
145
+ "step": 240
146
+ },
147
+ {
148
+ "epoch": 0.9183673469387755,
149
+ "grad_norm": 11.996837615966797,
150
+ "learning_rate": 1.5e-06,
151
+ "loss": 1.5013,
152
+ "step": 270
153
+ },
154
+ {
155
+ "epoch": 0.9183673469387755,
156
+ "eval_cer": 0.2057374728696801,
157
+ "eval_loss": 1.4582278728485107,
158
+ "eval_runtime": 171.0364,
159
+ "eval_samples_per_second": 6.642,
160
+ "eval_steps_per_second": 0.415,
161
+ "eval_wer": 0.6904186283242143,
162
+ "step": 270
163
+ },
164
  {
165
  "epoch": 1.0204081632653061,
166
+ "grad_norm": 13.957674980163574,
167
  "learning_rate": 0.0,
168
+ "loss": 1.4438,
169
+ "step": 300
170
  },
171
  {
172
  "epoch": 1.0204081632653061,
173
+ "eval_cer": 0.20496366896291404,
174
+ "eval_loss": 1.4505608081817627,
175
+ "eval_runtime": 170.7406,
176
+ "eval_samples_per_second": 6.653,
177
+ "eval_steps_per_second": 0.416,
178
+ "eval_wer": 0.6883827458964245,
179
+ "step": 300
180
  },
181
  {
182
  "epoch": 1.0204081632653061,
183
+ "step": 300,
184
  "total_flos": 2.3614434607104e+17,
185
+ "train_loss": 2.125396842956543,
186
+ "train_runtime": 3027.4927,
187
+ "train_samples_per_second": 3.171,
188
+ "train_steps_per_second": 0.099
189
  }
190
  ],
191
  "logging_steps": 30,
192
+ "max_steps": 300,
193
  "num_input_tokens_seen": 0,
194
  "num_train_epochs": 2,
195
  "save_steps": 30,
 
206
  }
207
  },
208
  "total_flos": 2.3614434607104e+17,
209
+ "train_batch_size": 16,
210
  "trial_name": null,
211
  "trial_params": null
212
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adb59dce1518b3dd6568a6cc562e4afcb56e424e6b498b28a4052dc7bfa10edd
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32aa2deb8106dc2b84ec85669778f76eab69a5fb35be741e7625b0d96cfd6257
3
  size 5368