nhxnnz committed on
Commit
4d71851
·
verified ·
1 Parent(s): 9cdd971

End of training

Browse files
.gitattributes CHANGED
@@ -41,3 +41,4 @@ kang/kang[[:space:]](6).wav filter=lfs diff=lfs merge=lfs -text
41
  kang/kang[[:space:]](8).wav filter=lfs diff=lfs merge=lfs -text
42
  kang/kang[[:space:]](9).wav filter=lfs diff=lfs merge=lfs -text
43
  kang/kang_1.wav filter=lfs diff=lfs merge=lfs -text
 
 
41
  kang/kang[[:space:]](8).wav filter=lfs diff=lfs merge=lfs -text
42
  kang/kang[[:space:]](9).wav filter=lfs diff=lfs merge=lfs -text
43
  kang/kang_1.wav filter=lfs diff=lfs merge=lfs -text
44
+ kang/kang_2.wav filter=lfs diff=lfs merge=lfs -text
adapter_config.json CHANGED
@@ -27,8 +27,8 @@
27
  "rank_pattern": {},
28
  "revision": null,
29
  "target_modules": [
30
- "v_proj",
31
- "q_proj"
32
  ],
33
  "task_type": null,
34
  "use_dora": false,
 
27
  "rank_pattern": {},
28
  "revision": null,
29
  "target_modules": [
30
+ "q_proj",
31
+ "v_proj"
32
  ],
33
  "task_type": null,
34
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df88e0c996327c515e25de6a58c119ceadd953b0f82a9129e4a7a2897cc8ea64
3
  size 7098064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63bc963057a86f8afa5f6de9fd9f42b884bd976ba06efda3654a50ae41657833
3
  size 7098064
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2.42193908736e+18,
4
- "train_loss": 0.3701638735680484,
5
- "train_runtime": 4045.1211,
6
- "train_samples_per_second": 2.057,
7
- "train_steps_per_second": 0.257
8
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2.42193908736e+18,
4
+ "train_loss": 0.6797876440482455,
5
+ "train_runtime": 2875.7206,
6
+ "train_samples_per_second": 2.893,
7
+ "train_steps_per_second": 0.362
8
  }
kang/kang_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db45dcf47c641aae00d68e0957b7756cf38107c1589d43c4aa54192a696894bb
3
+ size 830764
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2.42193908736e+18,
4
- "train_loss": 0.3701638735680484,
5
- "train_runtime": 4045.1211,
6
- "train_samples_per_second": 2.057,
7
- "train_steps_per_second": 0.257
8
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2.42193908736e+18,
4
+ "train_loss": 0.6797876440482455,
5
+ "train_runtime": 2875.7206,
6
+ "train_samples_per_second": 2.893,
7
+ "train_steps_per_second": 0.362
8
  }
trainer_state.json CHANGED
@@ -2,237 +2,132 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "eval_steps": 100,
6
  "global_step": 1041,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.1440922190201729,
13
- "grad_norm": 2.598850727081299,
14
- "learning_rate": 0.0001903938520653218,
15
- "loss": 0.6467,
16
- "step": 50
17
- },
18
- {
19
- "epoch": 0.2881844380403458,
20
- "grad_norm": 2.249300956726074,
21
- "learning_rate": 0.00018078770413064362,
22
- "loss": 0.4668,
23
- "step": 100
24
- },
25
  {
26
  "epoch": 0.2881844380403458,
27
- "eval_runtime": 232.7234,
28
- "eval_samples_per_second": 5.315,
29
- "eval_steps_per_second": 2.66,
30
  "step": 100
31
  },
32
- {
33
- "epoch": 0.4322766570605187,
34
- "grad_norm": 1.964786410331726,
35
- "learning_rate": 0.00017118155619596544,
36
- "loss": 0.4482,
37
- "step": 150
38
- },
39
  {
40
  "epoch": 0.5763688760806917,
41
- "grad_norm": 1.8695533275604248,
42
- "learning_rate": 0.00016157540826128723,
43
- "loss": 0.4197,
44
  "step": 200
45
  },
46
  {
47
  "epoch": 0.5763688760806917,
48
- "eval_runtime": 227.4078,
49
- "eval_samples_per_second": 5.44,
50
- "eval_steps_per_second": 2.722,
51
  "step": 200
52
  },
53
- {
54
- "epoch": 0.7204610951008645,
55
- "grad_norm": 1.8768256902694702,
56
- "learning_rate": 0.00015196926032660902,
57
- "loss": 0.4265,
58
- "step": 250
59
- },
60
  {
61
  "epoch": 0.8645533141210374,
62
- "grad_norm": 1.8593772649765015,
63
- "learning_rate": 0.00014236311239193086,
64
- "loss": 0.4039,
65
  "step": 300
66
  },
67
- {
68
- "epoch": 0.8645533141210374,
69
- "eval_runtime": 225.9543,
70
- "eval_samples_per_second": 5.475,
71
- "eval_steps_per_second": 2.739,
72
- "step": 300
73
- },
74
- {
75
- "epoch": 1.0086455331412103,
76
- "grad_norm": 1.2640736103057861,
77
- "learning_rate": 0.00013275696445725266,
78
- "loss": 0.3906,
79
- "step": 350
80
- },
81
  {
82
  "epoch": 1.1527377521613833,
83
- "grad_norm": 1.3723323345184326,
84
- "learning_rate": 0.00012315081652257445,
85
- "loss": 0.3547,
86
  "step": 400
87
  },
88
  {
89
  "epoch": 1.1527377521613833,
90
- "eval_runtime": 226.5998,
91
- "eval_samples_per_second": 5.459,
92
- "eval_steps_per_second": 2.732,
93
  "step": 400
94
  },
95
- {
96
- "epoch": 1.2968299711815563,
97
- "grad_norm": 1.395857810974121,
98
- "learning_rate": 0.00011354466858789625,
99
- "loss": 0.3402,
100
- "step": 450
101
- },
102
- {
103
- "epoch": 1.440922190201729,
104
- "grad_norm": 1.6172202825546265,
105
- "learning_rate": 0.00010393852065321807,
106
- "loss": 0.34,
107
- "step": 500
108
- },
109
  {
110
  "epoch": 1.440922190201729,
111
- "eval_runtime": 227.4832,
112
- "eval_samples_per_second": 5.438,
113
- "eval_steps_per_second": 2.721,
114
  "step": 500
115
  },
116
- {
117
- "epoch": 1.585014409221902,
118
- "grad_norm": 1.5597540140151978,
119
- "learning_rate": 9.433237271853987e-05,
120
- "loss": 0.337,
121
- "step": 550
122
- },
123
  {
124
  "epoch": 1.729106628242075,
125
- "grad_norm": 1.3169169425964355,
126
- "learning_rate": 8.472622478386168e-05,
127
- "loss": 0.3597,
128
  "step": 600
129
  },
130
  {
131
  "epoch": 1.729106628242075,
132
- "eval_runtime": 224.0401,
133
- "eval_samples_per_second": 5.521,
134
- "eval_steps_per_second": 2.763,
135
  "step": 600
136
  },
137
- {
138
- "epoch": 1.8731988472622478,
139
- "grad_norm": 1.5286619663238525,
140
- "learning_rate": 7.512007684918348e-05,
141
- "loss": 0.3541,
142
- "step": 650
143
- },
144
- {
145
- "epoch": 2.0172910662824206,
146
- "grad_norm": 1.4207804203033447,
147
- "learning_rate": 6.551392891450529e-05,
148
- "loss": 0.3503,
149
- "step": 700
150
- },
151
  {
152
  "epoch": 2.0172910662824206,
153
- "eval_runtime": 229.2463,
154
- "eval_samples_per_second": 5.396,
155
- "eval_steps_per_second": 2.7,
156
  "step": 700
157
  },
158
- {
159
- "epoch": 2.161383285302594,
160
- "grad_norm": 1.2903691530227661,
161
- "learning_rate": 5.59077809798271e-05,
162
- "loss": 0.3175,
163
- "step": 750
164
- },
165
  {
166
  "epoch": 2.3054755043227666,
167
- "grad_norm": 1.656386375427246,
168
- "learning_rate": 4.63016330451489e-05,
169
- "loss": 0.3074,
170
  "step": 800
171
  },
172
  {
173
  "epoch": 2.3054755043227666,
174
- "eval_runtime": 228.7555,
175
- "eval_samples_per_second": 5.408,
176
- "eval_steps_per_second": 2.706,
177
  "step": 800
178
  },
179
- {
180
- "epoch": 2.4495677233429394,
181
- "grad_norm": 1.3060048818588257,
182
- "learning_rate": 3.66954851104707e-05,
183
- "loss": 0.3095,
184
- "step": 850
185
- },
186
- {
187
- "epoch": 2.5936599423631126,
188
- "grad_norm": 1.136078953742981,
189
- "learning_rate": 2.7089337175792506e-05,
190
- "loss": 0.2877,
191
- "step": 900
192
- },
193
  {
194
  "epoch": 2.5936599423631126,
195
- "eval_runtime": 228.5101,
196
- "eval_samples_per_second": 5.413,
197
- "eval_steps_per_second": 2.709,
198
  "step": 900
199
  },
200
- {
201
- "epoch": 2.7377521613832854,
202
- "grad_norm": 1.0104094743728638,
203
- "learning_rate": 1.7483189241114314e-05,
204
- "loss": 0.2997,
205
- "step": 950
206
- },
207
  {
208
  "epoch": 2.881844380403458,
209
- "grad_norm": 1.434010624885559,
210
- "learning_rate": 7.87704130643612e-06,
211
- "loss": 0.2896,
212
  "step": 1000
213
  },
214
  {
215
  "epoch": 2.881844380403458,
216
- "eval_runtime": 226.8836,
217
- "eval_samples_per_second": 5.452,
218
- "eval_steps_per_second": 2.728,
219
  "step": 1000
220
  },
221
  {
222
  "epoch": 3.0,
223
  "step": 1041,
224
  "total_flos": 2.42193908736e+18,
225
- "train_loss": 0.3701638735680484,
226
- "train_runtime": 4045.1211,
227
- "train_samples_per_second": 2.057,
228
- "train_steps_per_second": 0.257
229
  }
230
  ],
231
- "logging_steps": 50,
232
  "max_steps": 1041,
233
  "num_input_tokens_seen": 0,
234
  "num_train_epochs": 3,
235
- "save_steps": 100,
236
  "stateful_callbacks": {
237
  "TrainerControl": {
238
  "args": {
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "eval_steps": 200,
6
  "global_step": 1041,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.2881844380403458,
13
+ "grad_norm": 1.327895164489746,
14
+ "learning_rate": 0.0001815561959654179,
15
+ "loss": 3.1246,
16
  "step": 100
17
  },
 
 
 
 
 
 
 
18
  {
19
  "epoch": 0.5763688760806917,
20
+ "grad_norm": 1.4119809865951538,
21
+ "learning_rate": 0.00016234390009606147,
22
+ "loss": 0.8831,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 0.5763688760806917,
27
+ "eval_runtime": 221.4793,
28
+ "eval_samples_per_second": 5.585,
29
+ "eval_steps_per_second": 2.795,
30
  "step": 200
31
  },
 
 
 
 
 
 
 
32
  {
33
  "epoch": 0.8645533141210374,
34
+ "grad_norm": 1.9445453882217407,
35
+ "learning_rate": 0.0001431316042267051,
36
+ "loss": 0.4528,
37
  "step": 300
38
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  {
40
  "epoch": 1.1527377521613833,
41
+ "grad_norm": 1.2831073999404907,
42
+ "learning_rate": 0.00012391930835734872,
43
+ "loss": 0.4009,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 1.1527377521613833,
48
+ "eval_runtime": 222.2115,
49
+ "eval_samples_per_second": 5.567,
50
+ "eval_steps_per_second": 2.786,
51
  "step": 400
52
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  {
54
  "epoch": 1.440922190201729,
55
+ "grad_norm": 1.79501473903656,
56
+ "learning_rate": 0.00010470701248799233,
57
+ "loss": 0.368,
58
  "step": 500
59
  },
 
 
 
 
 
 
 
60
  {
61
  "epoch": 1.729106628242075,
62
+ "grad_norm": 1.340496301651001,
63
+ "learning_rate": 8.549471661863592e-05,
64
+ "loss": 0.3721,
65
  "step": 600
66
  },
67
  {
68
  "epoch": 1.729106628242075,
69
+ "eval_runtime": 223.5538,
70
+ "eval_samples_per_second": 5.533,
71
+ "eval_steps_per_second": 2.769,
72
  "step": 600
73
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  {
75
  "epoch": 2.0172910662824206,
76
+ "grad_norm": 1.4648982286453247,
77
+ "learning_rate": 6.628242074927953e-05,
78
+ "loss": 0.3737,
79
  "step": 700
80
  },
 
 
 
 
 
 
 
81
  {
82
  "epoch": 2.3054755043227666,
83
+ "grad_norm": 1.6813404560089111,
84
+ "learning_rate": 4.7070124879923156e-05,
85
+ "loss": 0.3306,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 2.3054755043227666,
90
+ "eval_runtime": 224.219,
91
+ "eval_samples_per_second": 5.517,
92
+ "eval_steps_per_second": 2.761,
93
  "step": 800
94
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  {
96
  "epoch": 2.5936599423631126,
97
+ "grad_norm": 1.084306001663208,
98
+ "learning_rate": 2.7857829010566765e-05,
99
+ "loss": 0.3155,
100
  "step": 900
101
  },
 
 
 
 
 
 
 
102
  {
103
  "epoch": 2.881844380403458,
104
+ "grad_norm": 1.6043856143951416,
105
+ "learning_rate": 8.645533141210376e-06,
106
+ "loss": 0.3168,
107
  "step": 1000
108
  },
109
  {
110
  "epoch": 2.881844380403458,
111
+ "eval_runtime": 225.5438,
112
+ "eval_samples_per_second": 5.485,
113
+ "eval_steps_per_second": 2.744,
114
  "step": 1000
115
  },
116
  {
117
  "epoch": 3.0,
118
  "step": 1041,
119
  "total_flos": 2.42193908736e+18,
120
+ "train_loss": 0.6797876440482455,
121
+ "train_runtime": 2875.7206,
122
+ "train_samples_per_second": 2.893,
123
+ "train_steps_per_second": 0.362
124
  }
125
  ],
126
+ "logging_steps": 100,
127
  "max_steps": 1041,
128
  "num_input_tokens_seen": 0,
129
  "num_train_epochs": 3,
130
+ "save_steps": 200,
131
  "stateful_callbacks": {
132
  "TrainerControl": {
133
  "args": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ecfa4925538d5d694197138cbcc95396fedc580f5bf53b61a2b5623a82235f7
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10fe486164d02b5141ca201b6f0bd243d13bc3799a3938aa4a56750bdd9b4ffb
3
  size 5304