Training in progress, step 4000
Browse files
- config.json +21 -9
- model.safetensors +2 -2
- training_args.bin +1 -1
config.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
],
|
5 |
"bos_token_id": 50000,
|
6 |
"decoder": {
|
7 |
-
"_attn_implementation_autoset":
|
8 |
"_name_or_path": "fav-kky/gpt2-small-cs",
|
9 |
"activation_function": "gelu",
|
10 |
"add_cross_attention": true,
|
@@ -14,15 +14,21 @@
|
|
14 |
"attn_pdrop": 0.1,
|
15 |
"bad_words_ids": null,
|
16 |
"begin_suppress_tokens": null,
|
17 |
-
"bos_token_id":
|
18 |
"chunk_size_feed_forward": 0,
|
19 |
"cross_attention_hidden_size": null,
|
|
|
20 |
"decoder_start_token_id": 50000,
|
|
|
21 |
"diversity_penalty": 0.0,
|
22 |
"do_sample": false,
|
23 |
"early_stopping": false,
|
24 |
"embd_pdrop": 0.1,
|
|
|
|
|
25 |
"encoder_no_repeat_ngram_size": 0,
|
|
|
|
|
26 |
"eos_token_id": 0,
|
27 |
"exponential_decay_length_penalty": null,
|
28 |
"finetuning_task": null,
|
@@ -99,7 +105,7 @@
|
|
99 |
},
|
100 |
"decoder_start_token_id": 50000,
|
101 |
"encoder": {
|
102 |
-
"_attn_implementation_autoset":
|
103 |
"_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
|
104 |
"activation_dropout": 0.1,
|
105 |
"adapter_attn_dim": null,
|
@@ -114,7 +120,7 @@
|
|
114 |
"attention_dropout": 0.1,
|
115 |
"bad_words_ids": null,
|
116 |
"begin_suppress_tokens": null,
|
117 |
-
"bos_token_id":
|
118 |
"chunk_size_feed_forward": 0,
|
119 |
"classifier_proj_size": 256,
|
120 |
"codevector_dim": 256,
|
@@ -148,16 +154,22 @@
|
|
148 |
2
|
149 |
],
|
150 |
"cross_attention_hidden_size": null,
|
151 |
-
"ctc_loss_reduction": "
|
152 |
"ctc_zero_infinity": false,
|
|
|
153 |
"decoder_start_token_id": null,
|
|
|
154 |
"diversity_loss_weight": 0.1,
|
155 |
"diversity_penalty": 0.0,
|
156 |
"do_sample": false,
|
157 |
"do_stable_layer_norm": false,
|
158 |
"early_stopping": false,
|
|
|
|
|
159 |
"encoder_no_repeat_ngram_size": 0,
|
160 |
-
"
|
|
|
|
|
161 |
"exponential_decay_length_penalty": null,
|
162 |
"feat_extract_activation": "gelu",
|
163 |
"feat_extract_norm": "group",
|
@@ -183,7 +195,7 @@
|
|
183 |
"LABEL_1": 1
|
184 |
},
|
185 |
"layer_norm_eps": 1e-05,
|
186 |
-
"layerdrop": 0.
|
187 |
"length_penalty": 1.0,
|
188 |
"mask_feature_length": 10,
|
189 |
"mask_feature_min_masks": 0,
|
@@ -204,7 +216,7 @@
|
|
204 |
"num_conv_pos_embedding_groups": 16,
|
205 |
"num_conv_pos_embeddings": 128,
|
206 |
"num_feat_extract_layers": 7,
|
207 |
-
"num_hidden_layers":
|
208 |
"num_negatives": 100,
|
209 |
"num_return_sequences": 1,
|
210 |
"output_attentions": false,
|
@@ -256,7 +268,7 @@
|
|
256 |
"typical_p": 1.0,
|
257 |
"use_bfloat16": false,
|
258 |
"use_weighted_layer_sum": false,
|
259 |
-
"vocab_size":
|
260 |
"xvector_output_dim": 512
|
261 |
},
|
262 |
"eos_token_id": 0,
|
|
|
4 |
],
|
5 |
"bos_token_id": 50000,
|
6 |
"decoder": {
|
7 |
+
"_attn_implementation_autoset": false,
|
8 |
"_name_or_path": "fav-kky/gpt2-small-cs",
|
9 |
"activation_function": "gelu",
|
10 |
"add_cross_attention": true,
|
|
|
14 |
"attn_pdrop": 0.1,
|
15 |
"bad_words_ids": null,
|
16 |
"begin_suppress_tokens": null,
|
17 |
+
"bos_token_id": 50000,
|
18 |
"chunk_size_feed_forward": 0,
|
19 |
"cross_attention_hidden_size": null,
|
20 |
+
"decoder_ignore_mismatched_sizes": true,
|
21 |
"decoder_start_token_id": 50000,
|
22 |
+
"decoder_vocab_size": 50002,
|
23 |
"diversity_penalty": 0.0,
|
24 |
"do_sample": false,
|
25 |
"early_stopping": false,
|
26 |
"embd_pdrop": 0.1,
|
27 |
+
"encoder_ctc_loss_reduction": "mean",
|
28 |
+
"encoder_layerdrop": 0.0,
|
29 |
"encoder_no_repeat_ngram_size": 0,
|
30 |
+
"encoder_pad_token_id": 50001,
|
31 |
+
"encoder_vocab_size": 50002,
|
32 |
"eos_token_id": 0,
|
33 |
"exponential_decay_length_penalty": null,
|
34 |
"finetuning_task": null,
|
|
|
105 |
},
|
106 |
"decoder_start_token_id": 50000,
|
107 |
"encoder": {
|
108 |
+
"_attn_implementation_autoset": false,
|
109 |
"_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
|
110 |
"activation_dropout": 0.1,
|
111 |
"adapter_attn_dim": null,
|
|
|
120 |
"attention_dropout": 0.1,
|
121 |
"bad_words_ids": null,
|
122 |
"begin_suppress_tokens": null,
|
123 |
+
"bos_token_id": 50000,
|
124 |
"chunk_size_feed_forward": 0,
|
125 |
"classifier_proj_size": 256,
|
126 |
"codevector_dim": 256,
|
|
|
154 |
2
|
155 |
],
|
156 |
"cross_attention_hidden_size": null,
|
157 |
+
"ctc_loss_reduction": "sum",
|
158 |
"ctc_zero_infinity": false,
|
159 |
+
"decoder_ignore_mismatched_sizes": true,
|
160 |
"decoder_start_token_id": null,
|
161 |
+
"decoder_vocab_size": 50002,
|
162 |
"diversity_loss_weight": 0.1,
|
163 |
"diversity_penalty": 0.0,
|
164 |
"do_sample": false,
|
165 |
"do_stable_layer_norm": false,
|
166 |
"early_stopping": false,
|
167 |
+
"encoder_ctc_loss_reduction": "mean",
|
168 |
+
"encoder_layerdrop": 0.0,
|
169 |
"encoder_no_repeat_ngram_size": 0,
|
170 |
+
"encoder_pad_token_id": 50001,
|
171 |
+
"encoder_vocab_size": 50002,
|
172 |
+
"eos_token_id": 0,
|
173 |
"exponential_decay_length_penalty": null,
|
174 |
"feat_extract_activation": "gelu",
|
175 |
"feat_extract_norm": "group",
|
|
|
195 |
"LABEL_1": 1
|
196 |
},
|
197 |
"layer_norm_eps": 1e-05,
|
198 |
+
"layerdrop": 0.1,
|
199 |
"length_penalty": 1.0,
|
200 |
"mask_feature_length": 10,
|
201 |
"mask_feature_min_masks": 0,
|
|
|
216 |
"num_conv_pos_embedding_groups": 16,
|
217 |
"num_conv_pos_embeddings": 128,
|
218 |
"num_feat_extract_layers": 7,
|
219 |
+
"num_hidden_layers": 6,
|
220 |
"num_negatives": 100,
|
221 |
"num_return_sequences": 1,
|
222 |
"output_attentions": false,
|
|
|
268 |
"typical_p": 1.0,
|
269 |
"use_bfloat16": false,
|
270 |
"use_weighted_layer_sum": false,
|
271 |
+
"vocab_size": 32,
|
272 |
"xvector_output_dim": 512
|
273 |
},
|
274 |
"eos_token_id": 0,
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:692f4ba8c00e86f69bd51288c81e3adc5f18e6554d3c1e877deeb232ca6388b3
|
3 |
+
size 380746176
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5624
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c43c2bc9c5d15ca69f1d4a0bd25b0b8b8d8b1d87aeacfd821546afbf1f2d0645
|
3 |
size 5624
|