TheRamsay committed on
Commit
3f57cc7
·
verified ·
1 Parent(s): e0dc240

Training in progress, step 4000

Browse files
Files changed (3) hide show
  1. config.json +21 -9
  2. model.safetensors +2 -2
  3. training_args.bin +1 -1
config.json CHANGED
@@ -4,7 +4,7 @@
4
  ],
5
  "bos_token_id": 50000,
6
  "decoder": {
7
- "_attn_implementation_autoset": true,
8
  "_name_or_path": "fav-kky/gpt2-small-cs",
9
  "activation_function": "gelu",
10
  "add_cross_attention": true,
@@ -14,15 +14,21 @@
14
  "attn_pdrop": 0.1,
15
  "bad_words_ids": null,
16
  "begin_suppress_tokens": null,
17
- "bos_token_id": 0,
18
  "chunk_size_feed_forward": 0,
19
  "cross_attention_hidden_size": null,
 
20
  "decoder_start_token_id": 50000,
 
21
  "diversity_penalty": 0.0,
22
  "do_sample": false,
23
  "early_stopping": false,
24
  "embd_pdrop": 0.1,
 
 
25
  "encoder_no_repeat_ngram_size": 0,
 
 
26
  "eos_token_id": 0,
27
  "exponential_decay_length_penalty": null,
28
  "finetuning_task": null,
@@ -99,7 +105,7 @@
99
  },
100
  "decoder_start_token_id": 50000,
101
  "encoder": {
102
- "_attn_implementation_autoset": true,
103
  "_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
104
  "activation_dropout": 0.1,
105
  "adapter_attn_dim": null,
@@ -114,7 +120,7 @@
114
  "attention_dropout": 0.1,
115
  "bad_words_ids": null,
116
  "begin_suppress_tokens": null,
117
- "bos_token_id": 1,
118
  "chunk_size_feed_forward": 0,
119
  "classifier_proj_size": 256,
120
  "codevector_dim": 256,
@@ -148,16 +154,22 @@
148
  2
149
  ],
150
  "cross_attention_hidden_size": null,
151
- "ctc_loss_reduction": "mean",
152
  "ctc_zero_infinity": false,
 
153
  "decoder_start_token_id": null,
 
154
  "diversity_loss_weight": 0.1,
155
  "diversity_penalty": 0.0,
156
  "do_sample": false,
157
  "do_stable_layer_norm": false,
158
  "early_stopping": false,
 
 
159
  "encoder_no_repeat_ngram_size": 0,
160
- "eos_token_id": 2,
 
 
161
  "exponential_decay_length_penalty": null,
162
  "feat_extract_activation": "gelu",
163
  "feat_extract_norm": "group",
@@ -183,7 +195,7 @@
183
  "LABEL_1": 1
184
  },
185
  "layer_norm_eps": 1e-05,
186
- "layerdrop": 0.0,
187
  "length_penalty": 1.0,
188
  "mask_feature_length": 10,
189
  "mask_feature_min_masks": 0,
@@ -204,7 +216,7 @@
204
  "num_conv_pos_embedding_groups": 16,
205
  "num_conv_pos_embeddings": 128,
206
  "num_feat_extract_layers": 7,
207
- "num_hidden_layers": 12,
208
  "num_negatives": 100,
209
  "num_return_sequences": 1,
210
  "output_attentions": false,
@@ -256,7 +268,7 @@
256
  "typical_p": 1.0,
257
  "use_bfloat16": false,
258
  "use_weighted_layer_sum": false,
259
- "vocab_size": 50002,
260
  "xvector_output_dim": 512
261
  },
262
  "eos_token_id": 0,
 
4
  ],
5
  "bos_token_id": 50000,
6
  "decoder": {
7
+ "_attn_implementation_autoset": false,
8
  "_name_or_path": "fav-kky/gpt2-small-cs",
9
  "activation_function": "gelu",
10
  "add_cross_attention": true,
 
14
  "attn_pdrop": 0.1,
15
  "bad_words_ids": null,
16
  "begin_suppress_tokens": null,
17
+ "bos_token_id": 50000,
18
  "chunk_size_feed_forward": 0,
19
  "cross_attention_hidden_size": null,
20
+ "decoder_ignore_mismatched_sizes": true,
21
  "decoder_start_token_id": 50000,
22
+ "decoder_vocab_size": 50002,
23
  "diversity_penalty": 0.0,
24
  "do_sample": false,
25
  "early_stopping": false,
26
  "embd_pdrop": 0.1,
27
+ "encoder_ctc_loss_reduction": "mean",
28
+ "encoder_layerdrop": 0.0,
29
  "encoder_no_repeat_ngram_size": 0,
30
+ "encoder_pad_token_id": 50001,
31
+ "encoder_vocab_size": 50002,
32
  "eos_token_id": 0,
33
  "exponential_decay_length_penalty": null,
34
  "finetuning_task": null,
 
105
  },
106
  "decoder_start_token_id": 50000,
107
  "encoder": {
108
+ "_attn_implementation_autoset": false,
109
  "_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
110
  "activation_dropout": 0.1,
111
  "adapter_attn_dim": null,
 
120
  "attention_dropout": 0.1,
121
  "bad_words_ids": null,
122
  "begin_suppress_tokens": null,
123
+ "bos_token_id": 50000,
124
  "chunk_size_feed_forward": 0,
125
  "classifier_proj_size": 256,
126
  "codevector_dim": 256,
 
154
  2
155
  ],
156
  "cross_attention_hidden_size": null,
157
+ "ctc_loss_reduction": "sum",
158
  "ctc_zero_infinity": false,
159
+ "decoder_ignore_mismatched_sizes": true,
160
  "decoder_start_token_id": null,
161
+ "decoder_vocab_size": 50002,
162
  "diversity_loss_weight": 0.1,
163
  "diversity_penalty": 0.0,
164
  "do_sample": false,
165
  "do_stable_layer_norm": false,
166
  "early_stopping": false,
167
+ "encoder_ctc_loss_reduction": "mean",
168
+ "encoder_layerdrop": 0.0,
169
  "encoder_no_repeat_ngram_size": 0,
170
+ "encoder_pad_token_id": 50001,
171
+ "encoder_vocab_size": 50002,
172
+ "eos_token_id": 0,
173
  "exponential_decay_length_penalty": null,
174
  "feat_extract_activation": "gelu",
175
  "feat_extract_norm": "group",
 
195
  "LABEL_1": 1
196
  },
197
  "layer_norm_eps": 1e-05,
198
+ "layerdrop": 0.1,
199
  "length_penalty": 1.0,
200
  "mask_feature_length": 10,
201
  "mask_feature_min_masks": 0,
 
216
  "num_conv_pos_embedding_groups": 16,
217
  "num_conv_pos_embeddings": 128,
218
  "num_feat_extract_layers": 7,
219
+ "num_hidden_layers": 6,
220
  "num_negatives": 100,
221
  "num_return_sequences": 1,
222
  "output_attentions": false,
 
268
  "typical_p": 1.0,
269
  "use_bfloat16": false,
270
  "use_weighted_layer_sum": false,
271
+ "vocab_size": 32,
272
  "xvector_output_dim": 512
273
  },
274
  "eos_token_id": 0,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2b4e8b577ef56e4853a104918fd58948170bca1439f5c8bfe6e78d5f25a198
3
- size 550866768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:692f4ba8c00e86f69bd51288c81e3adc5f18e6554d3c1e877deeb232ca6388b3
3
+ size 380746176
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27af53f70960823efb4ce8633d977fdc7b9ebaa9d22dc3c29a5337512a7c07ef
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c43c2bc9c5d15ca69f1d4a0bd25b0b8b8d8b1d87aeacfd821546afbf1f2d0645
3
  size 5624