Training in progress, step 4000

Browse files

Files changed (3) hide show

config.json +21 -9
model.safetensors +2 -2
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -4,7 +4,7 @@
   ],
   "bos_token_id": 50000,
   "decoder": {
-    "_attn_implementation_autoset": true,
     "_name_or_path": "fav-kky/gpt2-small-cs",
     "activation_function": "gelu",
     "add_cross_attention": true,
@@ -14,15 +14,21 @@
     "attn_pdrop": 0.1,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
-    "bos_token_id": 0,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
     "decoder_start_token_id": 50000,
     "diversity_penalty": 0.0,
     "do_sample": false,
     "early_stopping": false,
     "embd_pdrop": 0.1,
     "encoder_no_repeat_ngram_size": 0,
     "eos_token_id": 0,
     "exponential_decay_length_penalty": null,
     "finetuning_task": null,
@@ -99,7 +105,7 @@
   },
   "decoder_start_token_id": 50000,
   "encoder": {
-    "_attn_implementation_autoset": true,
     "_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
     "activation_dropout": 0.1,
     "adapter_attn_dim": null,
@@ -114,7 +120,7 @@
     "attention_dropout": 0.1,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
-    "bos_token_id": 1,
     "chunk_size_feed_forward": 0,
     "classifier_proj_size": 256,
     "codevector_dim": 256,
@@ -148,16 +154,22 @@
       2
     ],
     "cross_attention_hidden_size": null,
-    "ctc_loss_reduction": "mean",
     "ctc_zero_infinity": false,
     "decoder_start_token_id": null,
     "diversity_loss_weight": 0.1,
     "diversity_penalty": 0.0,
     "do_sample": false,
     "do_stable_layer_norm": false,
     "early_stopping": false,
     "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
     "exponential_decay_length_penalty": null,
     "feat_extract_activation": "gelu",
     "feat_extract_norm": "group",
@@ -183,7 +195,7 @@
       "LABEL_1": 1
     },
     "layer_norm_eps": 1e-05,
-    "layerdrop": 0.0,
     "length_penalty": 1.0,
     "mask_feature_length": 10,
     "mask_feature_min_masks": 0,
@@ -204,7 +216,7 @@
     "num_conv_pos_embedding_groups": 16,
     "num_conv_pos_embeddings": 128,
     "num_feat_extract_layers": 7,
-    "num_hidden_layers": 12,
     "num_negatives": 100,
     "num_return_sequences": 1,
     "output_attentions": false,
@@ -256,7 +268,7 @@
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_weighted_layer_sum": false,
-    "vocab_size": 50002,
     "xvector_output_dim": 512
   },
   "eos_token_id": 0,

   ],
   "bos_token_id": 50000,
   "decoder": {
+    "_attn_implementation_autoset": false,
     "_name_or_path": "fav-kky/gpt2-small-cs",
     "activation_function": "gelu",
     "add_cross_attention": true,
     "attn_pdrop": 0.1,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
+    "bos_token_id": 50000,
     "chunk_size_feed_forward": 0,
     "cross_attention_hidden_size": null,
+    "decoder_ignore_mismatched_sizes": true,
     "decoder_start_token_id": 50000,
+    "decoder_vocab_size": 50002,
     "diversity_penalty": 0.0,
     "do_sample": false,
     "early_stopping": false,
     "embd_pdrop": 0.1,
+    "encoder_ctc_loss_reduction": "mean",
+    "encoder_layerdrop": 0.0,
     "encoder_no_repeat_ngram_size": 0,
+    "encoder_pad_token_id": 50001,
+    "encoder_vocab_size": 50002,
     "eos_token_id": 0,
     "exponential_decay_length_penalty": null,
     "finetuning_task": null,
   },
   "decoder_start_token_id": 50000,
   "encoder": {
+    "_attn_implementation_autoset": false,
     "_name_or_path": "fav-kky/wav2vec2-base-cs-80k-ClTRUS",
     "activation_dropout": 0.1,
     "adapter_attn_dim": null,
     "attention_dropout": 0.1,
     "bad_words_ids": null,
     "begin_suppress_tokens": null,
+    "bos_token_id": 50000,
     "chunk_size_feed_forward": 0,
     "classifier_proj_size": 256,
     "codevector_dim": 256,
       2
     ],
     "cross_attention_hidden_size": null,
+    "ctc_loss_reduction": "sum",
     "ctc_zero_infinity": false,
+    "decoder_ignore_mismatched_sizes": true,
     "decoder_start_token_id": null,
+    "decoder_vocab_size": 50002,
     "diversity_loss_weight": 0.1,
     "diversity_penalty": 0.0,
     "do_sample": false,
     "do_stable_layer_norm": false,
     "early_stopping": false,
+    "encoder_ctc_loss_reduction": "mean",
+    "encoder_layerdrop": 0.0,
     "encoder_no_repeat_ngram_size": 0,
+    "encoder_pad_token_id": 50001,
+    "encoder_vocab_size": 50002,
+    "eos_token_id": 0,
     "exponential_decay_length_penalty": null,
     "feat_extract_activation": "gelu",
     "feat_extract_norm": "group",
       "LABEL_1": 1
     },
     "layer_norm_eps": 1e-05,
+    "layerdrop": 0.1,
     "length_penalty": 1.0,
     "mask_feature_length": 10,
     "mask_feature_min_masks": 0,
     "num_conv_pos_embedding_groups": 16,
     "num_conv_pos_embeddings": 128,
     "num_feat_extract_layers": 7,
+    "num_hidden_layers": 6,
     "num_negatives": 100,
     "num_return_sequences": 1,
     "output_attentions": false,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "use_weighted_layer_sum": false,
+    "vocab_size": 32,
     "xvector_output_dim": 512
   },
   "eos_token_id": 0,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b2b4e8b577ef56e4853a104918fd58948170bca1439f5c8bfe6e78d5f25a198
-size 550866768

 version https://git-lfs.github.com/spec/v1
+oid sha256:692f4ba8c00e86f69bd51288c81e3adc5f18e6554d3c1e877deeb232ca6388b3
+size 380746176

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27af53f70960823efb4ce8633d977fdc7b9ebaa9d22dc3c29a5337512a7c07ef
 size 5624

 version https://git-lfs.github.com/spec/v1
+oid sha256:c43c2bc9c5d15ca69f1d4a0bd25b0b8b8d8b1d87aeacfd821546afbf1f2d0645
 size 5624