{ "_name_or_path": "", "activation_dropout": 0.15, "adapter_attn_dim": 256, "adapter_kernel_size": 5, "adapter_stride": 2, "add_adapter": true, "apply_spec_augment": true, "architectures": [ "Wav2Vec2ForHierarchicalClassification" ], "attention_dropout": 0.12, "bos_token_id": 1, "classifier_proj_size": 512, "codevector_dim": 384, "contrastive_logits_temperature": 0.07, "conv_bias": true, "conv_dim": [ 768, 768, 896, 896, 1024, 1024, 1024 ], "conv_kernel": [ 10, 5, 5, 3, 3, 2, 2 ], "conv_stride": [ 5, 2, 2, 2, 2, 2, 2 ], "ctc_loss_reduction": "sum", "ctc_zero_infinity": true, "diversity_loss_weight": 0.15, "do_stable_layer_norm": true, "eos_token_id": 2, "feat_extract_activation": "mish", "feat_extract_norm": "layer", "feat_proj_dropout": 0.15, "feat_quantizer_dropout": 0.05, "final_dropout": 0.1, "freeze_feat_extract_train": false, "hidden_act": "quick_gelu", "hidden_dropout": 0.12, "hidden_size": 1024, "id2label": { "0": "synthetic", "1": "authentic" }, "initializer_range": 0.02, "intermediate_size": 4096, "label2id": { "synthetic": 0, "authentic": 1 }, "layer_norm_eps": 1e-06, "layerdrop": 0.05, "mask_channel_length": 64, "mask_channel_min_space": 1, "mask_channel_other": 0.0, "mask_channel_prob": 0.1, "mask_channel_selection": "dynamic", "mask_feature_length": 64, "mask_feature_min_masks": 2, "mask_feature_prob": 0.1, "mask_time_length": 10, "mask_time_min_masks": 2, "mask_time_min_space": 2, "mask_time_other": 0.0, "mask_time_prob": 0.08, "mask_time_selection": "dynamic", "model_type": "wav2vec2", "no_mask_channel_overlap": true, "no_mask_time_overlap": true, "num_adapter_layers": 4, "num_attention_heads": 16, "num_codevector_groups": 4, "num_codevectors_per_group": 480, "num_conv_pos_embedding_groups": 32, "num_conv_pos_embeddings": 256, "num_feat_extract_layers": 7, "num_hidden_layers": 24, "num_negatives": 150, "output_hidden_size": 1024, "pad_token_id": 0, "proj_codevector_dim": 384, "tdnn_dilation": [ 1, 2, 3, 4, 1 ], "tdnn_dim": [ 768, 768, 896, 896, 
1500 ], "tdnn_kernel": [ 5, 3, 3, 3, 1 ], "torch_dtype": "float32", "transformers_version": "4.39.3", "use_weighted_layer_sum": true, "vocab_size": 32, "xvector_output_dim": 768, "advanced_config": { "attention_type": "multihead_relative", "positional_encoding": "rotary", "layer_norm_type": "apex", "activation_checkpointing": true, "gradient_checkpointing": true, "mixed_precision_training": true, "optimization": { "kernel_fusion": true, "memory_efficient_attention": true, "flash_attention": true, "activation_recomputation": true, "dynamic_padding": true }, "regularization": { "stochastic_depth_rate": 0.1, "label_smoothing": 0.1, "mixup_alpha": 0.2, "gradient_clip_norm": 1.0 }, "training_dynamics": { "loss_scaling": "dynamic", "gradient_accumulation_steps": 4, "batch_size_scaling": true, "adaptive_learning_rate": true } } }