Spaces:
Sleeping
Sleeping
update
Browse files
examples/vm_sound_classification/conv2d_classifier.yaml
CHANGED
|
@@ -13,21 +13,21 @@ mel_spectrogram_param:
|
|
| 13 |
conv2d_block_param_list:
|
| 14 |
- batch_norm: true
|
| 15 |
in_channels: 1
|
| 16 |
-
out_channels:
|
| 17 |
kernel_size: 3
|
| 18 |
stride: 1
|
| 19 |
dilation: 3
|
| 20 |
activation: relu
|
| 21 |
dropout: 0.1
|
| 22 |
-
- in_channels:
|
| 23 |
-
out_channels:
|
| 24 |
kernel_size: 5
|
| 25 |
stride: 2
|
| 26 |
dilation: 3
|
| 27 |
activation: relu
|
| 28 |
dropout: 0.1
|
| 29 |
-
- in_channels:
|
| 30 |
-
out_channels:
|
| 31 |
kernel_size: 3
|
| 32 |
stride: 1
|
| 33 |
dilation: 2
|
|
@@ -35,7 +35,7 @@ conv2d_block_param_list:
|
|
| 35 |
dropout: 0.1
|
| 36 |
|
| 37 |
cls_head_param:
|
| 38 |
-
input_dim:
|
| 39 |
num_layers: 2
|
| 40 |
hidden_dims:
|
| 41 |
- 128
|
|
|
|
| 13 |
conv2d_block_param_list:
|
| 14 |
- batch_norm: true
|
| 15 |
in_channels: 1
|
| 16 |
+
out_channels: 32
|
| 17 |
kernel_size: 3
|
| 18 |
stride: 1
|
| 19 |
dilation: 3
|
| 20 |
activation: relu
|
| 21 |
dropout: 0.1
|
| 22 |
+
- in_channels: 32
|
| 23 |
+
out_channels: 32
|
| 24 |
kernel_size: 5
|
| 25 |
stride: 2
|
| 26 |
dilation: 3
|
| 27 |
activation: relu
|
| 28 |
dropout: 0.1
|
| 29 |
+
- in_channels: 32
|
| 30 |
+
out_channels: 32
|
| 31 |
kernel_size: 3
|
| 32 |
stride: 1
|
| 33 |
dilation: 2
|
|
|
|
| 35 |
dropout: 0.1
|
| 36 |
|
| 37 |
cls_head_param:
|
| 38 |
+
input_dim: 864
|
| 39 |
num_layers: 2
|
| 40 |
hidden_dims:
|
| 41 |
- 128
|
examples/vm_sound_classification/run.sh
CHANGED
|
@@ -13,7 +13,7 @@ E:/programmer/asr_datasets/voicemail/wav_finished/id-ID/wav_finished/*/*.wav" \
|
|
| 13 |
sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3
|
| 14 |
sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3
|
| 15 |
|
| 16 |
-
sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification8-
|
| 17 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
|
| 18 |
|
| 19 |
|
|
|
|
| 13 |
sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3
|
| 14 |
sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3
|
| 15 |
|
| 16 |
+
sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification8-ch32 \
|
| 17 |
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
|
| 18 |
|
| 19 |
|
examples/vm_sound_classification/step_3_train_model.py
CHANGED
|
@@ -181,7 +181,7 @@ def main():
|
|
| 181 |
# )
|
| 182 |
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
|
| 183 |
optimizer,
|
| 184 |
-
milestones=[10000, 20000, 30000], gamma=0.5
|
| 185 |
)
|
| 186 |
focal_loss = FocalLoss(
|
| 187 |
num_classes=vocabulary.get_vocab_size(namespace="labels"),
|
|
|
|
| 181 |
# )
|
| 182 |
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
|
| 183 |
optimizer,
|
| 184 |
+
milestones=[10000, 20000, 30000, 40000, 50000], gamma=0.5
|
| 185 |
)
|
| 186 |
focal_loss = FocalLoss(
|
| 187 |
num_classes=vocabulary.get_vocab_size(namespace="labels"),
|