add yaml config
- examples/sample_filter/test1.py +25 -11
- examples/sample_filter/test2.py +78 -0
- examples/sample_filter/test4.py +76 -0
- examples/vm_sound_classification/run.sh +12 -7
- examples/vm_sound_classification/run_batch.sh +8 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-2-ch16.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-2-ch32.yaml +45 -0
- examples/vm_sound_classification/{conv2d_classifier.yaml → yaml/conv2d-classifier-2-ch4.yaml} +0 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-2-ch8.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-3-ch16.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-3-ch32.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-3-ch4.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-3-ch8.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-4-ch16.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-4-ch32.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-4-ch4.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-4-ch8.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-8-ch16.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-8-ch32.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-8-ch4.yaml +45 -0
- examples/vm_sound_classification/yaml/conv2d-classifier-8-ch8.yaml +45 -0
- main.py +8 -7
examples/sample_filter/test1.py
CHANGED
@@ -12,7 +12,9 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--data_dir",
-        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
         type=str
     )
     parser.add_argument(
@@ -40,28 +42,40 @@ def main():

     client = Client("http://127.0.0.1:7864/")

-    for filename in tqdm(data_dir.glob("*.wav")):
+    for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
+        # if idx < 639:
+        #     continue
         filename = filename.as_posix()

-
+        label1, prob1 = client.predict(
             audio=handle_file(filename),
-            model_name="vm_sound_classification8-ch32",
+            # model_name="vm_sound_classification8-ch32",
+            model_name="voicemail-ms-my-2-ch32",
             ground_true="Hello!!",
             api_name="/click_button"
         )
-
+        prob1 = float(prob1)

-
-
-
-
-
+        label2, prob2 = client.predict(
+            audio=handle_file(filename),
+            # model_name="vm_sound_classification8-ch32",
+            model_name="sound-8-ch32",
+            ground_true="Hello!!",
+            api_name="/click_button"
+        )
+        prob2 = float(prob2)
+
+        if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob2 > 0.6:
+            pass
+        elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob2 > 0.6:
+            pass
         else:
+            print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
             shutil.move(
                 filename,
                 keep_dir.as_posix(),
             )
-
+        # exit(0)
     return

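The updated test1.py cross-checks every recording against two models served by the same Gradio app: the binary voicemail-ms-my-2-ch32 model and the 8-class sound-8-ch32 model. Files on which the two predictions agree with enough confidence are left in place; everything else is printed and moved to keep_dir for manual review. A minimal sketch of that agreement rule, pulled out of the loop above (the helper name is made up; the label strings and the 0.6 threshold come straight from the diff):

def agree(label1: str, label2: str, prob2: float) -> bool:
    # True when the second model's confident prediction is consistent with the first model's label
    if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob2 > 0.6:
        return True
    if label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob2 > 0.6:
        return True
    return False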
examples/sample_filter/test2.py
ADDED
@@ -0,0 +1,78 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import argparse
+from pathlib import Path
+import shutil
+
+from gradio_client import Client, handle_file
+from tqdm import tqdm
+
+from project_settings import project_path
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--data_dir",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
+        type=str
+    )
+    parser.add_argument(
+        "--keep_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\keep-3",
+        type=str
+    )
+    parser.add_argument(
+        "--trash_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\trash",
+        type=str
+    )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = get_args()
+
+    data_dir = Path(args.data_dir)
+    keep_dir = Path(args.keep_dir)
+    keep_dir.mkdir(parents=True, exist_ok=True)
+    trash_dir = Path(args.trash_dir)
+    trash_dir.mkdir(parents=True, exist_ok=True)
+
+    client = Client("http://127.0.0.1:7864/")
+
+    for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
+        if idx < 200:
+            continue
+        filename = filename.as_posix()
+
+        label1, prob1 = client.predict(
+            audio=handle_file(filename),
+            # model_name="vm_sound_classification8-ch32",
+            model_name="voicemail-ms-my-2-ch32",
+            ground_true="Hello!!",
+            api_name="/click_button"
+        )
+        prob1 = float(prob1)
+        print(f"label: {label1}, prob: {prob1}")
+
+        if label1 == "voicemail" and prob1 < 0.95:
+            shutil.move(
+                filename,
+                keep_dir.as_posix(),
+            )
+        elif label1 != "voicemail" and prob1 < 0.85:
+            shutil.move(
+                filename,
+                keep_dir.as_posix(),
+            )
+    return
+
+
+if __name__ == '__main__':
+    main()
examples/sample_filter/test4.py
ADDED
@@ -0,0 +1,76 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import argparse
+import os
+from pathlib import Path
+import shutil
+
+import pandas as pd
+from gradio_client import Client, handle_file
+from tqdm import tqdm
+
+from project_settings import project_path
+
+
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--task_file",
+        default=r"task_DcTask_1_MY_LIVE_20250109_20250109-1.xlsx",
+        type=str
+    )
+    parser.add_argument(
+        "--wav_dir",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
+        type=str
+    )
+    parser.add_argument(
+        "--output_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
+        type=str
+    )
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = get_args()
+    wav_dir = Path(args.wav_dir)
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    df = pd.read_excel(args.task_file)
+
+    transfer_set = set()
+    for i, row in df.iterrows():
+        call_id = row["通话ID"]
+        intent_str = row["意向标签"]
+        if intent_str == "Connection - Transferred to agent":
+            transfer_set.add(call_id)
+        if intent_str == "Connection - No human voice detected":
+            transfer_set.add(call_id)
+
+    print(f"transfer count: {len(transfer_set)}")
+
+    for idx, filename in tqdm(enumerate(wav_dir.glob("**/*.wav"))):
+
+        basename = filename.stem
+        call_id, _, _, _ = basename.split("_")
+
+        if call_id not in transfer_set:
+            continue
+
+        print(filename.as_posix())
+        shutil.move(
+            filename.as_posix(),
+            output_dir.as_posix()
+        )
+
+    return
+
+
+if __name__ == '__main__':
+    main()
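test4.py reads the task spreadsheet (the 通话ID column holds the call ID, 意向标签 the intent label), collects the call IDs whose intent is "Connection - Transferred to agent" or "Connection - No human voice detected", and then moves the matching recordings into the transfer directory. It assumes each wav name splits into exactly four underscore-separated fields with the call ID first; a small illustrative helper, with an invented sample filename:

from pathlib import Path

def extract_call_id(wav_path: str) -> str:
    basename = Path(wav_path).stem           # e.g. "abc123_20250109_1_0"
    call_id, _, _, _ = basename.split("_")   # raises ValueError if the name has a different layout
    return call_id

print(extract_call_id("abc123_20250109_1_0.wav"))  # -> "abc123"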
examples/vm_sound_classification/run.sh
CHANGED
@@ -2,22 +2,25 @@

 : <<'END'

-sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name
+sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32 \
 --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
 E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 --label_plan 4

-sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name
+sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32 \
 --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
 E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 --label_plan 4

-sh run.sh --stage 0 --stop_stage
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
-
-
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" --label_plan 2-voicemail
+sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
+--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
+--label_plan 3 \
+--config_file "yaml/conv2d-classifier-3-ch4.yaml"

+sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
+--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
+--label_plan 2-voicemail \
+--config_file "yaml/conv2d-classifier-2-ch4.yaml"

 END

@@ -33,6 +36,7 @@ file_folder_name=file_folder_name
 final_model_name=final_model_name
 filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
 label_plan=4
+config_file="yaml/conv2d-classifier-2-ch4.yaml"
 nohup_name=nohup.out

 country=en-US
@@ -125,6 +129,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 --train_dataset "${train_dataset}" \
 --valid_dataset "${valid_dataset}" \
 --serialization_dir "${file_dir}" \
+--config_file "${config_file}" \

 fi

examples/vm_sound_classification/run_batch.sh
ADDED
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+
+sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4 \
+--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
+--label_plan 3 \
+--config_file "yaml/conv2d-classifier-3-ch4.yaml"
+
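run_batch.sh launches a single training run (the 3-label, 4-channel config). If one wanted to sweep the other channel widths added in this commit, a small driver along the following lines would do it; this is only a sketch, not part of the repository, and it reuses the exact flags run_batch.sh passes to run.sh:

import subprocess

# Hypothetical sweep over the 3-label configs added in this commit.
for channels in (4, 8, 16, 32):
    subprocess.run(
        [
            "sh", "run.sh",
            "--stage", "0", "--stop_stage", "6",
            "--system_version", "centos",
            "--file_folder_name", "file_dir",
            "--final_model_name", f"sound-3-ch{channels}",
            "--filename_patterns", "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav",
            "--label_plan", "3",
            "--config_file", f"yaml/conv2d-classifier-3-ch{channels}.yaml",
        ],
        check=True,
    )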
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch16.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 2
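Each of these new YAML files follows the same layout: a mel-spectrogram front end, three Conv2d blocks whose channel width gives the file its "chN" suffix, and a classification head whose num_labels matches the leading number in the filename (input_dim scales with the channel width: 108 for 4 channels, 216 for 8, 432 for 16, 864 for 32). A minimal sketch of reading one of them; the training code's own loader is not shown in this commit, so this is only to illustrate the structure:

import yaml  # PyYAML

with open("examples/vm_sound_classification/yaml/conv2d-classifier-2-ch16.yaml") as f:
    config = yaml.safe_load(f)

print(config["model_name"])                        # cnn_audio_classifier
print(config["mel_spectrogram_param"]["n_mels"])   # 80
print(len(config["conv2d_block_param_list"]))      # 3 conv blocks
print(config["cls_head_param"]["num_labels"])      # 2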
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch32.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 2
examples/vm_sound_classification/{conv2d_classifier.yaml → yaml/conv2d-classifier-2-ch4.yaml}
RENAMED
File without changes
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch8.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 2
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch16.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch32.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch4.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 108
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch8.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch16.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch32.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch4.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 108
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch8.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch16.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch32.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch4.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 108
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch8.yaml
ADDED
@@ -0,0 +1,45 @@
+model_name: "cnn_audio_classifier"
+
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8
main.py
CHANGED
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 import argparse
 from functools import lru_cache
-import json
 from pathlib import Path
 import platform
 import shutil
@@ -11,7 +10,6 @@ import zipfile
 from typing import Tuple

 import gradio as gr
-from dill.pointers import parents
 from huggingface_hub import snapshot_download
 import numpy as np
 import torch
@@ -114,13 +112,14 @@ def main():

     examples_dir = Path(args.examples_dir)
     trained_model_dir = Path(args.trained_model_dir)
-    trained_model_dir.mkdir(parents=True, exist_ok=True)

     # download models
-
-
-
-
+    if not trained_model_dir.exists():
+        trained_model_dir.mkdir(parents=True, exist_ok=True)
+        _ = snapshot_download(
+            repo_id=args.models_repo_id,
+            local_dir=trained_model_dir.as_posix()
+        )

     # examples
     example_zip_file = trained_model_dir / "examples.zip"
@@ -138,6 +137,7 @@ def main():
         if model_name == "examples":
             continue
         model_choices.append(model_name)
+    model_choices = list(sorted(model_choices))

     # examples
     examples = list()
@@ -187,6 +187,7 @@ def main():
         outputs=[c_label, c_probability],
     )

+    # http://127.0.0.1:7864/
     blocks.queue().launch(
         share=False if platform.system() == "Windows" else False,
         server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
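The main.py change replaces the unconditional mkdir with a first-launch guard: the model snapshot is pulled from the Hub only when trained_model_dir does not exist yet, so restarting the Space skips the download. Reduced to a standalone sketch (the repo id and directory are placeholders; main.py takes them from --models_repo_id and --trained_model_dir):

from pathlib import Path
from huggingface_hub import snapshot_download

trained_model_dir = Path("trained_models")
if not trained_model_dir.exists():
    trained_model_dir.mkdir(parents=True, exist_ok=True)
    snapshot_download(
        repo_id="your-username/your-models-repo",  # placeholder repo id
        local_dir=trained_model_dir.as_posix(),
    )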