HoneyTian committed on
Commit
07f87a8
·
1 Parent(s): 349ff6e

add yaml config

Browse files
examples/sample_filter/test1.py CHANGED
@@ -12,7 +12,9 @@ def get_args():
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
- default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp\temp",
 
 
16
  type=str
17
  )
18
  parser.add_argument(
@@ -40,28 +42,40 @@ def main():
40
 
41
  client = Client("http://127.0.0.1:7864/")
42
 
43
- for filename in tqdm(data_dir.glob("*.wav")):
 
 
44
  filename = filename.as_posix()
45
 
46
- label, prob = client.predict(
47
  audio=handle_file(filename),
48
- model_name="vm_sound_classification8-ch32",
 
49
  ground_true="Hello!!",
50
  api_name="/click_button"
51
  )
52
- prob = float(prob)
53
 
54
- if prob > 0.7:
55
- shutil.move(
56
- filename,
57
- trash_dir.as_posix(),
58
- )
 
 
 
 
 
 
 
 
59
  else:
 
60
  shutil.move(
61
  filename,
62
  keep_dir.as_posix(),
63
  )
64
-
65
  return
66
 
67
 
 
12
  parser = argparse.ArgumentParser()
13
  parser.add_argument(
14
  "--data_dir",
15
+ # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
16
+ # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
17
+ default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
18
  type=str
19
  )
20
  parser.add_argument(
 
42
 
43
  client = Client("http://127.0.0.1:7864/")
44
 
45
+ for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
46
+ # if idx < 639:
47
+ # continue
48
  filename = filename.as_posix()
49
 
50
+ label1, prob1 = client.predict(
51
  audio=handle_file(filename),
52
+ # model_name="vm_sound_classification8-ch32",
53
+ model_name="voicemail-ms-my-2-ch32",
54
  ground_true="Hello!!",
55
  api_name="/click_button"
56
  )
57
+ prob1 = float(prob1)
58
 
59
+ label2, prob2 = client.predict(
60
+ audio=handle_file(filename),
61
+ # model_name="vm_sound_classification8-ch32",
62
+ model_name="sound-8-ch32",
63
+ ground_true="Hello!!",
64
+ api_name="/click_button"
65
+ )
66
+ prob2 = float(prob2)
67
+
68
+ if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob2 > 0.6:
69
+ pass
70
+ elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob2 > 0.6:
71
+ pass
72
  else:
73
+ print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
74
  shutil.move(
75
  filename,
76
  keep_dir.as_posix(),
77
  )
78
+ # exit(0)
79
  return
80
 
81
 
examples/sample_filter/test2.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ from pathlib import Path
5
+ import shutil
6
+
7
+ from gradio_client import Client, handle_file
8
+ from tqdm import tqdm
9
+
10
+ from project_settings import project_path
11
+
12
+
13
def get_args(argv=None):
    """Parse the command-line options of this sample-filter script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, exactly like the previous
            zero-argument form, so existing ``get_args()`` callers are
            unaffected. Passing a list allows programmatic use and testing.

    Returns:
        argparse.Namespace with the string attributes ``data_dir``,
        ``keep_dir`` and ``trash_dir``.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): the defaults are machine-specific Windows paths; override
    # them on the command line when running anywhere else.
    parser.add_argument(
        "--data_dir",
        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
        type=str,
        help="directory whose *.wav files are sent to the classifier",
    )
    parser.add_argument(
        "--keep_dir",
        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\keep-3",
        type=str,
        help="directory that low-confidence files are moved into",
    )
    parser.add_argument(
        "--trash_dir",
        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\trash",
        type=str,
        help="directory created at start-up; this script moves no files into it",
    )
    return parser.parse_args(argv)
36
+
37
+
38
def main():
    """Move low-confidence ``*.wav`` samples from ``data_dir`` to ``keep_dir``.

    Every selected file is sent to the Gradio endpoint ``/click_button`` of
    the service at http://127.0.0.1:7864/ using the model
    ``voicemail-ms-my-2-ch32``. A file is moved into ``keep_dir`` when the
    returned probability is below 0.95 for the label ``voicemail`` or below
    0.85 for any other label; all other files stay where they are.
    """
    args = get_args()

    data_dir = Path(args.data_dir)
    keep_dir = Path(args.keep_dir)
    keep_dir.mkdir(parents=True, exist_ok=True)
    # The trash directory is only created here; nothing below moves files
    # into it. Creation is kept so the script's side effects do not change.
    trash_dir = Path(args.trash_dir)
    trash_dir.mkdir(parents=True, exist_ok=True)

    client = Client("http://127.0.0.1:7864/")

    # NOTE(review): the first 200 files are skipped unconditionally, which
    # looks like a leftover resume offset -- confirm it is still wanted.
    skip_count = 200

    # Materialize and sort the listing so that (a) the skip offset refers to
    # a reproducible set of files instead of arbitrary directory order and
    # (b) tqdm knows the total and can show real progress.
    wav_files = sorted(data_dir.glob("*.wav"))

    for wav_path in tqdm(wav_files[skip_count:]):
        filename = wav_path.as_posix()

        label1, prob1 = client.predict(
            audio=handle_file(filename),
            model_name="voicemail-ms-my-2-ch32",
            ground_true="Hello!!",
            api_name="/click_button"
        )
        prob1 = float(prob1)
        print(f"label: {label1}, prob: {prob1}")

        # Stricter confidence bar for "voicemail" than for every other label.
        threshold = 0.95 if label1 == "voicemail" else 0.85
        if prob1 < threshold:
            shutil.move(
                filename,
                keep_dir.as_posix(),
            )
    return
75
+
76
+
77
+ if __name__ == '__main__':
78
+ main()
examples/sample_filter/test4.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import os
5
+ from pathlib import Path
6
+ import shutil
7
+
8
+ import pandas as pd
9
+ from gradio_client import Client, handle_file
10
+ from tqdm import tqdm
11
+
12
+ from project_settings import project_path
13
+
14
+
15
def get_args(argv=None):
    """Parse the command-line options of the transfer-sample collector.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, exactly like the previous
            zero-argument form, so existing ``get_args()`` callers are
            unaffected. Passing a list allows programmatic use and testing.

    Returns:
        argparse.Namespace with the string attributes ``task_file``,
        ``wav_dir`` and ``output_dir``.
    """
    parser = argparse.ArgumentParser()
    # NOTE(review): the directory defaults are machine-specific Windows
    # paths; override them on the command line when running anywhere else.
    parser.add_argument(
        "--task_file",
        default=r"task_DcTask_1_MY_LIVE_20250109_20250109-1.xlsx",
        type=str,
        help="Excel task sheet holding the call-id and intent-label columns",
    )
    parser.add_argument(
        "--wav_dir",
        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
        type=str,
        help="directory searched recursively for *.wav recordings",
    )
    parser.add_argument(
        "--output_dir",
        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
        type=str,
        help="directory the selected recordings are moved into",
    )
    return parser.parse_args(argv)
37
+
38
+
39
def main():
    """Move recordings of selected calls from ``wav_dir`` to ``output_dir``.

    The Excel task sheet is read with pandas. Calls whose intent label is
    "Connection - Transferred to agent" or
    "Connection - No human voice detected" are selected, and every ``*.wav``
    file found recursively under ``wav_dir`` whose name starts with one of
    those call ids (the text before the first underscore) is moved into
    ``output_dir``.
    """
    args = get_args()
    wav_dir = Path(args.wav_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    df = pd.read_excel(args.task_file)

    transfer_labels = (
        "Connection - Transferred to agent",
        "Connection - No human voice detected",
    )
    selected_ids = df.loc[df["意向标签"].isin(transfer_labels), "通话ID"]
    # File names yield string ids, while Excel cells may load as numbers;
    # compare as strings so numeric ids can still match.
    # NOTE(review): ids loaded as floats would render as "123.0" -- confirm
    # how the call-id column is typed in the real sheet.
    transfer_set = {str(call_id) for call_id in selected_ids}

    print(f"transfer count: {len(transfer_set)}")

    # Materialize the listing before moving anything so the recursive walk
    # is not affected by the moves, and so tqdm knows the total.
    wav_files = sorted(wav_dir.glob("**/*.wav"))

    for wav_path in tqdm(wav_files):
        # Only the leading field is needed; splitting once keeps a file with
        # an unexpected number of "_" separators from aborting the whole run.
        call_id = wav_path.stem.split("_", 1)[0]

        if call_id not in transfer_set:
            continue

        print(wav_path.as_posix())
        shutil.move(
            wav_path.as_posix(),
            output_dir.as_posix()
        )

    return
73
+
74
+
75
+ if __name__ == '__main__':
76
+ main()
examples/vm_sound_classification/run.sh CHANGED
@@ -2,22 +2,25 @@
2
 
3
  : <<'END'
4
 
5
- sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch32 \
6
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
7
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
8
  --label_plan 4
9
 
10
- sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification2-ch32 \
11
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
12
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
13
  --label_plan 4
14
 
15
- sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification8-ch32 \
16
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" --label_plan 8
17
-
18
- sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification2-ch32-voicemail \
19
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" --label_plan 2-voicemail
20
 
 
 
 
 
21
 
22
  END
23
 
@@ -33,6 +36,7 @@ file_folder_name=file_folder_name
33
  final_model_name=final_model_name
34
  filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
35
  label_plan=4
 
36
  nohup_name=nohup.out
37
 
38
  country=en-US
@@ -125,6 +129,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
125
  --train_dataset "${train_dataset}" \
126
  --valid_dataset "${valid_dataset}" \
127
  --serialization_dir "${file_dir}" \
 
128
 
129
  fi
130
 
 
2
 
3
  : <<'END'
4
 
5
+ sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32 \
6
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
7
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
8
  --label_plan 4
9
 
10
+ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32 \
11
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
12
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
13
  --label_plan 4
14
 
15
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
16
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
17
+ --label_plan 3 \
18
+ --config_file "yaml/conv2d-classifier-3-ch4.yaml"
 
19
 
20
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
21
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
22
+ --label_plan 2-voicemail \
23
+ --config_file "yaml/conv2d-classifier-2-ch4.yaml"
24
 
25
  END
26
 
 
36
  final_model_name=final_model_name
37
  filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
38
  label_plan=4
39
+ config_file="yaml/conv2d-classifier-2-ch4.yaml"
40
  nohup_name=nohup.out
41
 
42
  country=en-US
 
129
  --train_dataset "${train_dataset}" \
130
  --valid_dataset "${valid_dataset}" \
131
  --serialization_dir "${file_dir}" \
132
+ --config_file "${config_file}" \
133
 
134
  fi
135
 
examples/vm_sound_classification/run_batch.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+
4
+ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4 \
5
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
6
+ --label_plan 3 \
7
+ --config_file "yaml/conv2d-classifier-3-ch4.yaml"
8
+
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch16.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 16
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 16
23
+ out_channels: 16
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 16
30
+ out_channels: 16
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 432
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 2
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch32.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 32
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 32
23
+ out_channels: 32
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 32
30
+ out_channels: 32
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 864
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 2
examples/vm_sound_classification/{conv2d_classifier.yaml → yaml/conv2d-classifier-2-ch4.yaml} RENAMED
File without changes
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch8.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 8
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 8
23
+ out_channels: 8
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 8
30
+ out_channels: 8
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 216
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 2
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch16.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 16
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 16
23
+ out_channels: 16
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 16
30
+ out_channels: 16
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 432
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch32.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 32
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 32
23
+ out_channels: 32
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 32
30
+ out_channels: 32
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 864
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch4.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 4
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 4
23
+ out_channels: 4
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 4
30
+ out_channels: 4
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 108
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch8.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 8
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 8
23
+ out_channels: 8
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 8
30
+ out_channels: 8
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 216
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 3
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch16.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 16
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 16
23
+ out_channels: 16
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 16
30
+ out_channels: 16
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 432
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch32.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 32
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 32
23
+ out_channels: 32
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 32
30
+ out_channels: 32
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 864
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch4.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 4
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 4
23
+ out_channels: 4
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 4
30
+ out_channels: 4
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 108
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch8.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 8
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 8
23
+ out_channels: 8
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 8
30
+ out_channels: 8
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 216
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 4
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch16.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 16
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 16
23
+ out_channels: 16
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 16
30
+ out_channels: 16
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 432
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 8
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch32.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 32
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 32
23
+ out_channels: 32
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 32
30
+ out_channels: 32
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 864
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 8
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch4.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 4
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 4
23
+ out_channels: 4
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 4
30
+ out_channels: 4
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 108
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 8
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch8.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: "cnn_audio_classifier"
2
+
3
+ mel_spectrogram_param:
4
+ sample_rate: 8000
5
+ n_fft: 512
6
+ win_length: 200
7
+ hop_length: 80
8
+ f_min: 10
9
+ f_max: 3800
10
+ window_fn: hamming
11
+ n_mels: 80
12
+
13
+ conv2d_block_param_list:
14
+ - batch_norm: true
15
+ in_channels: 1
16
+ out_channels: 8
17
+ kernel_size: 3
18
+ stride: 1
19
+ dilation: 3
20
+ activation: relu
21
+ dropout: 0.1
22
+ - in_channels: 8
23
+ out_channels: 8
24
+ kernel_size: 5
25
+ stride: 2
26
+ dilation: 3
27
+ activation: relu
28
+ dropout: 0.1
29
+ - in_channels: 8
30
+ out_channels: 8
31
+ kernel_size: 3
32
+ stride: 1
33
+ dilation: 2
34
+ activation: relu
35
+ dropout: 0.1
36
+
37
+ cls_head_param:
38
+ input_dim: 216
39
+ num_layers: 2
40
+ hidden_dims:
41
+ - 128
42
+ - 32
43
+ activations: relu
44
+ dropout: 0.1
45
+ num_labels: 8
main.py CHANGED
@@ -2,7 +2,6 @@
2
  # -*- coding: utf-8 -*-
3
  import argparse
4
  from functools import lru_cache
5
- import json
6
  from pathlib import Path
7
  import platform
8
  import shutil
@@ -11,7 +10,6 @@ import zipfile
11
  from typing import Tuple
12
 
13
  import gradio as gr
14
- from dill.pointers import parents
15
  from huggingface_hub import snapshot_download
16
  import numpy as np
17
  import torch
@@ -114,13 +112,14 @@ def main():
114
 
115
  examples_dir = Path(args.examples_dir)
116
  trained_model_dir = Path(args.trained_model_dir)
117
- trained_model_dir.mkdir(parents=True, exist_ok=True)
118
 
119
  # download models
120
- _ = snapshot_download(
121
- repo_id=args.models_repo_id,
122
- local_dir=trained_model_dir.as_posix()
123
- )
 
 
124
 
125
  # examples
126
  example_zip_file = trained_model_dir / "examples.zip"
@@ -138,6 +137,7 @@ def main():
138
  if model_name == "examples":
139
  continue
140
  model_choices.append(model_name)
 
141
 
142
  # examples
143
  examples = list()
@@ -187,6 +187,7 @@ def main():
187
  outputs=[c_label, c_probability],
188
  )
189
 
 
190
  blocks.queue().launch(
191
  share=False if platform.system() == "Windows" else False,
192
  server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",
 
2
  # -*- coding: utf-8 -*-
3
  import argparse
4
  from functools import lru_cache
 
5
  from pathlib import Path
6
  import platform
7
  import shutil
 
10
  from typing import Tuple
11
 
12
  import gradio as gr
 
13
  from huggingface_hub import snapshot_download
14
  import numpy as np
15
  import torch
 
112
 
113
  examples_dir = Path(args.examples_dir)
114
  trained_model_dir = Path(args.trained_model_dir)
 
115
 
116
  # download models
117
+ if not trained_model_dir.exists():
118
+ trained_model_dir.mkdir(parents=True, exist_ok=True)
119
+ _ = snapshot_download(
120
+ repo_id=args.models_repo_id,
121
+ local_dir=trained_model_dir.as_posix()
122
+ )
123
 
124
  # examples
125
  example_zip_file = trained_model_dir / "examples.zip"
 
137
  if model_name == "examples":
138
  continue
139
  model_choices.append(model_name)
140
+ model_choices = list(sorted(model_choices))
141
 
142
  # examples
143
  examples = list()
 
187
  outputs=[c_label, c_probability],
188
  )
189
 
190
+ # http://127.0.0.1:7864/
191
  blocks.queue().launch(
192
  share=False if platform.system() == "Windows" else False,
193
  server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",