Spaces:
Runtime error
Runtime error
update
Browse files
examples/vm_sound_classification/run.sh
CHANGED
@@ -5,18 +5,18 @@
|
|
5 |
sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
|
6 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
7 |
E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
|
|
8 |
|
9 |
sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
|
10 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
11 |
E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
|
|
12 |
|
|
|
|
|
13 |
|
14 |
-
sh run.sh --stage 2 --stop_stage
|
15 |
-
|
16 |
-
|
17 |
-
sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
|
18 |
-
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
|
19 |
-
|
20 |
|
21 |
"
|
22 |
|
@@ -33,6 +33,7 @@ work_dir="$(pwd)"
|
|
33 |
file_folder_name=file_folder_name
|
34 |
final_model_name=final_model_name
|
35 |
filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
|
|
|
36 |
nohup_name=nohup.out
|
37 |
|
38 |
country=en-US
|
@@ -112,6 +113,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
112 |
--vocabulary_dir "${vocabulary_dir}" \
|
113 |
--train_dataset "${train_dataset}" \
|
114 |
--valid_dataset "${valid_dataset}" \
|
|
|
115 |
|
116 |
fi
|
117 |
|
|
|
5 |
sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
|
6 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
7 |
E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
8 |
+
--label_plan 4
|
9 |
|
10 |
sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
|
11 |
--filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
|
12 |
E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
|
13 |
+
--label_plan 4
|
14 |
|
15 |
+
sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3 --label_plan 4
|
16 |
+
sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3 --label_plan 4
|
17 |
|
18 |
+
sh run.sh --stage 2 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
|
19 |
+
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" --label_plan 4
|
|
|
|
|
|
|
|
|
20 |
|
21 |
"
|
22 |
|
|
|
33 |
file_folder_name=file_folder_name
|
34 |
final_model_name=final_model_name
|
35 |
filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
|
36 |
+
label_plan=4
|
37 |
nohup_name=nohup.out
|
38 |
|
39 |
country=en-US
|
|
|
113 |
--vocabulary_dir "${vocabulary_dir}" \
|
114 |
--train_dataset "${train_dataset}" \
|
115 |
--valid_dataset "${valid_dataset}" \
|
116 |
+
--label_plan "${label_plan}" \
|
117 |
|
118 |
fi
|
119 |
|
examples/vm_sound_classification/step_1_prepare_data.py
CHANGED
@@ -25,6 +25,8 @@ def get_args():
|
|
25 |
parser.add_argument("--train_dataset", default="train.xlsx", type=str)
|
26 |
parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
|
27 |
|
|
|
|
|
28 |
args = parser.parse_args()
|
29 |
return args
|
30 |
|
@@ -37,54 +39,60 @@ def get_dataset(args):
|
|
37 |
file_dir = Path(args.file_dir)
|
38 |
file_dir.mkdir(exist_ok=True)
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
result = list()
|
90 |
for filename_pattern in filename_patterns:
|
@@ -98,10 +106,10 @@ def get_dataset(args):
|
|
98 |
folder = filename.parts[-2]
|
99 |
country = filename.parts[-4]
|
100 |
|
101 |
-
if folder not in
|
102 |
continue
|
103 |
|
104 |
-
labels =
|
105 |
|
106 |
random1 = random.random()
|
107 |
random2 = random.random()
|
|
|
25 |
parser.add_argument("--train_dataset", default="train.xlsx", type=str)
|
26 |
parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
|
27 |
|
28 |
+
parser.add_argument("--label_plan", default="4", type=str)
|
29 |
+
|
30 |
args = parser.parse_args()
|
31 |
return args
|
32 |
|
|
|
39 |
file_dir = Path(args.file_dir)
|
40 |
file_dir.mkdir(exist_ok=True)
|
41 |
|
42 |
+
if args.label_plan == "2":
|
43 |
+
label_map = {
|
44 |
+
"bell": "non_voice",
|
45 |
+
"white_noise": "non_voice",
|
46 |
+
"low_white_noise": "non_voice",
|
47 |
+
"high_white_noise": "non_voice",
|
48 |
+
"music": "non_voice",
|
49 |
+
"mute": "non_voice",
|
50 |
+
"noise": "non_voice",
|
51 |
+
"noise_mute": "non_voice",
|
52 |
+
"voice": "voice",
|
53 |
+
"voicemail": "voice",
|
54 |
+
}
|
55 |
+
elif args.label_plan == "3":
|
56 |
+
label_map = {
|
57 |
+
"bell": "voicemail",
|
58 |
+
"white_noise": "mute",
|
59 |
+
"low_white_noise": "mute",
|
60 |
+
"high_white_noise": "mute",
|
61 |
+
# "music": "music",
|
62 |
+
"mute": "mute",
|
63 |
+
"noise": "voice_or_noise",
|
64 |
+
"noise_mute": "voice_or_noise",
|
65 |
+
"voice": "voice_or_noise",
|
66 |
+
"voicemail": "voicemail",
|
67 |
+
}
|
68 |
+
elif args.label_plan == "4":
|
69 |
+
label_map = {
|
70 |
+
"bell": "voicemail",
|
71 |
+
"white_noise": "mute",
|
72 |
+
"low_white_noise": "mute",
|
73 |
+
"high_white_noise": "mute",
|
74 |
+
# "music": "music",
|
75 |
+
"mute": "mute",
|
76 |
+
"noise": "noise",
|
77 |
+
"noise_mute": "noise",
|
78 |
+
"voice": "voice",
|
79 |
+
"voicemail": "voicemail",
|
80 |
+
}
|
81 |
+
elif args.label_plan == "8":
|
82 |
+
label_map = {
|
83 |
+
"bell": "bell",
|
84 |
+
"white_noise": "white_noise",
|
85 |
+
"low_white_noise": "white_noise",
|
86 |
+
"high_white_noise": "white_noise",
|
87 |
+
"music": "music",
|
88 |
+
"mute": "mute",
|
89 |
+
"noise": "noise",
|
90 |
+
"noise_mute": "noise_mute",
|
91 |
+
"voice": "voice",
|
92 |
+
"voicemail": "voicemail",
|
93 |
+
}
|
94 |
+
else:
|
95 |
+
raise AssertionError
|
96 |
|
97 |
result = list()
|
98 |
for filename_pattern in filename_patterns:
|
|
|
106 |
folder = filename.parts[-2]
|
107 |
country = filename.parts[-4]
|
108 |
|
109 |
+
if folder not in label_map.keys():
|
110 |
continue
|
111 |
|
112 |
+
labels = label_map[folder]
|
113 |
|
114 |
random1 = random.random()
|
115 |
random2 = random.random()
|
toolbox/torchaudio/augment/spec_augment.py
CHANGED
@@ -20,6 +20,7 @@ class SpecAugment(nn.Module):
|
|
20 |
@staticmethod
|
21 |
def augment_volume(spec: torch.Tensor, factor_range: Tuple[float, float] = (0.5, 2.0)):
|
22 |
factor = random.uniform(*factor_range)
|
|
|
23 |
spec_ = spec.clone().detach()
|
24 |
spec_ *= factor
|
25 |
return spec_
|
|
|
20 |
@staticmethod
|
21 |
def augment_volume(spec: torch.Tensor, factor_range: Tuple[float, float] = (0.5, 2.0)):
|
22 |
factor = random.uniform(*factor_range)
|
23 |
+
factor = torch.tensor(factor, dtype=torch.float32)
|
24 |
spec_ = spec.clone().detach()
|
25 |
spec_ *= factor
|
26 |
return spec_
|