HoneyTian committed on
Commit
6e26705
·
1 Parent(s): 463adfc
examples/vm_sound_classification/run.sh CHANGED
@@ -5,18 +5,18 @@
5
  sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
6
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
7
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 
8
 
9
  sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
10
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
11
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 
12
 
 
 
13
 
14
- sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3
15
- sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3
16
-
17
- sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
18
- --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
19
-
20
 
21
  "
22
 
@@ -33,6 +33,7 @@ work_dir="$(pwd)"
33
  file_folder_name=file_folder_name
34
  final_model_name=final_model_name
35
  filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
 
36
  nohup_name=nohup.out
37
 
38
  country=en-US
@@ -112,6 +113,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
112
  --vocabulary_dir "${vocabulary_dir}" \
113
  --train_dataset "${train_dataset}" \
114
  --valid_dataset "${valid_dataset}" \
 
115
 
116
  fi
117
 
 
5
  sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
6
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
7
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
8
+ --label_plan 4
9
 
10
  sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
11
  --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
12
  E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
13
+ --label_plan 4
14
 
15
+ sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3 --label_plan 4
16
+ sh run.sh --stage 3 --stop_stage 3 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification3 --label_plan 4
17
 
18
+ sh run.sh --stage 2 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification4-ch16 \
19
+ --filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" --label_plan 4
 
 
 
 
20
 
21
  "
22
 
 
33
  file_folder_name=file_folder_name
34
  final_model_name=final_model_name
35
  filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
36
+ label_plan=4
37
  nohup_name=nohup.out
38
 
39
  country=en-US
 
113
  --vocabulary_dir "${vocabulary_dir}" \
114
  --train_dataset "${train_dataset}" \
115
  --valid_dataset "${valid_dataset}" \
116
+ --label_plan "${label_plan}" \
117
 
118
  fi
119
 
examples/vm_sound_classification/step_1_prepare_data.py CHANGED
@@ -25,6 +25,8 @@ def get_args():
25
  parser.add_argument("--train_dataset", default="train.xlsx", type=str)
26
  parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
27
 
 
 
28
  args = parser.parse_args()
29
  return args
30
 
@@ -37,54 +39,60 @@ def get_dataset(args):
37
  file_dir = Path(args.file_dir)
38
  file_dir.mkdir(exist_ok=True)
39
 
40
- # label2_map = {
41
- # "bell": "non_voice",
42
- # "white_noise": "non_voice",
43
- # "low_white_noise": "non_voice",
44
- # "high_white_noise": "non_voice",
45
- # "music": "non_voice",
46
- # "mute": "non_voice",
47
- # "noise": "non_voice",
48
- # "noise_mute": "non_voice",
49
- # "voice": "voice",
50
- # "voicemail": "voice",
51
- # }
52
- # label3_map = {
53
- # "bell": "voicemail",
54
- # "white_noise": "mute",
55
- # "low_white_noise": "mute",
56
- # "high_white_noise": "mute",
57
- # # "music": "music",
58
- # "mute": "mute",
59
- # "noise": "voice_or_noise",
60
- # "noise_mute": "voice_or_noise",
61
- # "voice": "voice_or_noise",
62
- # "voicemail": "voicemail",
63
- # }
64
- label4_map = {
65
- "bell": "voicemail",
66
- "white_noise": "mute",
67
- "low_white_noise": "mute",
68
- "high_white_noise": "mute",
69
- # "music": "music",
70
- "mute": "mute",
71
- "noise": "noise",
72
- "noise_mute": "noise",
73
- "voice": "voice",
74
- "voicemail": "voicemail",
75
- }
76
- # label8_map = {
77
- # "bell": "bell",
78
- # "white_noise": "white_noise",
79
- # "low_white_noise": "white_noise",
80
- # "high_white_noise": "white_noise",
81
- # "music": "music",
82
- # "mute": "mute",
83
- # "noise": "noise",
84
- # "noise_mute": "noise_mute",
85
- # "voice": "voice",
86
- # "voicemail": "voicemail",
87
- # }
 
 
 
 
 
 
88
 
89
  result = list()
90
  for filename_pattern in filename_patterns:
@@ -98,10 +106,10 @@ def get_dataset(args):
98
  folder = filename.parts[-2]
99
  country = filename.parts[-4]
100
 
101
- if folder not in label4_map.keys():
102
  continue
103
 
104
- labels = label4_map[folder]
105
 
106
  random1 = random.random()
107
  random2 = random.random()
 
25
  parser.add_argument("--train_dataset", default="train.xlsx", type=str)
26
  parser.add_argument("--valid_dataset", default="valid.xlsx", type=str)
27
 
28
+ parser.add_argument("--label_plan", default="4", type=str)
29
+
30
  args = parser.parse_args()
31
  return args
32
 
 
39
  file_dir = Path(args.file_dir)
40
  file_dir.mkdir(exist_ok=True)
41
 
42
+ if args.label_plan == "2":
43
+ label_map = {
44
+ "bell": "non_voice",
45
+ "white_noise": "non_voice",
46
+ "low_white_noise": "non_voice",
47
+ "high_white_noise": "non_voice",
48
+ "music": "non_voice",
49
+ "mute": "non_voice",
50
+ "noise": "non_voice",
51
+ "noise_mute": "non_voice",
52
+ "voice": "voice",
53
+ "voicemail": "voice",
54
+ }
55
+ elif args.label_plan == "3":
56
+ label_map = {
57
+ "bell": "voicemail",
58
+ "white_noise": "mute",
59
+ "low_white_noise": "mute",
60
+ "high_white_noise": "mute",
61
+ # "music": "music",
62
+ "mute": "mute",
63
+ "noise": "voice_or_noise",
64
+ "noise_mute": "voice_or_noise",
65
+ "voice": "voice_or_noise",
66
+ "voicemail": "voicemail",
67
+ }
68
+ elif args.label_plan == "4":
69
+ label_map = {
70
+ "bell": "voicemail",
71
+ "white_noise": "mute",
72
+ "low_white_noise": "mute",
73
+ "high_white_noise": "mute",
74
+ # "music": "music",
75
+ "mute": "mute",
76
+ "noise": "noise",
77
+ "noise_mute": "noise",
78
+ "voice": "voice",
79
+ "voicemail": "voicemail",
80
+ }
81
+ elif args.label_plan == "8":
82
+ label_map = {
83
+ "bell": "bell",
84
+ "white_noise": "white_noise",
85
+ "low_white_noise": "white_noise",
86
+ "high_white_noise": "white_noise",
87
+ "music": "music",
88
+ "mute": "mute",
89
+ "noise": "noise",
90
+ "noise_mute": "noise_mute",
91
+ "voice": "voice",
92
+ "voicemail": "voicemail",
93
+ }
94
+ else:
95
+ raise AssertionError
96
 
97
  result = list()
98
  for filename_pattern in filename_patterns:
 
106
  folder = filename.parts[-2]
107
  country = filename.parts[-4]
108
 
109
+ if folder not in label_map.keys():
110
  continue
111
 
112
+ labels = label_map[folder]
113
 
114
  random1 = random.random()
115
  random2 = random.random()
toolbox/torchaudio/augment/spec_augment.py CHANGED
@@ -20,6 +20,7 @@ class SpecAugment(nn.Module):
20
  @staticmethod
21
  def augment_volume(spec: torch.Tensor, factor_range: Tuple[float, float] = (0.5, 2.0)):
22
  factor = random.uniform(*factor_range)
 
23
  spec_ = spec.clone().detach()
24
  spec_ *= factor
25
  return spec_
 
20
  @staticmethod
21
  def augment_volume(spec: torch.Tensor, factor_range: Tuple[float, float] = (0.5, 2.0)):
22
  factor = random.uniform(*factor_range)
23
+ factor = torch.tensor(factor, dtype=torch.float32)
24
  spec_ = spec.clone().detach()
25
  spec_ *= factor
26
  return spec_