HoneyTian committed on
Commit
35035c8
·
1 Parent(s): 48776cd
examples/fsmn_vad_by_webrtcvad/run.sh CHANGED
@@ -2,18 +2,25 @@
2
 
3
  : <<'END'
4
 
 
 
 
 
 
 
 
5
  bash run.sh --stage 1 --stop_stage 1 --system_version centos \
6
  --file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
7
  --final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
8
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
- --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
10
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
11
 
12
  bash run.sh --stage 3 --stop_stage 3 --system_version centos \
13
  --file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
14
  --final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
15
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
16
- --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
17
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
18
 
19
 
@@ -89,10 +96,10 @@ $verbose && echo "system_version: ${system_version}"
89
  $verbose && echo "file_folder_name: ${file_folder_name}"
90
 
91
  if [ $system_version == "windows" ]; then
92
- alias python3='D:/Users/tianx/PycharmProjects/virtualenv/nx_denoise/Scripts/python.exe'
93
  elif [ $system_version == "centos" ] || [ $system_version == "ubuntu" ]; then
94
- #source /data/local/bin/nx_denoise/bin/activate
95
- alias python3='/data/local/bin/nx_denoise/bin/python3'
96
  fi
97
 
98
 
 
2
 
3
  : <<'END'
4
 
5
+ bash run.sh --stage 1 --stop_stage 1 --system_version windows \
6
+ --file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
7
+ --final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
8
+ --noise_patterns "D:/Users/tianx/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
9
+ --speech_patterns "D:/Users/tianx/HuggingDatasets/nx_noise/data/speech/**/*.wav"
10
+
11
+
12
  bash run.sh --stage 1 --stop_stage 1 --system_version centos \
13
  --file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
14
  --final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
15
+ --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
16
+ --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
17
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
18
 
19
  bash run.sh --stage 3 --stop_stage 3 --system_version centos \
20
  --file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
21
  --final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
22
+ --noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
23
+ --speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
24
  /data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
25
 
26
 
 
96
  $verbose && echo "file_folder_name: ${file_folder_name}"
97
 
98
  if [ $system_version == "windows" ]; then
99
+ alias python3='D:/Users/tianx/PycharmProjects/virtualenv/cc_vad/Scripts/python.exe'
100
  elif [ $system_version == "centos" ] || [ $system_version == "ubuntu" ]; then
101
+ #source /data/local/bin/cc_vad/bin/activate
102
+ alias python3='/data/local/bin/cc_vad/bin/python3'
103
  fi
104
 
105
 
examples/fsmn_vad_by_webrtcvad/step_1_prepare_data.py CHANGED
@@ -35,7 +35,7 @@ def get_args():
35
  parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
36
 
37
  parser.add_argument("--duration", default=8.0, type=float)
38
- parser.add_argument("--min_speech_duration", default=6.0, type=float)
39
  parser.add_argument("--max_speech_duration", default=8.0, type=float)
40
  parser.add_argument("--min_snr_db", default=-10, type=float)
41
  parser.add_argument("--max_snr_db", default=20, type=float)
@@ -56,7 +56,7 @@ def target_second_noise_signal_generator(filename_patterns: List[str],
56
 
57
  for epoch_idx in range(max_epoch):
58
  for filename_pattern in filename_patterns:
59
- for filename in glob(filename_pattern):
60
  signal, _ = librosa.load(filename, sr=sample_rate)
61
 
62
  if signal.ndim != 1:
@@ -109,7 +109,7 @@ def target_second_speech_signal_generator(filename_patterns: List[str],
109
  sample_rate: int = 8000, max_epoch: int = 1):
110
  for epoch_idx in range(max_epoch):
111
  for filename_pattern in filename_patterns:
112
- for filename in glob(filename_pattern):
113
  signal, _ = librosa.load(filename, sr=sample_rate)
114
  raw_duration = librosa.get_duration(y=signal, sr=sample_rate)
115
 
 
35
  parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
36
 
37
  parser.add_argument("--duration", default=8.0, type=float)
38
+ parser.add_argument("--min_speech_duration", default=4.0, type=float)
39
  parser.add_argument("--max_speech_duration", default=8.0, type=float)
40
  parser.add_argument("--min_snr_db", default=-10, type=float)
41
  parser.add_argument("--max_snr_db", default=20, type=float)
 
56
 
57
  for epoch_idx in range(max_epoch):
58
  for filename_pattern in filename_patterns:
59
+ for filename in glob(filename_pattern, recursive=True):
60
  signal, _ = librosa.load(filename, sr=sample_rate)
61
 
62
  if signal.ndim != 1:
 
109
  sample_rate: int = 8000, max_epoch: int = 1):
110
  for epoch_idx in range(max_epoch):
111
  for filename_pattern in filename_patterns:
112
+ for filename in glob(filename_pattern, recursive=True):
113
  signal, _ = librosa.load(filename, sr=sample_rate)
114
  raw_duration = librosa.get_duration(y=signal, sr=sample_rate)
115