update
Browse files
examples/fsmn_vad_by_webrtcvad/run.sh
CHANGED
@@ -2,18 +2,25 @@
|
|
2 |
|
3 |
: <<'END'
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
bash run.sh --stage 1 --stop_stage 1 --system_version centos \
|
6 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
7 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
8 |
-
--
|
9 |
-
--
|
10 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
11 |
|
12 |
bash run.sh --stage 3 --stop_stage 3 --system_version centos \
|
13 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
14 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
15 |
-
--
|
16 |
-
--
|
17 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
18 |
|
19 |
|
@@ -89,10 +96,10 @@ $verbose && echo "system_version: ${system_version}"
|
|
89 |
$verbose && echo "file_folder_name: ${file_folder_name}"
|
90 |
|
91 |
if [ $system_version == "windows" ]; then
|
92 |
-
alias python3='D:/Users/tianx/PycharmProjects/virtualenv/
|
93 |
elif [ $system_version == "centos" ] || [ $system_version == "ubuntu" ]; then
|
94 |
-
#source /data/local/bin/
|
95 |
-
alias python3='/data/local/bin/
|
96 |
fi
|
97 |
|
98 |
|
|
|
2 |
|
3 |
: <<'END'
|
4 |
|
5 |
+
bash run.sh --stage 1 --stop_stage 1 --system_version windows \
|
6 |
+
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
7 |
+
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
8 |
+
--noise_patterns "D:/Users/tianx/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
|
9 |
+
--speech_patterns "D:/Users/tianx/HuggingDatasets/nx_noise/data/speech/**/*.wav"
|
10 |
+
|
11 |
+
|
12 |
bash run.sh --stage 1 --stop_stage 1 --system_version centos \
|
13 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
14 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
15 |
+
--noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
|
16 |
+
--speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
|
17 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
18 |
|
19 |
bash run.sh --stage 3 --stop_stage 3 --system_version centos \
|
20 |
--file_folder_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
21 |
--final_model_name fsmn-vad-by-webrtcvad-nx2-dns3 \
|
22 |
+
--noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
|
23 |
+
--speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
|
24 |
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
|
25 |
|
26 |
|
|
|
96 |
$verbose && echo "file_folder_name: ${file_folder_name}"
|
97 |
|
98 |
if [ $system_version == "windows" ]; then
|
99 |
+
alias python3='D:/Users/tianx/PycharmProjects/virtualenv/cc_vad/Scripts/python.exe'
|
100 |
elif [ $system_version == "centos" ] || [ $system_version == "ubuntu" ]; then
|
101 |
+
#source /data/local/bin/cc_vad/bin/activate
|
102 |
+
alias python3='/data/local/bin/cc_vad/bin/python3'
|
103 |
fi
|
104 |
|
105 |
|
examples/fsmn_vad_by_webrtcvad/step_1_prepare_data.py
CHANGED
@@ -35,7 +35,7 @@ def get_args():
|
|
35 |
parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
|
36 |
|
37 |
parser.add_argument("--duration", default=8.0, type=float)
|
38 |
-
parser.add_argument("--min_speech_duration", default=
|
39 |
parser.add_argument("--max_speech_duration", default=8.0, type=float)
|
40 |
parser.add_argument("--min_snr_db", default=-10, type=float)
|
41 |
parser.add_argument("--max_snr_db", default=20, type=float)
|
@@ -56,7 +56,7 @@ def target_second_noise_signal_generator(filename_patterns: List[str],
|
|
56 |
|
57 |
for epoch_idx in range(max_epoch):
|
58 |
for filename_pattern in filename_patterns:
|
59 |
-
for filename in glob(filename_pattern):
|
60 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
61 |
|
62 |
if signal.ndim != 1:
|
@@ -109,7 +109,7 @@ def target_second_speech_signal_generator(filename_patterns: List[str],
|
|
109 |
sample_rate: int = 8000, max_epoch: int = 1):
|
110 |
for epoch_idx in range(max_epoch):
|
111 |
for filename_pattern in filename_patterns:
|
112 |
-
for filename in glob(filename_pattern):
|
113 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
114 |
raw_duration = librosa.get_duration(y=signal, sr=sample_rate)
|
115 |
|
|
|
35 |
parser.add_argument("--valid_dataset", default="valid.jsonl", type=str)
|
36 |
|
37 |
parser.add_argument("--duration", default=8.0, type=float)
|
38 |
+
parser.add_argument("--min_speech_duration", default=4.0, type=float)
|
39 |
parser.add_argument("--max_speech_duration", default=8.0, type=float)
|
40 |
parser.add_argument("--min_snr_db", default=-10, type=float)
|
41 |
parser.add_argument("--max_snr_db", default=20, type=float)
|
|
|
56 |
|
57 |
for epoch_idx in range(max_epoch):
|
58 |
for filename_pattern in filename_patterns:
|
59 |
+
for filename in glob(filename_pattern, recursive=True):
|
60 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
61 |
|
62 |
if signal.ndim != 1:
|
|
|
109 |
sample_rate: int = 8000, max_epoch: int = 1):
|
110 |
for epoch_idx in range(max_epoch):
|
111 |
for filename_pattern in filename_patterns:
|
112 |
+
for filename in glob(filename_pattern, recursive=True):
|
113 |
signal, _ = librosa.load(filename, sr=sample_rate)
|
114 |
raw_duration = librosa.get_duration(y=signal, sr=sample_rate)
|
115 |
|