File size: 4,569 Bytes
9829721 de43449 d0a5aa2 48776cd d0a5aa2 9829721 48776cd 9829721 7f331d5 9829721 48776cd 9829721 7f331d5 9829721 7f331d5 9829721 7f331d5 d87e440 7f331d5 9829721 d87e440 84babf6 9829721 84babf6 9829721 84babf6 9829721 d87e440 9829721 d87e440 9829721 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
#!/usr/bin/env bash
: <<'END'
bash run.sh --stage 3 --stop_stage 5 --system_version centos \
--file_folder_name silero-vad-by-webrtcvad-nx2-dns3 \
--final_model_name silero-vad-by-webrtcvad-nx2-dns3 \
--noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
--speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"
END
# ---------------------------------------------------------------------------
# Default settings.
# The option parser further down only accepts "--<name> <value>" for names
# that are already defined as variables, so every assignment here doubles as
# the declaration of a command-line option.
# ---------------------------------------------------------------------------

# Platform selector: "windows", "centos" or "ubuntu".
system_version="windows"
verbose=true

# First and last pipeline stage to run (inclusive).
# Start from 0 if you need to start from data preparation.
stage=0
stop_stage=9

# Locations and names.
work_dir="$(pwd)"
file_folder_name="file_folder_name"
final_model_name="final_model_name"
config_file="yaml/config.yaml"
nohup_name="nohup.out"

# Data selection. The glob patterns are stored as literal strings.
limit=10
noise_patterns="/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav"
speech_patterns="/data/tianxing/HuggingDatasets/nx_noise/data/speech/**/*.wav"
max_count=-1

# Model params.
batch_size=64
max_epochs=200
save_top_k=10
patience=5
# parse options
#######################################
# Validate one "--name value" pair and store the value in the shell variable
# that the option names.
#
# The option name is "$1" without its leading "--" and with every "-" turned
# into "_". Only variables that are already set are accepted, so the list of
# valid options is exactly the list of variables defined before parsing.
# A variable whose current value is "true" or "false" only accepts "true" or
# "false" as its new value.
#
# The value is assigned literally (no eval), so quotes, "$" and backticks in
# the value are stored as-is instead of being re-interpreted by the shell.
#
# Arguments: $1 - option as typed, e.g. --stop-stage
#            $2 - value to assign
# Outputs:   error message on stderr for an invalid option or value
# Returns:   0 on success; exits the script with status 1 on error
#######################################
set_option() {
  local -r _opt_ident_re='^[A-Za-z_][A-Za-z0-9_]*$'
  local _opt_name="${1#--}"
  local _opt_previous
  _opt_name="${_opt_name//-/_}"

  # Reject anything that is not a plain identifier before using it for
  # indirection, then require the variable to exist already.
  if [[ ! "${_opt_name}" =~ ${_opt_ident_re} ]] \
    || [[ -z "${!_opt_name+xxx}" ]]; then
    echo "$0: invalid option $1" 1>&2
    exit 1
  fi

  # Check that Boolean-valued arguments are really Boolean.
  _opt_previous="${!_opt_name}"
  if [[ "${_opt_previous}" == "true" || "${_opt_previous}" == "false" ]] \
    && [[ "$2" != "true" && "$2" != "false" ]]; then
    echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
    exit 1
  fi

  printf -v "${_opt_name}" '%s' "$2"
}

# Consume leading "--name value" pairs; stop at the first argument that is
# empty or does not start with "--" and leave the rest in "$@".
while [[ "${1:-}" == --* ]]; do
  # Without this check a trailing option with no value would make `shift 2`
  # fail without shifting, and the loop would never terminate.
  if (( $# < 2 )); then
    echo "$0: option $1 requires a value" 1>&2
    exit 1
  fi
  set_option "$1" "$2"
  shift 2
done
# Paths derived from the (possibly overridden) settings.

# Working directory for this experiment.
file_dir="$work_dir/$file_folder_name"

# Destination of the collected model, two levels above the work directory.
final_model_dir="$work_dir/../../trained_models/$final_model_name"

evaluation_audio_dir="$file_dir/evaluation_audio"

# Dataset manifests, all stored inside the experiment directory.
train_dataset="$file_dir/train.jsonl"
valid_dataset="$file_dir/valid.jsonl"
train_vad_dataset="$file_dir/train-vad.jsonl"
valid_vad_dataset="$file_dir/valid-vad.jsonl"
$verbose && echo "system_version: ${system_version}"
$verbose && echo "file_folder_name: ${file_folder_name}"

#######################################
# Route every later `python3` call in this script through the given
# interpreter.
#
# A shell function is used instead of `alias` because bash does not expand
# aliases in non-interactive shells, so an alias defined here would be
# silently ignored and the `python3` found on PATH would run instead.
#
# Globals:   python_bin (written) - path of the selected interpreter
# Arguments: $1 - path to the Python interpreter
# Outputs:   warning on stderr when the interpreter is not executable
# Returns:   0 when the wrapper was installed, 1 otherwise (in which case
#            `python3` keeps resolving through PATH)
#######################################
use_python_interpreter() {
  if [[ ! -x "$1" ]]; then
    echo "$0: warning: $1 is not executable; using python3 from PATH" 1>&2
    return 1
  fi
  python_bin="$1"
  python3() { "${python_bin}" "$@"; }
}

if [[ "${system_version}" == "windows" ]]; then
  use_python_interpreter 'D:/Users/tianx/PycharmProjects/virtualenv/nx_denoise/Scripts/python.exe'
elif [[ "${system_version}" == "centos" || "${system_version}" == "ubuntu" ]]; then
  #source /data/local/bin/nx_denoise/bin/activate
  use_python_interpreter '/data/local/bin/nx_denoise/bin/python3'
fi
#######################################
# Stage 1: run step_1_prepare_data.py from the work directory.
# Globals:   verbose, work_dir, noise_patterns, speech_patterns,
#            train_dataset, valid_dataset, max_count (all read)
# Returns:   exit status of the Python script; 1 if the work directory
#            cannot be entered
#######################################
run_stage_1() {
  $verbose && echo "stage 1: prepare data"
  cd "${work_dir}" || return 1
  # The last argument carries no trailing backslash: a continuation there
  # would splice the following line into this command.
  python3 step_1_prepare_data.py \
    --noise_patterns "${noise_patterns}" \
    --speech_patterns "${speech_patterns}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --max_count "${max_count}"
}

# Run when 1 lies inside [stage, stop_stage]; stop the pipeline on failure
# so later stages never operate on missing or partial output.
if [ "${stage:-0}" -le 1 ] && [ "${stop_stage:-0}" -ge 1 ]; then
  run_stage_1 || exit 1
fi
#######################################
# Stage 2: run step_2_make_vad_segments.py from the work directory.
# Globals:   verbose, work_dir, train_dataset, valid_dataset,
#            train_vad_dataset, valid_vad_dataset (all read)
# Returns:   exit status of the Python script; 1 if the work directory
#            cannot be entered
#######################################
run_stage_2() {
  $verbose && echo "stage 2: make vad segments"
  cd "${work_dir}" || return 1
  # The last argument carries no trailing backslash: a continuation there
  # would splice the following line into this command.
  python3 step_2_make_vad_segments.py \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --train_vad_dataset "${train_vad_dataset}" \
    --valid_vad_dataset "${valid_vad_dataset}"
}

# Run when 2 lies inside [stage, stop_stage]; stop the pipeline on failure
# so later stages never operate on missing or partial output.
if [ "${stage:-0}" -le 2 ] && [ "${stop_stage:-0}" -ge 2 ]; then
  run_stage_2 || exit 1
fi
#######################################
# Stage 3: run step_4_train_model.py from the work directory, writing into
# the experiment directory.
# NOTE(review): the script is named step_4 although this is stage 3 -
# confirm the numbering offset is intentional.
# Globals:   verbose, work_dir, train_vad_dataset, valid_vad_dataset,
#            file_dir, config_file (all read)
# Returns:   exit status of the Python script; 1 if the work directory
#            cannot be entered
#######################################
run_stage_3() {
  $verbose && echo "stage 3: train model"
  cd "${work_dir}" || return 1
  # The last argument carries no trailing backslash: a continuation there
  # would splice the following line into this command.
  python3 step_4_train_model.py \
    --train_dataset "${train_vad_dataset}" \
    --valid_dataset "${valid_vad_dataset}" \
    --serialization_dir "${file_dir}" \
    --config_file "${config_file}"
}

# Run when 3 lies inside [stage, stop_stage]; stop the pipeline on failure
# so later stages never operate on missing or partial output.
if [ "${stage:-0}" -le 3 ] && [ "${stop_stage:-0}" -ge 3 ]; then
  run_stage_3 || exit 1
fi
#######################################
# Stage 4: run step_5_export_model.py from the work directory, reading from
# and writing to "${file_dir}/best".
# Globals:   verbose, work_dir, file_dir (all read)
# Returns:   exit status of the Python script; 1 if the work directory
#            cannot be entered
#######################################
run_stage_4() {
  $verbose && echo "stage 4: export model"
  cd "${work_dir}" || return 1
  # The last argument carries no trailing backslash: a continuation there
  # would splice the following line into this command.
  python3 step_5_export_model.py \
    --model_dir "${file_dir}/best" \
    --output_dir "${file_dir}/best"
}

# Run when 4 lies inside [stage, stop_stage]; stop the pipeline on failure
# so later stages never operate on missing or partial output.
if [ "${stage:-0}" -le 4 ] && [ "${stop_stage:-0}" -ge 4 ]; then
  run_stage_4 || exit 1
fi
#######################################
# Stage 5: copy the files in "${file_dir}/best" into the final model
# directory, zip that directory next to itself, then delete the unzipped
# copy. An existing archive of the same name is kept as
# "<final_model_name>_backup.zip" (replacing any older backup).
#
# The unzipped copy is deleted only after `cp` and `zip` both succeeded, so
# a missing `zip` binary or a failed copy never leaves the run without the
# collected model.
#
# Globals:   verbose, work_dir, file_dir, final_model_dir,
#            final_model_name (all read)
# Outputs:   error message on stderr when the archive cannot be created
# Returns:   0 on success, non-zero on the first failing step
#######################################
run_stage_5() {
  $verbose && echo "stage 5: collect files"
  cd "${work_dir}" || return 1

  mkdir -p "${final_model_dir}" || return 1
  cp "${file_dir}/best"/* "${final_model_dir}" || return 1

  # Work from the parent directory so the archive stores paths relative to
  # "<final_model_name>/".
  cd "${final_model_dir}/.." || return 1

  if [ -e "${final_model_name}.zip" ]; then
    rm -rf -- "${final_model_name}_backup.zip"
    mv "${final_model_name}.zip" "${final_model_name}_backup.zip"
  fi

  if ! zip -r "${final_model_name}.zip" "${final_model_name}"; then
    echo "$0: failed to create ${final_model_name}.zip; keeping ${final_model_dir}" 1>&2
    return 1
  fi

  # ":?" aborts instead of running "rm -rf" on an empty name.
  rm -rf -- "${final_model_name:?}"
}

# Run when 5 lies inside [stage, stop_stage]; stop the pipeline on failure
# so the cleanup stage cannot delete the only copy of the model.
if [ "${stage:-0}" -le 5 ] && [ "${stop_stage:-0}" -ge 5 ]; then
  run_stage_5 || exit 1
fi
#######################################
# Stage 6: delete the experiment directory "${file_dir}".
#
# Refuses to run when file_dir is empty, is "/" or is the work directory
# itself (which is what file_dir collapses to when file_folder_name is
# empty), because "rm -rf" would then remove far more than one experiment.
#
# Globals:   verbose, work_dir, file_dir (all read)
# Outputs:   error message on stderr when the path is refused
# Returns:   0 on success, 1 when the path is refused or the work directory
#            cannot be entered
#######################################
run_stage_6() {
  $verbose && echo "stage 6: clear file_dir"
  cd "${work_dir}" || return 1

  # Compare without a trailing slash so "<work_dir>/" and "/" are caught.
  case "${file_dir%/}" in
    "" | "${work_dir%/}")
      echo "$0: refusing to remove '${file_dir}'" 1>&2
      return 1
      ;;
  esac

  rm -rf -- "${file_dir:?}"
}

# Run when 6 lies inside [stage, stop_stage].
if [ "${stage:-0}" -le 6 ] && [ "${stop_stage:-0}" -ge 6 ]; then
  run_stage_6 || exit 1
fi
|