File size: 6,307 Bytes
69ad385
 
 
 
07f87a8
463adfc
 
6e26705
463adfc
07f87a8
463adfc
 
6e26705
69ad385
07f87a8
 
 
 
349ff6e
07f87a8
 
 
6f7cbba
69ad385
 
 
 
 
 
 
 
 
 
 
 
 
 
6e26705
07f87a8
6f7cbba
69ad385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d6c27e
69ad385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
07f87a8
6f7cbba
69ad385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#!/usr/bin/env bash

: <<'END'

sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32 \
--filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
--label_plan 4

sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32 \
--filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
--label_plan 4

sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
--label_plan 3 \
--config_file "yaml/conv2d-classifier-3-ch4.yaml"

sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
--label_plan 2-voicemail \
--config_file "yaml/conv2d-classifier-2-ch32.yaml"

END


# params
#
# Default values for every option this script understands. The option parser
# further down only accepts "--<name> <value>" for variables that are already
# defined, so this list doubles as the set of valid command-line options.

# -- run control --
system_version=windows
verbose=true
stage=0       # start from 0 if you need to start from data preparation
stop_stage=9

# -- locations and experiment settings --
work_dir=$(pwd)
file_folder_name='file_folder_name'
final_model_name='final_model_name'
filename_patterns='/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav'
label_plan=4
config_file='yaml/conv2d-classifier-2-ch4.yaml'
pretrained_model='null'
nohup_name='nohup.out'

country='en-US'

# model params
# NOTE(review): nohup_name, country and the four values below are not
# referenced by any stage visible in this script -- confirm before removing.
batch_size=64
max_epochs=200
save_top_k=10
patience=5


# parse options
#
# Kaldi-style option parsing. Every "--some-name VALUE" pair assigns VALUE to
# the shell variable some_name ("-" in the option name is mapped to "_").
# Only variables that are already defined may be set; anything else is
# rejected as an invalid option. Parsing stops at the first argument that is
# empty or does not start with "--".
#
# Arguments: the script's command-line arguments ("$@").
# Outputs:   error messages on stderr.
# Exits:     1 on an unknown option, a missing value, or a non-boolean value
#            given to a variable whose current value is "true"/"false".
parse_options() {
  local opt_name was_bool
  local -r identifier_re='^[A-Za-z_][A-Za-z0-9_]*$'

  # Stop at the end of the argument list or at the first empty argument.
  while [[ $# -gt 0 && -n "$1" ]]; do
    case "$1" in
      --*)
        opt_name="${1#--}"
        opt_name="${opt_name//-/_}"

        # The name must be a plain identifier that refers to an existing
        # variable; this also keeps arbitrary text out of the indirect
        # expansions below.
        if [[ ! "${opt_name}" =~ ${identifier_re} ]] \
          || [[ -z "${!opt_name+xxx}" ]]; then
          echo "$0: invalid option $1" 1>&2
          exit 1
        fi

        # Without this check "shift 2" fails on a trailing "--opt" and leaves
        # the arguments untouched, so the loop would never terminate.
        if [[ $# -lt 2 ]]; then
          echo "$0: option $1 requires a value" 1>&2
          exit 1
        fi

        # A variable whose current value is true/false is treated as Boolean.
        if [[ "${!opt_name}" == "true" || "${!opt_name}" == "false" ]]; then
          was_bool=true
        else
          was_bool=false
        fi

        # Check that Boolean-valued arguments are really Boolean.
        if ${was_bool} && [[ "$2" != "true" && "$2" != "false" ]]; then
          echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
          exit 1
        fi

        # Assign the value verbatim: spaces, quotes, "$" and backslashes in
        # the value are stored as given instead of being re-evaluated.
        printf -v "${opt_name}" '%s' "$2"
        shift 2
        ;;

      *)
        break
        ;;
    esac
  done
}

parse_options "$@"

# Locations derived from the (possibly overridden) options.
# file_dir holds all intermediate artifacts of a run; final_model_dir is where
# the finished model is collected before being zipped.
file_dir=${work_dir}/${file_folder_name}
final_model_dir=${work_dir}/../../trained_models/${final_model_name}

# Files and directories kept inside file_dir.
dataset=${file_dir}/dataset.xlsx
train_dataset=${file_dir}/train.xlsx
valid_dataset=${file_dir}/valid.xlsx
evaluation_file=${file_dir}/evaluation.xlsx
vocabulary_dir=${file_dir}/vocabulary

# Report the effective platform and working folder when verbose is enabled.
if $verbose; then
  echo "system_version: ${system_version}"
  echo "file_folder_name: ${file_folder_name}"
fi

# Select the project's Python interpreter for the given platform.
#
# The interpreter is exposed through a shell function named python3 rather
# than an alias: bash does not expand aliases in non-interactive shells unless
# expand_aliases is set, so an alias is silently ignored when the script is
# run as "bash run.sh" or "./run.sh".
#
# Arguments: $1 - platform name ("windows", "centos" or "ubuntu").
# Globals:   python_bin (written) - path of the selected interpreter, or
#            empty when the platform is not recognised, in which case
#            whatever python3 is found on PATH is used.
select_python_interpreter() {
  case "$1" in
    windows)
      python_bin='D:/Users/tianx/PycharmProjects/virtualenv/vm_sound_classification/Scripts/python.exe'
      ;;
    centos | ubuntu)
      #source /data/local/bin/vm_sound_classification/bin/activate
      python_bin='/data/local/bin/vm_sound_classification/bin/python3'
      ;;
    *)
      python_bin=''
      unset -f python3
      return 0
      ;;
  esac

  # Forward every argument unchanged to the selected interpreter.
  python3() { "${python_bin}" "$@"; }
}

select_python_interpreter "${system_version}"


# Stage 0: run step_1_prepare_data.py from work_dir with the input file
# patterns, the label plan and the train/valid dataset paths.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
  $verbose && echo "stage 0: prepare data"
  cd "${work_dir}" || exit 1
  # Abort on failure so later stages never run on missing or partial outputs.
  python3 step_1_prepare_data.py \
    --file_dir "${file_dir}" \
    --filename_patterns "${filename_patterns}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --label_plan "${label_plan}" \
    || exit 1
fi


# Stage 1: run step_2_make_vocabulary.py from work_dir with the vocabulary
# directory and the train/valid dataset paths.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
  $verbose && echo "stage 1: make vocabulary"
  cd "${work_dir}" || exit 1
  # Abort on failure so later stages never run on missing or partial outputs.
  python3 step_2_make_vocabulary.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    || exit 1
fi


# Stage 2: run step_3_train_model.py from work_dir. file_dir is passed as the
# serialization directory, together with the model config file and the
# pretrained model setting.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
  $verbose && echo "stage 2: train model"
  cd "${work_dir}" || exit 1
  # Abort on failure so evaluation/export/cleanup never run after a failed
  # training step.
  python3 step_3_train_model.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --serialization_dir "${file_dir}" \
    --config_file "${config_file}" \
    --pretrained_model "${pretrained_model}" \
    || exit 1
fi


# Stage 3: run step_4_evaluation_model.py from work_dir on the model in
# "${file_dir}/best", with evaluation_file as the output file.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
  $verbose && echo "stage 3: test model"
  cd "${work_dir}" || exit 1
  # Abort on failure so later stages never run on missing or partial outputs.
  python3 step_4_evaluation_model.py \
    --dataset "${dataset}" \
    --vocabulary_dir "${vocabulary_dir}" \
    --model_dir "${file_dir}/best" \
    --output_file "${evaluation_file}" \
    || exit 1
fi


# Stage 4: run step_5_export_models.py from work_dir on the model in
# "${file_dir}/best", with file_dir as the serialization directory.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
  $verbose && echo "stage 4: export model"
  cd "${work_dir}" || exit 1
  # Abort on failure so the collect/cleanup stages never run after a failed
  # export.
  python3 step_5_export_models.py \
    --vocabulary_dir "${vocabulary_dir}" \
    --model_dir "${file_dir}/best" \
    --serialization_dir "${file_dir}" \
    || exit 1
fi


# Stage 5: collect the run's artifacts into final_model_dir, zip that
# directory next to itself as "<final_model_name>.zip" (keeping one
# "<final_model_name>_backup.zip" of any previous archive) and then remove
# the unzipped directory.
if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
  $verbose && echo "stage 5: collect files"
  cd "${work_dir}" || exit 1

  mkdir -p "${final_model_dir}" || exit 1

  # Copies stay best-effort, as before: a missing artifact is reported by cp
  # but does not stop the collection.
  cp "${file_dir}/best"/* "${final_model_dir}"
  cp -r "${file_dir}/vocabulary" "${final_model_dir}"

  cp "${file_dir}/evaluation.xlsx" "${final_model_dir}/evaluation.xlsx"

  cp "${file_dir}/trace_model.zip" "${final_model_dir}/trace_model.zip"
  cp "${file_dir}/trace_quant_model.zip" "${final_model_dir}/trace_quant_model.zip"
  cp "${file_dir}/script_model.zip" "${final_model_dir}/script_model.zip"
  cp "${file_dir}/script_quant_model.zip" "${final_model_dir}/script_quant_model.zip"

  cd "${final_model_dir}/.." || exit 1;

  # Keep exactly one backup of a previously produced archive.
  if [ -e "${final_model_name}.zip" ]; then
    rm -rf "${final_model_name}_backup.zip"
    mv "${final_model_name}.zip" "${final_model_name}_backup.zip"
  fi

  # Only delete the collected directory once it has been archived
  # successfully; otherwise a failing zip would destroy the only copy.
  zip -r "${final_model_name}.zip" "${final_model_name}" || exit 1
  rm -rf "${final_model_name:?}"

fi


# Stage 6: delete the intermediate working folder file_dir.
if [ "${stage}" -le 6 ] && [ "${stop_stage}" -ge 6 ]; then
  $verbose && echo "stage 6: clear file_dir"
  cd "${work_dir}" || exit 1

  # file_dir is "${work_dir}/${file_folder_name}"; with an empty folder name
  # the rm below would wipe the whole working directory, so refuse instead.
  if [ -z "${file_folder_name}" ]; then
    echo "$0: refusing to remove '${file_dir}': file_folder_name is empty" 1>&2
    exit 1
  fi

  rm -rf "${file_dir:?}"

fi