Spaces:
Sleeping
Sleeping
update
Browse files
examples/sample_filter/bad_case_find.py
CHANGED
@@ -13,8 +13,8 @@ def get_args():
|
|
13 |
parser.add_argument(
|
14 |
"--data_dir",
|
15 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
|
16 |
-
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\us-
|
17 |
-
|
18 |
type=str
|
19 |
)
|
20 |
parser.add_argument(
|
@@ -37,20 +37,20 @@ def main():
|
|
37 |
data_dir = Path(args.data_dir)
|
38 |
keep_dir = Path(args.keep_dir)
|
39 |
keep_dir.mkdir(parents=True, exist_ok=True)
|
40 |
-
trash_dir = Path(args.trash_dir)
|
41 |
-
trash_dir.mkdir(parents=True, exist_ok=True)
|
42 |
|
43 |
client = Client("http://127.0.0.1:7864/")
|
44 |
|
45 |
for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
|
46 |
-
# if idx <
|
47 |
# continue
|
48 |
filename = filename.as_posix()
|
49 |
|
50 |
label1, prob1 = client.predict(
|
51 |
audio=handle_file(filename),
|
52 |
# model_name="vm_sound_classification8-ch32",
|
53 |
-
model_name="voicemail-
|
54 |
ground_true="Hello!!",
|
55 |
api_name="/click_button"
|
56 |
)
|
@@ -65,9 +65,9 @@ def main():
|
|
65 |
)
|
66 |
prob2 = float(prob2)
|
67 |
|
68 |
-
if label1 == "voicemail" and label2 in ("voicemail", "bell") and
|
69 |
pass
|
70 |
-
elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and
|
71 |
pass
|
72 |
else:
|
73 |
print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
|
|
|
13 |
parser.add_argument(
|
14 |
"--data_dir",
|
15 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
|
16 |
+
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\us-3",
|
17 |
+
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
|
18 |
type=str
|
19 |
)
|
20 |
parser.add_argument(
|
|
|
37 |
data_dir = Path(args.data_dir)
|
38 |
keep_dir = Path(args.keep_dir)
|
39 |
keep_dir.mkdir(parents=True, exist_ok=True)
|
40 |
+
# trash_dir = Path(args.trash_dir)
|
41 |
+
# trash_dir.mkdir(parents=True, exist_ok=True)
|
42 |
|
43 |
client = Client("http://127.0.0.1:7864/")
|
44 |
|
45 |
for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
|
46 |
+
# if idx < 400:
|
47 |
# continue
|
48 |
filename = filename.as_posix()
|
49 |
|
50 |
label1, prob1 = client.predict(
|
51 |
audio=handle_file(filename),
|
52 |
# model_name="vm_sound_classification8-ch32",
|
53 |
+
model_name="voicemail-id-id-2-ch32",
|
54 |
ground_true="Hello!!",
|
55 |
api_name="/click_button"
|
56 |
)
|
|
|
65 |
)
|
66 |
prob2 = float(prob2)
|
67 |
|
68 |
+
if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob1 > 1.0:
|
69 |
pass
|
70 |
+
elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob1 > 0.6:
|
71 |
pass
|
72 |
else:
|
73 |
print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
|
examples/sample_filter/wav_find_by_task_excel.py
CHANGED
@@ -12,19 +12,49 @@ from tqdm import tqdm
|
|
12 |
from project_settings import project_path
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def get_args():
|
16 |
parser = argparse.ArgumentParser()
|
17 |
parser.add_argument(
|
18 |
"--task_file",
|
19 |
-
default=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
type=str
|
21 |
)
|
22 |
parser.add_argument(
|
23 |
"--wav_dir",
|
24 |
-
|
25 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
|
26 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
|
27 |
-
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
|
28 |
type=str
|
29 |
)
|
30 |
parser.add_argument(
|
|
|
12 |
from project_settings import project_path
|
13 |
|
14 |
|
15 |
+
"""
|
16 |
+
default="task_DC_Death_Cases_20250220.xlsx",
|
17 |
+
default="task_DC_Death_Cases_20250221.xlsx",
|
18 |
+
default="task_DC_Death_Cases_EC_UC_20250220.xlsx",
|
19 |
+
default="task_DC_Death_Cases_EC_UC_20250221.xlsx",
|
20 |
+
default="task_DcTask_1_ID_LIVE_20250220_20250220-1.xlsx",
|
21 |
+
default="task_DcTask_1_ID_LIVE_20250221_20250221-1.xlsx",
|
22 |
+
default="task_DcTask_3_ID_LIVE_20250220_20250220-1.xlsx",
|
23 |
+
default="task_DcTask_3_ID_LIVE_20250221_20250221-1.xlsx",
|
24 |
+
default="task_DcTask_5_ID_LIVE_20250220_20250220-1.xlsx",
|
25 |
+
default="task_DcTask_5_ID_LIVE_20250221_20250221-1.xlsx",
|
26 |
+
default="task_DcTask_9_ID_LIVE_20250220_20250220-1.xlsx",
|
27 |
+
default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1.xlsx",
|
28 |
+
default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1 (1).xlsx",
|
29 |
+
default="task_DcTask_PTP_ID_LIVE_20250221_20250221-1.xlsx",
|
30 |
+
"""
|
31 |
+
|
32 |
def get_args():
|
33 |
parser = argparse.ArgumentParser()
|
34 |
parser.add_argument(
|
35 |
"--task_file",
|
36 |
+
# default="task_DC_Death_Cases_20250220.xlsx",
|
37 |
+
# default="task_DC_Death_Cases_20250221.xlsx",
|
38 |
+
# default="task_DC_Death_Cases_EC_UC_20250220.xlsx",
|
39 |
+
# default="task_DC_Death_Cases_EC_UC_20250221.xlsx",
|
40 |
+
# default="task_DcTask_1_ID_LIVE_20250220_20250220-1.xlsx",
|
41 |
+
# default="task_DcTask_1_ID_LIVE_20250221_20250221-1.xlsx",
|
42 |
+
# default="task_DcTask_3_ID_LIVE_20250220_20250220-1.xlsx",
|
43 |
+
# default="task_DcTask_3_ID_LIVE_20250221_20250221-1.xlsx",
|
44 |
+
# default="task_DcTask_5_ID_LIVE_20250220_20250220-1.xlsx",
|
45 |
+
# default="task_DcTask_5_ID_LIVE_20250221_20250221-1.xlsx",
|
46 |
+
# default="task_DcTask_9_ID_LIVE_20250220_20250220-1.xlsx",
|
47 |
+
# default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1.xlsx",
|
48 |
+
# default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1 (1).xlsx",
|
49 |
+
default="task_DcTask_PTP_ID_LIVE_20250221_20250221-1.xlsx",
|
50 |
type=str
|
51 |
)
|
52 |
parser.add_argument(
|
53 |
"--wav_dir",
|
54 |
+
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\id",
|
55 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
|
56 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
|
57 |
+
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
|
58 |
type=str
|
59 |
)
|
60 |
parser.add_argument(
|
examples/vm_sound_classification/run_batch.sh
CHANGED
@@ -143,13 +143,13 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
|
|
143 |
#--label_plan 2-voicemail \
|
144 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
145 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4 \
|
154 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
155 |
#--label_plan 2-voicemail \
|
@@ -195,11 +195,11 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
|
|
195 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
196 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
197 |
|
198 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
|
204 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
|
205 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
@@ -212,13 +212,13 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
|
|
212 |
#--label_plan 2-voicemail \
|
213 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
214 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32 \
|
223 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
224 |
#--label_plan 2-voicemail \
|
|
|
143 |
#--label_plan 2-voicemail \
|
144 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
145 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
|
146 |
+
|
147 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4 \
|
148 |
+
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
149 |
+
--label_plan 2-voicemail \
|
150 |
+
--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
151 |
+
--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
|
152 |
+
|
153 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4 \
|
154 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
155 |
#--label_plan 2-voicemail \
|
|
|
195 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
196 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
197 |
|
198 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
|
199 |
+
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
|
200 |
+
#--label_plan 2-voicemail \
|
201 |
+
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
202 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
203 |
|
204 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
|
205 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
|
|
212 |
#--label_plan 2-voicemail \
|
213 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
214 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
215 |
+
|
216 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32 \
|
217 |
+
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
218 |
+
--label_plan 2-voicemail \
|
219 |
+
--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
220 |
+
--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
221 |
+
|
222 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32 \
|
223 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
224 |
#--label_plan 2-voicemail \
|