Spaces:
Sleeping
Sleeping
update
Browse files
examples/sample_filter/bad_case_find.py
CHANGED
|
@@ -13,8 +13,8 @@ def get_args():
|
|
| 13 |
parser.add_argument(
|
| 14 |
"--data_dir",
|
| 15 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
|
| 16 |
-
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\us-
|
| 17 |
-
|
| 18 |
type=str
|
| 19 |
)
|
| 20 |
parser.add_argument(
|
|
@@ -37,20 +37,20 @@ def main():
|
|
| 37 |
data_dir = Path(args.data_dir)
|
| 38 |
keep_dir = Path(args.keep_dir)
|
| 39 |
keep_dir.mkdir(parents=True, exist_ok=True)
|
| 40 |
-
trash_dir = Path(args.trash_dir)
|
| 41 |
-
trash_dir.mkdir(parents=True, exist_ok=True)
|
| 42 |
|
| 43 |
client = Client("http://127.0.0.1:7864/")
|
| 44 |
|
| 45 |
for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
|
| 46 |
-
# if idx <
|
| 47 |
# continue
|
| 48 |
filename = filename.as_posix()
|
| 49 |
|
| 50 |
label1, prob1 = client.predict(
|
| 51 |
audio=handle_file(filename),
|
| 52 |
# model_name="vm_sound_classification8-ch32",
|
| 53 |
-
model_name="voicemail-
|
| 54 |
ground_true="Hello!!",
|
| 55 |
api_name="/click_button"
|
| 56 |
)
|
|
@@ -65,9 +65,9 @@ def main():
|
|
| 65 |
)
|
| 66 |
prob2 = float(prob2)
|
| 67 |
|
| 68 |
-
if label1 == "voicemail" and label2 in ("voicemail", "bell") and
|
| 69 |
pass
|
| 70 |
-
elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and
|
| 71 |
pass
|
| 72 |
else:
|
| 73 |
print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
|
|
|
|
| 13 |
parser.add_argument(
|
| 14 |
"--data_dir",
|
| 15 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
|
| 16 |
+
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\us-3",
|
| 17 |
+
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
|
| 18 |
type=str
|
| 19 |
)
|
| 20 |
parser.add_argument(
|
|
|
|
| 37 |
data_dir = Path(args.data_dir)
|
| 38 |
keep_dir = Path(args.keep_dir)
|
| 39 |
keep_dir.mkdir(parents=True, exist_ok=True)
|
| 40 |
+
# trash_dir = Path(args.trash_dir)
|
| 41 |
+
# trash_dir.mkdir(parents=True, exist_ok=True)
|
| 42 |
|
| 43 |
client = Client("http://127.0.0.1:7864/")
|
| 44 |
|
| 45 |
for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
|
| 46 |
+
# if idx < 400:
|
| 47 |
# continue
|
| 48 |
filename = filename.as_posix()
|
| 49 |
|
| 50 |
label1, prob1 = client.predict(
|
| 51 |
audio=handle_file(filename),
|
| 52 |
# model_name="vm_sound_classification8-ch32",
|
| 53 |
+
model_name="voicemail-id-id-2-ch32",
|
| 54 |
ground_true="Hello!!",
|
| 55 |
api_name="/click_button"
|
| 56 |
)
|
|
|
|
| 65 |
)
|
| 66 |
prob2 = float(prob2)
|
| 67 |
|
| 68 |
+
if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob1 > 1.0:
|
| 69 |
pass
|
| 70 |
+
elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob1 > 0.6:
|
| 71 |
pass
|
| 72 |
else:
|
| 73 |
print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
|
examples/sample_filter/wav_find_by_task_excel.py
CHANGED
|
@@ -12,19 +12,49 @@ from tqdm import tqdm
|
|
| 12 |
from project_settings import project_path
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def get_args():
|
| 16 |
parser = argparse.ArgumentParser()
|
| 17 |
parser.add_argument(
|
| 18 |
"--task_file",
|
| 19 |
-
default=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
type=str
|
| 21 |
)
|
| 22 |
parser.add_argument(
|
| 23 |
"--wav_dir",
|
| 24 |
-
|
| 25 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
|
| 26 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
|
| 27 |
-
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
|
| 28 |
type=str
|
| 29 |
)
|
| 30 |
parser.add_argument(
|
|
|
|
| 12 |
from project_settings import project_path
|
| 13 |
|
| 14 |
|
| 15 |
+
"""
|
| 16 |
+
default="task_DC_Death_Cases_20250220.xlsx",
|
| 17 |
+
default="task_DC_Death_Cases_20250221.xlsx",
|
| 18 |
+
default="task_DC_Death_Cases_EC_UC_20250220.xlsx",
|
| 19 |
+
default="task_DC_Death_Cases_EC_UC_20250221.xlsx",
|
| 20 |
+
default="task_DcTask_1_ID_LIVE_20250220_20250220-1.xlsx",
|
| 21 |
+
default="task_DcTask_1_ID_LIVE_20250221_20250221-1.xlsx",
|
| 22 |
+
default="task_DcTask_3_ID_LIVE_20250220_20250220-1.xlsx",
|
| 23 |
+
default="task_DcTask_3_ID_LIVE_20250221_20250221-1.xlsx",
|
| 24 |
+
default="task_DcTask_5_ID_LIVE_20250220_20250220-1.xlsx",
|
| 25 |
+
default="task_DcTask_5_ID_LIVE_20250221_20250221-1.xlsx",
|
| 26 |
+
default="task_DcTask_9_ID_LIVE_20250220_20250220-1.xlsx",
|
| 27 |
+
default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1.xlsx",
|
| 28 |
+
default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1 (1).xlsx",
|
| 29 |
+
default="task_DcTask_PTP_ID_LIVE_20250221_20250221-1.xlsx",
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
def get_args():
|
| 33 |
parser = argparse.ArgumentParser()
|
| 34 |
parser.add_argument(
|
| 35 |
"--task_file",
|
| 36 |
+
# default="task_DC_Death_Cases_20250220.xlsx",
|
| 37 |
+
# default="task_DC_Death_Cases_20250221.xlsx",
|
| 38 |
+
# default="task_DC_Death_Cases_EC_UC_20250220.xlsx",
|
| 39 |
+
# default="task_DC_Death_Cases_EC_UC_20250221.xlsx",
|
| 40 |
+
# default="task_DcTask_1_ID_LIVE_20250220_20250220-1.xlsx",
|
| 41 |
+
# default="task_DcTask_1_ID_LIVE_20250221_20250221-1.xlsx",
|
| 42 |
+
# default="task_DcTask_3_ID_LIVE_20250220_20250220-1.xlsx",
|
| 43 |
+
# default="task_DcTask_3_ID_LIVE_20250221_20250221-1.xlsx",
|
| 44 |
+
# default="task_DcTask_5_ID_LIVE_20250220_20250220-1.xlsx",
|
| 45 |
+
# default="task_DcTask_5_ID_LIVE_20250221_20250221-1.xlsx",
|
| 46 |
+
# default="task_DcTask_9_ID_LIVE_20250220_20250220-1.xlsx",
|
| 47 |
+
# default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1.xlsx",
|
| 48 |
+
# default="task_DcTask_PTP_ID_LIVE_20250220_20250220-1 (1).xlsx",
|
| 49 |
+
default="task_DcTask_PTP_ID_LIVE_20250221_20250221-1.xlsx",
|
| 50 |
type=str
|
| 51 |
)
|
| 52 |
parser.add_argument(
|
| 53 |
"--wav_dir",
|
| 54 |
+
default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\id",
|
| 55 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
|
| 56 |
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
|
| 57 |
+
# default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
|
| 58 |
type=str
|
| 59 |
)
|
| 60 |
parser.add_argument(
|
examples/vm_sound_classification/run_batch.sh
CHANGED
|
@@ -143,13 +143,13 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
|
|
| 143 |
#--label_plan 2-voicemail \
|
| 144 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 145 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4 \
|
| 154 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 155 |
#--label_plan 2-voicemail \
|
|
@@ -195,11 +195,11 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
|
|
| 195 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 196 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
| 197 |
|
| 198 |
-
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
|
| 204 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
|
| 205 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
|
@@ -212,13 +212,13 @@ sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name fi
|
|
| 212 |
#--label_plan 2-voicemail \
|
| 213 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 214 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32 \
|
| 223 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 224 |
#--label_plan 2-voicemail \
|
|
|
|
| 143 |
#--label_plan 2-voicemail \
|
| 144 |
#--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 145 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
|
| 146 |
+
|
| 147 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch4 \
|
| 148 |
+
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
| 149 |
+
--label_plan 2-voicemail \
|
| 150 |
+
--config_file "yaml/conv2d-classifier-2-ch4.yaml" \
|
| 151 |
+
--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch4.zip"
|
| 152 |
+
|
| 153 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch4 \
|
| 154 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 155 |
#--label_plan 2-voicemail \
|
|
|
|
| 195 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 196 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
| 197 |
|
| 198 |
+
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-en-us-2-ch32 \
|
| 199 |
+
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/en-US/wav_finished/*/*.wav" \
|
| 200 |
+
#--label_plan 2-voicemail \
|
| 201 |
+
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 202 |
+
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
| 203 |
|
| 204 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-es-mx-2-ch32 \
|
| 205 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/es-MX/wav_finished/*/*.wav" \
|
|
|
|
| 212 |
#--label_plan 2-voicemail \
|
| 213 |
#--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 214 |
#--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
| 215 |
+
|
| 216 |
+
sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-id-id-2-ch32 \
|
| 217 |
+
--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/id-ID/wav_finished/*/*.wav" \
|
| 218 |
+
--label_plan 2-voicemail \
|
| 219 |
+
--config_file "yaml/conv2d-classifier-2-ch32.yaml" \
|
| 220 |
+
--pretrained_model "/data/tianxing/PycharmProjects/vm_sound_classification/trained_models/voicemail-2-ch32.zip"
|
| 221 |
+
|
| 222 |
#sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ja-jp-2-ch32 \
|
| 223 |
#--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ja-JP/wav_finished/*/*.wav" \
|
| 224 |
#--label_plan 2-voicemail \
|