Spaces:

qgyd2021
/

cc_audio_8

Running

App Files Community

HoneyTian committed on Jan 10

Commit

07f87a8

1 Parent(s): 349ff6e

add yaml config

Browse files

Files changed (22) hide show

examples/sample_filter/test1.py +25 -11
examples/sample_filter/test2.py +78 -0
examples/sample_filter/test4.py +76 -0
examples/vm_sound_classification/run.sh +12 -7
examples/vm_sound_classification/run_batch.sh +8 -0
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch16.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch32.yaml +45 -0
examples/vm_sound_classification/{conv2d_classifier.yaml → yaml/conv2d-classifier-2-ch4.yaml} +0 -0
examples/vm_sound_classification/yaml/conv2d-classifier-2-ch8.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch16.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch32.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch4.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-3-ch8.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch16.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch32.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch4.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-4-ch8.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch16.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch32.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch4.yaml +45 -0
examples/vm_sound_classification/yaml/conv2d-classifier-8-ch8.yaml +45 -0
main.py +8 -7

examples/sample_filter/test1.py CHANGED Viewed

@@ -12,7 +12,9 @@ def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--data_dir",
-        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp\temp",
         type=str
     )
     parser.add_argument(
@@ -40,28 +42,40 @@ def main():
     client = Client("http://127.0.0.1:7864/")
-    for filename in tqdm(data_dir.glob("*.wav")):
         filename = filename.as_posix()
-        label, prob = client.predict(
             audio=handle_file(filename),
-            model_name="vm_sound_classification8-ch32",
             ground_true="Hello!!",
             api_name="/click_button"
         )
-        prob = float(prob)
-        if prob > 0.7:
-            shutil.move(
-                filename,
-                trash_dir.as_posix(),
-            )
         else:
             shutil.move(
                 filename,
                 keep_dir.as_posix(),
             )
     return

     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--data_dir",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
         type=str
     )
     parser.add_argument(
     client = Client("http://127.0.0.1:7864/")
+    for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
+        # if idx < 639:
+        #     continue
         filename = filename.as_posix()
+        label1, prob1 = client.predict(
             audio=handle_file(filename),
+            # model_name="vm_sound_classification8-ch32",
+            model_name="voicemail-ms-my-2-ch32",
             ground_true="Hello!!",
             api_name="/click_button"
         )
+        prob1 = float(prob1)
+        label2, prob2 = client.predict(
+            audio=handle_file(filename),
+            # model_name="vm_sound_classification8-ch32",
+            model_name="sound-8-ch32",
+            ground_true="Hello!!",
+            api_name="/click_button"
+        )
+        prob2 = float(prob2)
+        if label1 == "voicemail" and label2 in ("voicemail", "bell") and prob2 > 0.6:
+            pass
+        elif label1 == "non_voicemail" and label2 not in ("voicemail", "bell") and prob2 > 0.6:
+            pass
         else:
+            print(f"label1: {label1}, prob1: {prob1}, label2: {label2}, prob2: {prob2}")
             shutil.move(
                 filename,
                 keep_dir.as_posix(),
             )
+            # exit(0)
     return

examples/sample_filter/test2.py ADDED Viewed

	@@ -0,0 +1,78 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import argparse
+from pathlib import Path
+import shutil
+from gradio_client import Client, handle_file
+from tqdm import tqdm
+from project_settings import project_path
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--data_dir",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
+        type=str
+    )
+    parser.add_argument(
+        "--keep_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\keep-3",
+        type=str
+    )
+    parser.add_argument(
+        "--trash_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\trash",
+        type=str
+    )
+    args = parser.parse_args()
+    return args
+def main():
+    args = get_args()
+    data_dir = Path(args.data_dir)
+    keep_dir = Path(args.keep_dir)
+    keep_dir.mkdir(parents=True, exist_ok=True)
+    trash_dir = Path(args.trash_dir)
+    trash_dir.mkdir(parents=True, exist_ok=True)
+    client = Client("http://127.0.0.1:7864/")
+    for idx, filename in tqdm(enumerate(data_dir.glob("*.wav"))):
+        if idx < 200:
+            continue
+        filename = filename.as_posix()
+        label1, prob1 = client.predict(
+            audio=handle_file(filename),
+            # model_name="vm_sound_classification8-ch32",
+            model_name="voicemail-ms-my-2-ch32",
+            ground_true="Hello!!",
+            api_name="/click_button"
+        )
+        prob1 = float(prob1)
+        print(f"label: {label1}, prob: {prob1}")
+        if label1 == "voicemail" and prob1 < 0.95:
+            shutil.move(
+                filename,
+                keep_dir.as_posix(),
+            )
+        elif label1 != "voicemail" and prob1 < 0.85:
+            shutil.move(
+                filename,
+                keep_dir.as_posix(),
+            )
+    return
+if __name__ == '__main__':
+    main()

examples/sample_filter/test4.py ADDED Viewed

	@@ -0,0 +1,76 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import argparse
+import os
+from pathlib import Path
+import shutil
+import pandas as pd
+from gradio_client import Client, handle_file
+from tqdm import tqdm
+from project_settings import project_path
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--task_file",
+        default=r"task_DcTask_1_MY_LIVE_20250109_20250109-1.xlsx",
+        type=str
+    )
+    parser.add_argument(
+        "--wav_dir",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\data-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-2\temp\VoiceAppVoicemailDetection-1",
+        # default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-3\temp\VoiceAppVoicemailDetection-1",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\temp-4\temp\VoiceAppVoicemailDetection-1",
+        type=str
+    )
+    parser.add_argument(
+        "--output_dir",
+        default=r"E:\Users\tianx\HuggingDatasets\vm_sound_classification\data\transfer",
+        type=str
+    )
+    args = parser.parse_args()
+    return args
+def main():
+    args = get_args()
+    wav_dir = Path(args.wav_dir)
+    output_dir = Path(args.output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+    df = pd.read_excel(args.task_file)
+    transfer_set = set()
+    for i, row in df.iterrows():
+        call_id = row["通话ID"]
+        intent_str = row["意向标签"]
+        if intent_str == "Connection - Transferred to agent":
+            transfer_set.add(call_id)
+        if intent_str == "Connection - No human voice detected":
+            transfer_set.add(call_id)
+    print(f"transfer count: {len(transfer_set)}")
+    for idx, filename in tqdm(enumerate(wav_dir.glob("**/*.wav"))):
+        basename = filename.stem
+        call_id, _, _, _ = basename.split("_")
+        if call_id not in transfer_set:
+            continue
+        print(filename.as_posix())
+        shutil.move(
+            filename.as_posix(),
+            output_dir.as_posix()
+        )
+    return
+if __name__ == '__main__':
+    main()

examples/vm_sound_classification/run.sh CHANGED Viewed

@@ -2,22 +2,25 @@
 : <<'END'
-sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification4-ch32 \
 --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
 E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 --label_plan 4
-sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name vm_sound_classification2-ch32 \
 --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
 E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 --label_plan 4
-sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification8-ch32 \
---filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" --label_plan 8
-sh run.sh --stage 0 --stop_stage 5 --system_version centos --file_folder_name file_dir --final_model_name vm_sound_classification2-ch32-voicemail \
---filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/th-TH/wav_finished/*/*.wav" --label_plan 2-voicemail
 END
@@ -33,6 +36,7 @@ file_folder_name=file_folder_name
 final_model_name=final_model_name
 filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
 label_plan=4
 nohup_name=nohup.out
 country=en-US
@@ -125,6 +129,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
   --train_dataset "${train_dataset}" \
   --valid_dataset "${valid_dataset}" \
   --serialization_dir "${file_dir}" \
 fi

 : <<'END'
+sh run.sh --stage 0 --stop_stage 1 --system_version windows --file_folder_name file_dir --final_model_name sound-4-ch32 \
 --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
 E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 --label_plan 4
+sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name sound-2-ch32 \
 --filename_patterns "E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/wav_finished/en-US/wav_finished/*/*.wav \
 E:/Users/tianx/HuggingDatasets/vm_sound_classification/data/wav_finished/id-ID/wav_finished/*/*.wav" \
 --label_plan 4
+sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch32 \
+--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
+--label_plan 3 \
+--config_file "yaml/conv2d-classifier-3-ch4.yaml"
+sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name voicemail-ms-my-2-ch32 \
+--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/ms-MY/wav_finished/*/*.wav" \
+--label_plan 2-voicemail \
+--config_file "yaml/conv2d-classifier-2-ch4.yaml"
 END
 final_model_name=final_model_name
 filename_patterns="/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav"
 label_plan=4
+config_file="yaml/conv2d-classifier-2-ch4.yaml"
 nohup_name=nohup.out
 country=en-US
   --train_dataset "${train_dataset}" \
   --valid_dataset "${valid_dataset}" \
   --serialization_dir "${file_dir}" \
+  --config_file "${config_file}" \
 fi

examples/vm_sound_classification/run_batch.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+#!/usr/bin/env bash
+sh run.sh --stage 0 --stop_stage 6 --system_version centos --file_folder_name file_dir --final_model_name sound-3-ch4 \
+--filename_patterns "/data/tianxing/PycharmProjects/datasets/voicemail/*/wav_finished/*/*.wav" \
+--label_plan 3 \
+--config_file "yaml/conv2d-classifier-3-ch4.yaml"

examples/vm_sound_classification/yaml/conv2d-classifier-2-ch16.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 2

examples/vm_sound_classification/yaml/conv2d-classifier-2-ch32.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 2

examples/vm_sound_classification/{conv2d_classifier.yaml → yaml/conv2d-classifier-2-ch4.yaml} RENAMED Viewed

File without changes

examples/vm_sound_classification/yaml/conv2d-classifier-2-ch8.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 2

examples/vm_sound_classification/yaml/conv2d-classifier-3-ch16.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3

examples/vm_sound_classification/yaml/conv2d-classifier-3-ch32.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3

examples/vm_sound_classification/yaml/conv2d-classifier-3-ch4.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 108
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3

examples/vm_sound_classification/yaml/conv2d-classifier-3-ch8.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 3

examples/vm_sound_classification/yaml/conv2d-classifier-4-ch16.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4

examples/vm_sound_classification/yaml/conv2d-classifier-4-ch32.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4

examples/vm_sound_classification/yaml/conv2d-classifier-4-ch4.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 108
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4

examples/vm_sound_classification/yaml/conv2d-classifier-4-ch8.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 4

examples/vm_sound_classification/yaml/conv2d-classifier-8-ch16.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 16
+    out_channels: 16
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 432
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8

examples/vm_sound_classification/yaml/conv2d-classifier-8-ch32.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 32
+    out_channels: 32
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 864
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8

examples/vm_sound_classification/yaml/conv2d-classifier-8-ch4.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 4
+    out_channels: 4
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 108
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8

examples/vm_sound_classification/yaml/conv2d-classifier-8-ch8.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+model_name: "cnn_audio_classifier"
+mel_spectrogram_param:
+  sample_rate: 8000
+  n_fft: 512
+  win_length: 200
+  hop_length: 80
+  f_min: 10
+  f_max: 3800
+  window_fn: hamming
+  n_mels: 80
+conv2d_block_param_list:
+  - batch_norm: true
+    in_channels: 1
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 5
+    stride: 2
+    dilation: 3
+    activation: relu
+    dropout: 0.1
+  - in_channels: 8
+    out_channels: 8
+    kernel_size: 3
+    stride: 1
+    dilation: 2
+    activation: relu
+    dropout: 0.1
+cls_head_param:
+  input_dim: 216
+  num_layers: 2
+  hidden_dims:
+    - 128
+    - 32
+  activations: relu
+  dropout: 0.1
+  num_labels: 8

main.py CHANGED Viewed

@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 import argparse
 from functools import lru_cache
-import json
 from pathlib import Path
 import platform
 import shutil
@@ -11,7 +10,6 @@ import zipfile
 from typing import Tuple
 import gradio as gr
-from dill.pointers import parents
 from huggingface_hub import snapshot_download
 import numpy as np
 import torch
@@ -114,13 +112,14 @@ def main():
     examples_dir = Path(args.examples_dir)
     trained_model_dir = Path(args.trained_model_dir)
-    trained_model_dir.mkdir(parents=True, exist_ok=True)
     # download models
-    _ = snapshot_download(
-        repo_id=args.models_repo_id,
-        local_dir=trained_model_dir.as_posix()
-    )
     # examples
     example_zip_file = trained_model_dir / "examples.zip"
@@ -138,6 +137,7 @@ def main():
         if model_name == "examples":
             continue
         model_choices.append(model_name)
     # examples
     examples = list()
@@ -187,6 +187,7 @@ def main():
             outputs=[c_label, c_probability],
         )
     blocks.queue().launch(
         share=False if platform.system() == "Windows" else False,
         server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",

 # -*- coding: utf-8 -*-
 import argparse
 from functools import lru_cache
 from pathlib import Path
 import platform
 import shutil
 from typing import Tuple
 import gradio as gr
 from huggingface_hub import snapshot_download
 import numpy as np
 import torch
     examples_dir = Path(args.examples_dir)
     trained_model_dir = Path(args.trained_model_dir)
     # download models
+    if not trained_model_dir.exists():
+        trained_model_dir.mkdir(parents=True, exist_ok=True)
+        _ = snapshot_download(
+            repo_id=args.models_repo_id,
+            local_dir=trained_model_dir.as_posix()
+        )
     # examples
     example_zip_file = trained_model_dir / "examples.zip"
         if model_name == "examples":
             continue
         model_choices.append(model_name)
+    model_choices = list(sorted(model_choices))
     # examples
     examples = list()
             outputs=[c_label, c_probability],
         )
+    # http://127.0.0.1:7864/
     blocks.queue().launch(
         share=False if platform.system() == "Windows" else False,
         server_name="127.0.0.1" if platform.system() == "Windows" else "0.0.0.0",