Commit 580abb7 · admin committed · 1 Parent(s): b9a5e1d
fixing jpgs
app.py
CHANGED
@@ -8,8 +8,9 @@ import numpy as np
 import gradio as gr
 import librosa.display
 import matplotlib.pyplot as plt
-from …
+from collections import Counter
 from model import EvalNet
+from utils import get_modelist, find_files, embed_img
 
 
 TRANSLATE = {
@@ -39,7 +40,7 @@ def wav2mel(audio_path: str, width=0.496145124716553):
         librosa.display.specshow(log_mel_spec[:, i : i + step])
         plt.axis("off")
         plt.savefig(
-            f"{TEMP_DIR}/…
+            f"{TEMP_DIR}/{i}.jpg",
             bbox_inches="tight",
             pad_inches=0.0,
         )
@@ -65,7 +66,7 @@ def wav2cqt(audio_path: str, width=0.496145124716553):
         librosa.display.specshow(log_cqt_spec[:, i : i + step])
         plt.axis("off")
         plt.savefig(
-            f"{TEMP_DIR}/…
+            f"{TEMP_DIR}/{i}.jpg",
             bbox_inches="tight",
             pad_inches=0.0,
         )
@@ -91,7 +92,7 @@ def wav2chroma(audio_path: str, width=0.496145124716553):
         librosa.display.specshow(log_chroma_spec[:, i : i + step])
         plt.axis("off")
         plt.savefig(
-            f"{TEMP_DIR}/…
+            f"{TEMP_DIR}/{i}.jpg",
             bbox_inches="tight",
             pad_inches=0.0,
         )
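The three hunks above all point the slice-export loop at `{i}.jpg` files in `TEMP_DIR`. For orientation, a minimal standalone sketch of that export pattern for the mel case; `TEMP_DIR`, the loading code, and the loop bounds are assumptions drawn from the surrounding context lines, not the Space's exact implementation:

import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

TEMP_DIR = "./tmp_spec"  # assumption: stand-in for whatever temp dir app.py defines


def wav2mel_sketch(audio_path: str, step: int = 128):
    """Slice a log-mel spectrogram into fixed-width windows, saving each as a jpg."""
    os.makedirs(TEMP_DIR, exist_ok=True)
    y, sr = librosa.load(audio_path)
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)
    for i in range(0, log_mel_spec.shape[1] - step + 1, step):
        librosa.display.specshow(log_mel_spec[:, i : i + step])
        plt.axis("off")
        plt.savefig(
            f"{TEMP_DIR}/{i}.jpg",  # the commit's fix: one jpg per window start index
            bbox_inches="tight",
            pad_inches=0.0,
        )
        plt.close()  # release the figure between slices

The diff itself only changes the savefig target line in each of the three converters.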
@@ -101,6 +102,16 @@ def wav2chroma(audio_path: str, width=0.496145124716553):
         print(f"Error converting {audio_path} : {e}")
 
 
+def most_frequent_value(lst: list):
+    counter = Counter(lst)
+    max_count = max(counter.values())
+    for element, count in counter.items():
+        if count == max_count:
+            return element
+
+    return None
+
+
 def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
     if os.path.exists(folder_path):
         shutil.rmtree(folder_path)
@@ -115,9 +126,14 @@ def infer(wav_path: str, log_name: str, folder_path=TEMP_DIR):
 
     spec = log_name.split("_")[-3]
     eval("wav2%s" % spec)(wav_path)
-    …
-    …
-    …
+    jpgs = find_files(folder_path, ".jpg")
+    preds = []
+    for jpg in jpgs:
+        input = embed_img(jpg)
+        output: torch.Tensor = model(input)
+        preds.append(torch.max(output.data, 1)[1])
+
+    pred_id = most_frequent_value(preds)
     return os.path.basename(wav_path), TRANSLATE[CLASSES[pred_id]]
 
 
@@ -125,7 +141,7 @@ if __name__ == "__main__":
     warnings.filterwarnings("ignore")
     models = get_modelist()
     examples = []
-    example_wavs = …
+    example_wavs = find_files()
     model_num = len(models)
     for wav in example_wavs:
         examples.append([wav, models[random.randint(0, model_num - 1)]])
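The behavioral core of the commit is in `infer`: each exported jpg slice is classified separately, and the new `most_frequent_value` helper picks the final label by majority vote. A small self-contained illustration of that vote, assuming the per-slice predictions are plain ints (the diff appends one-element tensors; converting them with `int(...)` would make `Counter` count by value rather than by tensor identity):

from collections import Counter


def most_frequent_value(lst: list):
    """Return the element with the highest count; ties go to the first one counted."""
    counter = Counter(lst)
    max_count = max(counter.values())
    for element, count in counter.items():
        if count == max_count:
            return element
    return None


# Hypothetical per-slice class ids, e.g. int(torch.max(output.data, 1)[1]) per jpg.
preds = [3, 3, 7, 3, 1]
print(most_frequent_value(preds))  # -> 3: class 3 wins across the five slices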
utils.py
CHANGED
@@ -18,11 +18,11 @@ def toCUDA(x):
     return x
 
 
-def …
+def find_files(folder_path=f"{MODEL_DIR}/examples", ext=".wav"):
     wav_files = []
     for root, _, files in os.walk(folder_path):
         for file in files:
-            if file.endswith(…
+            if file.endswith(ext):
                 file_path = os.path.join(root, file)
                 wav_files.append(file_path)
 
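With the extension parameterized, one helper now serves both call sites in app.py: the default discovers the `.wav` examples for the demo, and `infer` reuses it to collect the freshly written jpg slices. A quick usage sketch; `MODEL_DIR` and both paths are placeholders, and the trailing `return wav_files` is implied by the unchanged lines after the hunk:

import os

MODEL_DIR = "./model"  # assumption: utils.py defines this elsewhere


def find_files(folder_path=f"{MODEL_DIR}/examples", ext=".wav"):
    # Recursively collect every file path under folder_path that ends with `ext`.
    wav_files = []
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.endswith(ext):
                file_path = os.path.join(root, file)
                wav_files.append(file_path)
    return wav_files


example_wavs = find_files()                    # as in __main__: default .wav examples
jpg_slices = find_files("./tmp_spec", ".jpg")  # as in infer(): slices under TEMP_DIR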