Update app.py
Browse files
app.py
CHANGED
@@ -1,31 +1,10 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import torchaudio
|
3 |
-
import torch
|
4 |
-
import os
|
5 |
-
from rave import RAVE # Assuming rave.py or pip package is available
|
6 |
-
from huggingface_hub import hf_hub_download
|
7 |
-
|
8 |
-
# ✅ Available RAVE models (can expand dynamically from HF repo)
|
9 |
-
RAVE_MODELS = {
|
10 |
-
"Guitar": "guitar_iil_b2048_r48000_z16.ts",
|
11 |
-
"Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
|
12 |
-
"Organ (Archive)": "organ_archive_b2048_r48000_z16.ts",
|
13 |
-
"Organ (Bach)": "organ_bach_b2048_r48000_z16.ts",
|
14 |
-
"Voice Multivoice": "voice-multi-b2048-r48000-z11.ts",
|
15 |
-
"Birds Dawn Chorus": "birds_dawnchorus_b2048_r48000_z8.ts",
|
16 |
-
"Magnets": "magnets_b2048_r48000_z8.ts",
|
17 |
-
"Whale Songs": "humpbacks_pondbrain_b2048_r48000_z20.ts"
|
18 |
-
}
|
19 |
-
|
20 |
-
MODEL_CACHE = {}
|
21 |
-
|
22 |
import gradio as gr
|
23 |
import torchaudio
|
24 |
import torch
|
25 |
import numpy as np
|
26 |
from huggingface_hub import hf_hub_download
|
27 |
|
28 |
-
# β
|
29 |
RAVE_MODELS = {
|
30 |
"Guitar": "guitar_iil_b2048_r48000_z16.ts",
|
31 |
"Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
|
@@ -40,7 +19,7 @@ RAVE_MODELS = {
|
|
40 |
MODEL_CACHE = {}
|
41 |
|
42 |
def load_rave_model(model_name):
|
43 |
-
"""Load
|
44 |
if model_name in MODEL_CACHE:
|
45 |
return MODEL_CACHE[model_name]
|
46 |
|
@@ -55,31 +34,29 @@ def load_rave_model(model_name):
|
|
55 |
return model
|
56 |
|
57 |
def apply_rave(audio, model_name):
|
58 |
-
"""Apply selected RAVE
|
59 |
model = load_rave_model(model_name)
|
60 |
|
61 |
-
# Convert numpy audio
|
62 |
audio_tensor = torch.tensor(audio[0]).unsqueeze(0) # [1, samples]
|
63 |
sr = audio[1]
|
64 |
|
65 |
-
# β
|
66 |
if sr != 48000:
|
67 |
audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
|
68 |
sr = 48000
|
69 |
|
70 |
with torch.no_grad():
|
71 |
-
# β
|
72 |
-
# TorchScript models are usually structured like: model.encode(x) / model.decode(z)
|
73 |
z = model.encode(audio_tensor)
|
74 |
processed_audio = model.decode(z)
|
75 |
|
76 |
return (processed_audio.squeeze().cpu().numpy(), sr)
|
77 |
|
78 |
-
|
79 |
-
# Gradio Interface
|
80 |
with gr.Blocks() as demo:
|
81 |
gr.Markdown("## π RAVE Style Transfer on Stems")
|
82 |
-
gr.Markdown("Upload audio,
|
83 |
|
84 |
with gr.Row():
|
85 |
audio_input = gr.Audio(type="numpy", label="Upload Audio", sources=["upload", "microphone"])
|
@@ -88,7 +65,6 @@ with gr.Blocks() as demo:
|
|
88 |
with gr.Row():
|
89 |
output_audio = gr.Audio(type="numpy", label="Transformed Audio")
|
90 |
|
91 |
-
# API + UI trigger
|
92 |
process_btn = gr.Button("Apply Style Transfer")
|
93 |
process_btn.click(fn=apply_rave, inputs=[audio_input, model_selector], outputs=output_audio)
|
94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import torchaudio
|
3 |
import torch
|
4 |
import numpy as np
|
5 |
from huggingface_hub import hf_hub_download
|
6 |
|
7 |
+
# ✅ Map of model names to files on Hugging Face
|
8 |
RAVE_MODELS = {
|
9 |
"Guitar": "guitar_iil_b2048_r48000_z16.ts",
|
10 |
"Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
|
|
|
19 |
MODEL_CACHE = {}
|
20 |
|
21 |
def load_rave_model(model_name):
|
22 |
+
"""Load TorchScript RAVE model from Hugging Face Hub."""
|
23 |
if model_name in MODEL_CACHE:
|
24 |
return MODEL_CACHE[model_name]
|
25 |
|
|
|
34 |
return model
|
35 |
|
36 |
def apply_rave(audio, model_name):
    """Apply the selected RAVE model to a Gradio audio value.

    Args:
        audio: Value produced by ``gr.Audio(type="numpy")``, i.e. a
            ``(sample_rate, samples)`` tuple. The reversed
            ``(samples, sample_rate)`` order that this function originally
            assumed is still accepted for direct callers.
        model_name: Key of the model to use; passed to ``load_rave_model``.

    Returns:
        A ``(sample_rate, samples)`` tuple, the order ``gr.Audio`` expects
        from a function that feeds an audio output component. ``samples``
        is a float NumPy array at 48 kHz.

    Raises:
        gr.Error: If no audio was supplied or the clip is empty.
    """
    if audio is None:
        # The button can be clicked before anything is uploaded/recorded.
        raise gr.Error("Please upload or record audio first.")

    # Gradio sends (sample_rate, data); tell the two apart by rank so the
    # legacy (data, sample_rate) order keeps working as well.
    first, second = audio
    if np.ndim(first) == 0:
        sr, samples = int(first), np.asarray(second)
    else:
        samples, sr = np.asarray(first), int(second)

    if samples.size == 0:
        raise gr.Error("The provided audio clip is empty.")

    # Gradio usually delivers integer PCM; scale it to floats in [-1, 1]
    # because the model operates on floating-point waveforms.
    if np.issubdtype(samples.dtype, np.integer):
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)

    # Multi-channel input arrives as (samples, channels); downmix to mono.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Shape [batch=1, channels=1, samples], as in the RAVE README usage.
    # NOTE(review): confirm every exported model listed here is mono.
    audio_tensor = torch.from_numpy(samples).reshape(1, 1, -1)

    # Resample if needed (the listed models are all r48000 exports).
    if sr != 48000:
        audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
        sr = 48000

    model = load_rave_model(model_name)
    with torch.no_grad():
        # TorchScript RAVE exports expose encode() and decode().
        z = model.encode(audio_tensor)
        processed_audio = model.decode(z)

    return (sr, processed_audio.squeeze().cpu().numpy())
|
55 |
|
56 |
+
# Gradio UI
|
|
|
57 |
with gr.Blocks() as demo:
|
58 |
gr.Markdown("## π RAVE Style Transfer on Stems")
|
59 |
+
gr.Markdown("Upload audio, pick a RAVE model, and get a transformed version.")
|
60 |
|
61 |
with gr.Row():
|
62 |
audio_input = gr.Audio(type="numpy", label="Upload Audio", sources=["upload", "microphone"])
|
|
|
65 |
with gr.Row():
|
66 |
output_audio = gr.Audio(type="numpy", label="Transformed Audio")
|
67 |
|
|
|
68 |
process_btn = gr.Button("Apply Style Transfer")
|
69 |
process_btn.click(fn=apply_rave, inputs=[audio_input, model_selector], outputs=output_audio)
|
70 |
|