Update app.py
app.py
CHANGED
@@ -37,22 +37,28 @@ def apply_rave(audio, model_name):
     """Apply selected RAVE model to uploaded audio."""
     model = load_rave_model(model_name)
 
-    #
-
-    sr = audio[1]
+    # Unpack properly
+    waveform, sr = audio  # waveform: np.array [samples, channels]
 
-    #
-    if
-
+    # Convert stereo -> mono if needed
+    if waveform.ndim > 1:
+        waveform = np.mean(waveform, axis=1)
+
+    # Convert numpy to torch tensor
+    audio_tensor = torch.tensor(waveform).unsqueeze(0)  # shape: [1, samples]
+
+    # Resample if needed
+    if int(sr) != 48000:
+        audio_tensor = torchaudio.functional.resample(audio_tensor, int(sr), 48000)
     sr = 48000
 
     with torch.no_grad():
-        # TorchScript models have encode & decode methods
         z = model.encode(audio_tensor)
         processed_audio = model.decode(z)
 
     return (processed_audio.squeeze().cpu().numpy(), sr)
 
+
 # Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("## RAVE Style Transfer on Stems")
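
For reference, load_rave_model is not shown in this hunk, but the comment removed inside the torch.no_grad() block says the models are TorchScript exports with encode and decode methods. A minimal sketch of such a loader, assuming pretrained .ts exports live in a local models/ directory (the directory layout, model names, and the MODEL_PATHS mapping are hypothetical, not taken from this repo):

import functools
import torch

# Hypothetical mapping from UI model names to local TorchScript RAVE exports.
# Exported RAVE .ts models expose encode() and decode(), which is what
# apply_rave calls above.
MODEL_PATHS = {
    "vintage": "models/vintage.ts",
    "percussion": "models/percussion.ts",
}

@functools.lru_cache(maxsize=None)
def load_rave_model(model_name):
    """Load (and cache) a TorchScript RAVE export by name."""
    model = torch.jit.load(MODEL_PATHS[model_name], map_location="cpu")
    model.eval()
    return model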
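
A quick way to exercise the updated function outside the UI, using a synthetic stereo clip in the (waveform, sample_rate) order this commit unpacks. One caveat worth checking: if the Blocks UI feeds apply_rave from a gr.Audio component with type="numpy", Gradio delivers (sample_rate, data) with the rate first, so the unpack on the "waveform, sr = audio" line has to match whatever the input component actually returns. The sample rate, duration, and model name below are arbitrary:

import numpy as np

# Two seconds of stereo noise at 44.1 kHz, shaped [samples, channels]
# and kept float32 so the tensor conversion and resampling behave.
sr_in = 44100
stereo = np.random.uniform(-1.0, 1.0, size=(2 * sr_in, 2)).astype(np.float32)

out_wave, out_sr = apply_rave((stereo, sr_in), "vintage")
print(out_wave.shape, out_sr)  # processed audio plus the fixed 48000 Hz rate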