ahk-d committed on
Commit
57c442c
·
verified ·
1 Parent(s): af310d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -7
app.py CHANGED
@@ -37,22 +37,28 @@ def apply_rave(audio, model_name):
37
  """Apply selected RAVE model to uploaded audio."""
38
  model = load_rave_model(model_name)
39
 
40
- # Convert numpy audio to torch tensor
41
- audio_tensor = torch.tensor(audio[0]).unsqueeze(0) # [1, samples]
42
- sr = audio[1]
43
 
44
- # ✅ Resample if needed (most RAVE models expect 48kHz)
45
- if sr != 48000:
46
- audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
 
 
 
 
 
 
 
47
  sr = 48000
48
 
49
  with torch.no_grad():
50
- # ✅ TorchScript models have encode & decode methods
51
  z = model.encode(audio_tensor)
52
  processed_audio = model.decode(z)
53
 
54
  return (processed_audio.squeeze().cpu().numpy(), sr)
55
 
 
56
  # 🎛 Gradio UI
57
  with gr.Blocks() as demo:
58
  gr.Markdown("## 🎛 RAVE Style Transfer on Stems")
 
37
  """Apply selected RAVE model to uploaded audio."""
38
  model = load_rave_model(model_name)
39
 
40
+ # ✅ Unpack properly
41
+ waveform, sr = audio # waveform: np.array [samples, channels]
 
42
 
43
+ # ✅ Convert stereo -> mono if needed
44
+ if waveform.ndim > 1:
45
+ waveform = np.mean(waveform, axis=1)
46
+
47
+ # ✅ Convert numpy to torch tensor
48
+ audio_tensor = torch.tensor(waveform).unsqueeze(0) # shape: [1, samples]
49
+
50
+ # ✅ Resample if needed
51
+ if int(sr) != 48000:
52
+ audio_tensor = torchaudio.functional.resample(audio_tensor, int(sr), 48000)
53
  sr = 48000
54
 
55
  with torch.no_grad():
 
56
  z = model.encode(audio_tensor)
57
  processed_audio = model.decode(z)
58
 
59
  return (processed_audio.squeeze().cpu().numpy(), sr)
60
 
61
+
62
  # 🎛 Gradio UI
63
  with gr.Blocks() as demo:
64
  gr.Markdown("## 🎛 RAVE Style Transfer on Stems")