ahk-d committed on
Commit af310d3 · verified · 1 Parent(s): 7383a83

Update app.py

Files changed (1)
  1. app.py +8 -32
app.py CHANGED
@@ -1,31 +1,10 @@
- import gradio as gr
- import torchaudio
- import torch
- import os
- from rave import RAVE  # Assuming rave.py or pip package is available
- from huggingface_hub import hf_hub_download
-
- # ✅ Available RAVE models (can expand dynamically from HF repo)
- RAVE_MODELS = {
-     "Guitar": "guitar_iil_b2048_r48000_z16.ts",
-     "Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
-     "Organ (Archive)": "organ_archive_b2048_r48000_z16.ts",
-     "Organ (Bach)": "organ_bach_b2048_r48000_z16.ts",
-     "Voice Multivoice": "voice-multi-b2048-r48000-z11.ts",
-     "Birds Dawn Chorus": "birds_dawnchorus_b2048_r48000_z8.ts",
-     "Magnets": "magnets_b2048_r48000_z8.ts",
-     "Whale Songs": "humpbacks_pondbrain_b2048_r48000_z20.ts"
- }
-
- MODEL_CACHE = {}
-
  import gradio as gr
  import torchaudio
  import torch
  import numpy as np
  from huggingface_hub import hf_hub_download

- # ✅ Available RAVE models
+ # ✅ Map of model names to files on Hugging Face
  RAVE_MODELS = {
      "Guitar": "guitar_iil_b2048_r48000_z16.ts",
      "Soprano Sax": "sax_soprano_franziskaschroeder_b2048_r48000_z20.ts",
@@ -40,7 +19,7 @@ RAVE_MODELS = {
  MODEL_CACHE = {}

  def load_rave_model(model_name):
-     """Load a TorchScript RAVE model directly from Hugging Face."""
+     """Load TorchScript RAVE model from Hugging Face Hub."""
      if model_name in MODEL_CACHE:
          return MODEL_CACHE[model_name]

@@ -55,31 +34,29 @@ def load_rave_model(model_name):
      return model

  def apply_rave(audio, model_name):
-     """Apply selected RAVE style transfer model to uploaded audio."""
+     """Apply selected RAVE model to uploaded audio."""
      model = load_rave_model(model_name)

-     # Convert numpy audio (from Gradio) to torch tensor
+     # Convert numpy audio to torch tensor
      audio_tensor = torch.tensor(audio[0]).unsqueeze(0)  # [1, samples]
      sr = audio[1]

-     # ✅ resample if needed
+     # ✅ Resample if needed (most RAVE models expect 48kHz)
      if sr != 48000:
          audio_tensor = torchaudio.functional.resample(audio_tensor, sr, 48000)
          sr = 48000

      with torch.no_grad():
-         # ✅ pass audio through RAVE TorchScript (encode/decode)
-         # TorchScript models are usually structured like: model.encode(x) / model.decode(z)
+         # ✅ TorchScript models have encode & decode methods
          z = model.encode(audio_tensor)
          processed_audio = model.decode(z)

      return (processed_audio.squeeze().cpu().numpy(), sr)

-
- # 🎛 Gradio Interface
+ # 🎛 Gradio UI
  with gr.Blocks() as demo:
      gr.Markdown("## 🎛 RAVE Style Transfer on Stems")
-     gr.Markdown("Upload audio, select a RAVE model, and get a transformed version.")
+     gr.Markdown("Upload audio, pick a RAVE model, and get a transformed version.")

      with gr.Row():
          audio_input = gr.Audio(type="numpy", label="Upload Audio", sources=["upload", "microphone"])
@@ -88,7 +65,6 @@ with gr.Blocks() as demo:
      with gr.Row():
          output_audio = gr.Audio(type="numpy", label="Transformed Audio")

-     # API + UI trigger
      process_btn = gr.Button("Apply Style Transfer")
      process_btn.click(fn=apply_rave, inputs=[audio_input, model_selector], outputs=output_audio)
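
The round trip the updated comments describe (download a TorchScript export from the Hub, resample to 48 kHz, then encode and decode) can also be exercised outside Gradio. The following is a minimal sketch, not the app's code: the repo_id, the input file name, and the 3-D input shape are assumptions, since this diff does not show where load_rave_model downloads from.

# Minimal standalone sketch of the same pipeline (assumed repo id and shapes).
import torch
import torchaudio
from huggingface_hub import hf_hub_download

REPO_ID = "Intelligent-Instruments-Lab/rave-models"   # assumption: repo id is not shown in this diff
FILENAME = "guitar_iil_b2048_r48000_z16.ts"           # one of the files listed in RAVE_MODELS

path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)
model = torch.jit.load(path).eval()

waveform, sr = torchaudio.load("input.wav")           # [channels, samples]
waveform = waveform.mean(dim=0, keepdim=True)         # fold to mono
if sr != 48000:
    waveform = torchaudio.functional.resample(waveform, sr, 48000)

with torch.no_grad():
    x = waveform.unsqueeze(0)                         # [1, 1, samples]; RAVE exports typically expect 3-D input
    z = model.encode(x)
    y = model.decode(z)

torchaudio.save("output.wav", y.squeeze(0).cpu(), 48000)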