Update app.py
Browse files
app.py
CHANGED
@@ -65,14 +65,6 @@ from models.soundstream_hubert_new import SoundStream
|
|
65 |
from vocoder import build_codec_model, process_audio
|
66 |
from post_process_audio import replace_low_freq_with_energy_matched
|
67 |
|
68 |
-
# Install flash attention
|
69 |
-
print("Installing flash-attn...")
|
70 |
-
subprocess.run(
|
71 |
-
"pip install flash-attn --no-build-isolation",
|
72 |
-
env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
|
73 |
-
shell=True,
|
74 |
-
)
|
75 |
-
|
76 |
# Initialize device
|
77 |
device = "cuda:0"
|
78 |
|
@@ -211,7 +203,7 @@ def generate_music(genre_txt, lyrics_txt, max_new_tokens=5, run_n_segments=2, us
|
|
211 |
decoded_instrumentals = codec_model.decode(torch.as_tensor(instrumentals.astype(np.int16), dtype=torch.long).unsqueeze(0).permute(1, 0, 2).to(device)).cpu().squeeze(0)
|
212 |
|
213 |
mixed_audio = (decoded_vocals + decoded_instrumentals) / 2
|
214 |
-
return (16000, mixed_audio.
|
215 |
|
216 |
@spaces.GPU(duration=120)
|
217 |
def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=10):
|
|
|
65 |
from vocoder import build_codec_model, process_audio
|
66 |
from post_process_audio import replace_low_freq_with_energy_matched
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
# Initialize device
|
69 |
device = "cuda:0"
|
70 |
|
|
|
203 |
decoded_instrumentals = codec_model.decode(torch.as_tensor(instrumentals.astype(np.int16), dtype=torch.long).unsqueeze(0).permute(1, 0, 2).to(device)).cpu().squeeze(0)
|
204 |
|
205 |
mixed_audio = (decoded_vocals + decoded_instrumentals) / 2
|
206 |
+
return (16000, mixed_audio.detach().numpy())
|
207 |
|
208 |
@spaces.GPU(duration=120)
|
209 |
def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=10):
|