Update app.py
Browse files
app.py
CHANGED
@@ -203,7 +203,11 @@ def generate_music(genre_txt, lyrics_txt, max_new_tokens=5, run_n_segments=2, us
|
|
203 |
decoded_instrumentals = codec_model.decode(torch.as_tensor(instrumentals.astype(np.int16), dtype=torch.long).unsqueeze(0).permute(1, 0, 2).to(device)).cpu().squeeze(0)
|
204 |
|
205 |
mixed_audio = (decoded_vocals + decoded_instrumentals) / 2
|
206 |
-
|
|
|
|
|
|
|
|
|
207 |
|
208 |
@spaces.GPU(duration=120)
|
209 |
def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=10):
|
|
|
203 |
decoded_instrumentals = codec_model.decode(torch.as_tensor(instrumentals.astype(np.int16), dtype=torch.long).unsqueeze(0).permute(1, 0, 2).to(device)).cpu().squeeze(0)
|
204 |
|
205 |
mixed_audio = (decoded_vocals + decoded_instrumentals) / 2
|
206 |
+
mixed_audio_np = mixed_audio.detach().numpy() # Convert to NumPy array
|
207 |
+
mixed_audio_int16 = (mixed_audio_np * 32767).astype(np.int16) # Convert to int16
|
208 |
+
|
209 |
+
# Return the sample rate and the converted audio data
|
210 |
+
return (16000, mixed_audio_int16)
|
211 |
|
212 |
@spaces.GPU(duration=120)
|
213 |
def infer(genre_txt_content, lyrics_txt_content, num_segments=2, max_new_tokens=10):
|