Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -118,7 +118,6 @@ def split_lyrics(lyrics: str):
|
|
| 118 |
structured_lyrics = [f"[{seg[0]}]\n{seg[1].strip()}\n\n" for seg in segments]
|
| 119 |
return structured_lyrics
|
| 120 |
|
| 121 |
-
@spaces.GPU(duration=178)
|
| 122 |
def generate_music(
|
| 123 |
genre_txt=None,
|
| 124 |
lyrics_txt=None,
|
|
@@ -168,70 +167,69 @@ def generate_music(
|
|
| 168 |
# Format text prompt
|
| 169 |
run_n_segments = min(run_n_segments, len(lyrics)) + 1
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
raw_codes =
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
mmtokenizer.
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
| 195 |
else:
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
raw_output = torch.cat([raw_output, prompt_ids, output_seq[:, input_ids.shape[-1]:]], dim=1)
|
| 233 |
-
else:
|
| 234 |
-
raw_output = output_seq
|
| 235 |
|
| 236 |
# save raw output and check sanity
|
| 237 |
ids = raw_output[0].cpu().numpy()
|
|
@@ -359,19 +357,50 @@ with gr.Blocks() as demo:
|
|
| 359 |
# Examples updated to only include text inputs
|
| 360 |
gr.Examples(
|
| 361 |
examples=[
|
| 362 |
-
[
|
| 363 |
-
|
| 364 |
-
"""[verse]
|
| 365 |
Woke up in the morning, sun is shining bright
|
| 366 |
Chasing all my dreams, gotta get my mind right
|
| 367 |
City lights are fading, but my vision's clear
|
| 368 |
Got my team beside me, no room for fear
|
| 369 |
-
|
| 370 |
-
[chorus]
|
| 371 |
Walking through the streets, beats inside my head
|
| 372 |
Every step I take, closer to the bread
|
| 373 |
-
|
| 374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 375 |
"""
|
| 376 |
],
|
| 377 |
[
|
|
|
|
| 118 |
structured_lyrics = [f"[{seg[0]}]\n{seg[1].strip()}\n\n" for seg in segments]
|
| 119 |
return structured_lyrics
|
| 120 |
|
|
|
|
| 121 |
def generate_music(
|
| 122 |
genre_txt=None,
|
| 123 |
lyrics_txt=None,
|
|
|
|
| 167 |
# Format text prompt
|
| 168 |
run_n_segments = min(run_n_segments, len(lyrics)) + 1
|
| 169 |
|
| 170 |
+
@spaces.GPU(duration=178)
def generator():
    """Run segment-by-segment generation and return the accumulated token ids.

    Iterates over ``prompt_texts[:run_n_segments]``. Element 0 is not generated
    on its own; it is tokenized as the head of the first real segment's prompt
    (optionally followed by reference-audio codec tokens). Every later segment
    is prompted with the previously accumulated output plus its own section
    text, and the newly generated tokens are appended.

    All inputs (``prompt_texts``, ``run_n_segments``, ``use_audio_prompt``,
    ``audio_prompt_path``, ``prompt_start_time``, ``prompt_end_time``,
    ``max_new_tokens``, ``top_p``, ``temperature``, ``repetition_penalty``,
    ``start_of_segment``, ``end_of_segment``, ``model``, ``codec_model``,
    ``codectool``, ``mmtokenizer``, ``device``) are read from the surrounding
    scope; the function takes no arguments.

    Returns:
        The concatenated prompt and generated ids for all processed segments,
        or ``None`` when no segment beyond the header was processed.
    """
    # Bind up front so the final ``return`` cannot hit an unbound local when
    # the loop body never gets past the header element.
    raw_output = None

    # Use window slicing in case output sequence exceeds the context of model.
    # NOTE(review): 16384 is presumably the model's context length - confirm.
    max_context = 16384 - max_new_tokens - 1

    for i, p in enumerate(tqdm(prompt_texts[:run_n_segments])):
        section_text = p.replace('[start_of_segment]', '').replace('[end_of_segment]', '')
        guidance_scale = 1.5 if i <= 1 else 1.2  # Guidance scale adjusted based on segment index
        if i == 0:
            # The header is consumed as part of segment 1's prompt below.
            continue

        if i == 1:
            if use_audio_prompt:
                audio_prompt = load_audio_mono(audio_prompt_path)
                audio_prompt.unsqueeze_(0)
                with torch.no_grad():
                    raw_codes = codec_model.encode(audio_prompt.to(device), target_bw=0.5)
                raw_codes = raw_codes.transpose(0, 1)
                raw_codes = raw_codes.cpu().numpy().astype(np.int16)
                # Format audio prompt
                code_ids = codectool.npy2ids(raw_codes[0])
                audio_prompt_codec = code_ids[int(prompt_start_time * 50): int(prompt_end_time * 50)]  # 50 is tps of xcodec
                audio_prompt_codec_ids = (
                    [mmtokenizer.soa] + codectool.sep_ids + audio_prompt_codec + [mmtokenizer.eoa]
                )
                sentence_ids = (
                    mmtokenizer.tokenize("[start_of_reference]")
                    + audio_prompt_codec_ids
                    + mmtokenizer.tokenize("[end_of_reference]")
                )
                head_id = mmtokenizer.tokenize(prompt_texts[0]) + sentence_ids
            else:
                head_id = mmtokenizer.tokenize(prompt_texts[0])
            prompt_ids = head_id + start_of_segment + mmtokenizer.tokenize(section_text) + [mmtokenizer.soa] + codectool.sep_ids
        else:
            prompt_ids = end_of_segment + start_of_segment + mmtokenizer.tokenize(section_text) + [mmtokenizer.soa] + codectool.sep_ids

        prompt_ids = torch.as_tensor(prompt_ids).unsqueeze(0).to(device)
        # From the second generated segment on, condition on everything produced so far.
        input_ids = torch.cat([raw_output, prompt_ids], dim=1) if i > 1 else prompt_ids

        if input_ids.shape[-1] > max_context:
            print(
                f'Section {i}: output length {input_ids.shape[-1]} exceeding context length {max_context}, now using the last {max_context} tokens.')
            input_ids = input_ids[:, -max_context:]

        with torch.inference_mode(), torch.autocast(device_type='cuda', dtype=torch.float16):
            output_seq = model.generate(
                input_ids=input_ids,
                max_new_tokens=max_new_tokens,
                min_new_tokens=100,
                do_sample=True,
                top_p=top_p,
                temperature=temperature,
                repetition_penalty=repetition_penalty,
                eos_token_id=mmtokenizer.eoa,
                pad_token_id=mmtokenizer.eoa,
                logits_processor=LogitsProcessorList([BlockTokenRangeProcessor(0, 32002), BlockTokenRangeProcessor(32016, 32016)]),
                guidance_scale=guidance_scale,
                use_cache=True,
                num_beams=1
            )
            # Make sure every segment is terminated by the end-of-audio token.
            if output_seq[0][-1].item() != mmtokenizer.eoa:
                tensor_eoa = torch.as_tensor([[mmtokenizer.eoa]]).to(model.device)
                output_seq = torch.cat((output_seq, tensor_eoa), dim=1)

        if i > 1:
            # Append this segment's prompt plus only the newly generated tokens.
            raw_output = torch.cat([raw_output, prompt_ids, output_seq[:, input_ids.shape[-1]:]], dim=1)
        else:
            raw_output = output_seq

    return raw_output

# The code that follows reads ``raw_output`` directly, so the generation has to
# be invoked here and its result bound in the enclosing scope.
# NOTE(review): the result is a device tensor returned out of a @spaces.GPU
# function - confirm ZeroGPU hands it back intact (the next step calls .cpu()).
raw_output = generator()
|
|
|
|
|
|
|
|
|
|
| 233 |
|
| 234 |
# save raw output and check sanity
|
| 235 |
ids = raw_output[0].cpu().numpy()
|
|
|
|
| 357 |
# Examples updated to only include text inputs
|
| 358 |
gr.Examples(
|
| 359 |
examples=[
|
| 360 |
+
["rap piano street tough piercing vocal hip-hop synthesizer clear vocal male",
|
| 361 |
+
"""[verse]
|
|
|
|
| 362 |
Woke up in the morning, sun is shining bright
|
| 363 |
Chasing all my dreams, gotta get my mind right
|
| 364 |
City lights are fading, but my vision's clear
|
| 365 |
Got my team beside me, no room for fear
|
|
|
|
|
|
|
| 366 |
Walking through the streets, beats inside my head
|
| 367 |
Every step I take, closer to the bread
|
| 368 |
+
|
| 369 |
+
[chorus]
|
| 370 |
+
This is my life, and I'm aiming for the top
|
| 371 |
+
Never gonna quit, no, I'm never gonna stop
|
| 372 |
+
Through the highs and lows, I'mma keep it real
|
| 373 |
+
Living out my dreams with this mic and a deal
|
| 374 |
+
|
| 375 |
+
[verse]
|
| 376 |
+
Late nights grinding, writing down these rhymes
|
| 377 |
+
Clock is ticking fast, can't afford to waste time
|
| 378 |
+
Haters gonna hate, but I brush it off
|
| 379 |
+
Turn the negativity into something strong
|
| 380 |
+
Mama working hard, wanna make her proud"""],
|
| 381 |
+
[
|
| 382 |
+
"inspiring female uplifting pop airy vocal electronic bright vocal vocal",
|
| 383 |
+
"""[verse]
|
| 384 |
+
Staring at the sunset, colors paint the sky
|
| 385 |
+
Thoughts of you keep swirling, can't deny
|
| 386 |
+
I know I let you down, I made mistakes
|
| 387 |
+
But I'm here to mend the heart I didn't break
|
| 388 |
+
|
| 389 |
+
[chorus]
|
| 390 |
+
Every road you take, I'll be one step behind
|
| 391 |
+
Every dream you chase, I'm reaching for the light
|
| 392 |
+
You can't fight this feeling now
|
| 393 |
+
I won't back down
|
| 394 |
+
I'm the whisper in the wind, the shadow by your side
|
| 395 |
+
The warmth you feel within when you can't hide
|
| 396 |
+
You know you can't deny it now
|
| 397 |
+
I won't back down
|
| 398 |
+
|
| 399 |
+
[verse]
|
| 400 |
+
They might say I'm foolish, chasing after you
|
| 401 |
+
But they don't feel this love the way we do
|
| 402 |
+
My heart beats only for you, can't you see?
|
| 403 |
+
I won't let you slip away from me
|
| 404 |
"""
|
| 405 |
],
|
| 406 |
[
|