Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -118,7 +118,6 @@ def split_lyrics(lyrics: str):
|
|
118 |
structured_lyrics = [f"[{seg[0]}]\n{seg[1].strip()}\n\n" for seg in segments]
|
119 |
return structured_lyrics
|
120 |
|
121 |
-
@spaces.GPU(duration=178)
|
122 |
def generate_music(
|
123 |
genre_txt=None,
|
124 |
lyrics_txt=None,
|
@@ -168,70 +167,69 @@ def generate_music(
|
|
168 |
# Format text prompt
|
169 |
run_n_segments = min(run_n_segments, len(lyrics)) + 1
|
170 |
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
raw_codes =
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
mmtokenizer.
|
192 |
-
|
193 |
-
|
194 |
-
|
|
|
|
|
195 |
else:
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
raw_output = torch.cat([raw_output, prompt_ids, output_seq[:, input_ids.shape[-1]:]], dim=1)
|
233 |
-
else:
|
234 |
-
raw_output = output_seq
|
235 |
|
236 |
# save raw output and check sanity
|
237 |
ids = raw_output[0].cpu().numpy()
|
@@ -359,19 +357,50 @@ with gr.Blocks() as demo:
|
|
359 |
# Examples updated to only include text inputs
|
360 |
gr.Examples(
|
361 |
examples=[
|
362 |
-
[
|
363 |
-
|
364 |
-
"""[verse]
|
365 |
Woke up in the morning, sun is shining bright
|
366 |
Chasing all my dreams, gotta get my mind right
|
367 |
City lights are fading, but my vision's clear
|
368 |
Got my team beside me, no room for fear
|
369 |
-
|
370 |
-
[chorus]
|
371 |
Walking through the streets, beats inside my head
|
372 |
Every step I take, closer to the bread
|
373 |
-
|
374 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
"""
|
376 |
],
|
377 |
[
|
|
|
118 |
structured_lyrics = [f"[{seg[0]}]\n{seg[1].strip()}\n\n" for seg in segments]
|
119 |
return structured_lyrics
|
120 |
|
|
|
121 |
def generate_music(
|
122 |
genre_txt=None,
|
123 |
lyrics_txt=None,
|
|
|
167 |
# Format text prompt
|
168 |
run_n_segments = min(run_n_segments, len(lyrics)) + 1
|
169 |
|
170 |
+
@spaces.GPU(duration=178)
|
171 |
+
def generator:
|
172 |
+
for i, p in enumerate(tqdm(prompt_texts[:run_n_segments])):
|
173 |
+
section_text = p.replace('[start_of_segment]', '').replace('[end_of_segment]', '')
|
174 |
+
guidance_scale = 1.5 if i <= 1 else 1.2 # Guidance scale adjusted based on segment index
|
175 |
+
if i == 0:
|
176 |
+
continue
|
177 |
+
if i == 1:
|
178 |
+
if use_audio_prompt:
|
179 |
+
audio_prompt = load_audio_mono(audio_prompt_path)
|
180 |
+
audio_prompt.unsqueeze_(0)
|
181 |
+
with torch.no_grad():
|
182 |
+
raw_codes = codec_model.encode(audio_prompt.to(device), target_bw=0.5)
|
183 |
+
raw_codes = raw_codes.transpose(0, 1)
|
184 |
+
raw_codes = raw_codes.cpu().numpy().astype(np.int16)
|
185 |
+
# Format audio prompt
|
186 |
+
code_ids = codectool.npy2ids(raw_codes[0])
|
187 |
+
audio_prompt_codec = code_ids[int(prompt_start_time * 50): int(prompt_end_time * 50)] # 50 is tps of xcodec
|
188 |
+
audio_prompt_codec_ids = [mmtokenizer.soa] + codectool.sep_ids + audio_prompt_codec + [
|
189 |
+
mmtokenizer.eoa]
|
190 |
+
sentence_ids = mmtokenizer.tokenize("[start_of_reference]") + audio_prompt_codec_ids + mmtokenizer.tokenize(
|
191 |
+
"[end_of_reference]")
|
192 |
+
head_id = mmtokenizer.tokenize(prompt_texts[0]) + sentence_ids
|
193 |
+
else:
|
194 |
+
head_id = mmtokenizer.tokenize(prompt_texts[0])
|
195 |
+
prompt_ids = head_id + start_of_segment + mmtokenizer.tokenize(section_text) + [mmtokenizer.soa] + codectool.sep_ids
|
196 |
else:
|
197 |
+
prompt_ids = end_of_segment + start_of_segment + mmtokenizer.tokenize(section_text) + [mmtokenizer.soa] + codectool.sep_ids
|
198 |
+
|
199 |
+
prompt_ids = torch.as_tensor(prompt_ids).unsqueeze(0).to(device)
|
200 |
+
input_ids = torch.cat([raw_output, prompt_ids], dim=1) if i > 1 else prompt_ids
|
201 |
+
|
202 |
+
# Use window slicing in case output sequence exceeds the context of model
|
203 |
+
max_context = 16384 - max_new_tokens - 1
|
204 |
+
if input_ids.shape[-1] > max_context:
|
205 |
+
print(
|
206 |
+
f'Section {i}: output length {input_ids.shape[-1]} exceeding context length {max_context}, now using the last {max_context} tokens.')
|
207 |
+
input_ids = input_ids[:, -(max_context):]
|
208 |
+
|
209 |
+
with torch.inference_mode(), torch.autocast(device_type='cuda', dtype=torch.float16):
|
210 |
+
output_seq = model.generate(
|
211 |
+
input_ids=input_ids,
|
212 |
+
max_new_tokens=max_new_tokens,
|
213 |
+
min_new_tokens=100,
|
214 |
+
do_sample=True,
|
215 |
+
top_p=top_p,
|
216 |
+
temperature=temperature,
|
217 |
+
repetition_penalty=repetition_penalty,
|
218 |
+
eos_token_id=mmtokenizer.eoa,
|
219 |
+
pad_token_id=mmtokenizer.eoa,
|
220 |
+
logits_processor=LogitsProcessorList([BlockTokenRangeProcessor(0, 32002), BlockTokenRangeProcessor(32016, 32016)]),
|
221 |
+
guidance_scale=guidance_scale,
|
222 |
+
use_cache=True,
|
223 |
+
num_beams=1
|
224 |
+
)
|
225 |
+
if output_seq[0][-1].item() != mmtokenizer.eoa:
|
226 |
+
tensor_eoa = torch.as_tensor([[mmtokenizer.eoa]]).to(model.device)
|
227 |
+
output_seq = torch.cat((output_seq, tensor_eoa), dim=1)
|
228 |
+
|
229 |
+
if i > 1:
|
230 |
+
raw_output = torch.cat([raw_output, prompt_ids, output_seq[:, input_ids.shape[-1]:]], dim=1)
|
231 |
+
else:
|
232 |
+
raw_output = output_seq
|
|
|
|
|
|
|
233 |
|
234 |
# save raw output and check sanity
|
235 |
ids = raw_output[0].cpu().numpy()
|
|
|
357 |
# Examples updated to only include text inputs
|
358 |
gr.Examples(
|
359 |
examples=[
|
360 |
+
["rap piano street tough piercing vocal hip-hop synthesizer clear vocal male",
|
361 |
+
"""[verse]
|
|
|
362 |
Woke up in the morning, sun is shining bright
|
363 |
Chasing all my dreams, gotta get my mind right
|
364 |
City lights are fading, but my vision's clear
|
365 |
Got my team beside me, no room for fear
|
|
|
|
|
366 |
Walking through the streets, beats inside my head
|
367 |
Every step I take, closer to the bread
|
368 |
+
|
369 |
+
[chorus]
|
370 |
+
This is my life, and I'm aiming for the top
|
371 |
+
Never gonna quit, no, I'm never gonna stop
|
372 |
+
Through the highs and lows, I'mma keep it real
|
373 |
+
Living out my dreams with this mic and a deal
|
374 |
+
|
375 |
+
[verse]
|
376 |
+
Late nights grinding, writing down these rhymes
|
377 |
+
Clock is ticking fast, can't afford to waste time
|
378 |
+
Haters gonna hate, but I brush it off
|
379 |
+
Turn the negativity into something strong
|
380 |
+
Mama working hard, wanna make her proud"""],
|
381 |
+
[
|
382 |
+
"inspiring female uplifting pop airy vocal electronic bright vocal vocal",
|
383 |
+
"""[verse]
|
384 |
+
Staring at the sunset, colors paint the sky
|
385 |
+
Thoughts of you keep swirling, can't deny
|
386 |
+
I know I let you down, I made mistakes
|
387 |
+
But I'm here to mend the heart I didn't break
|
388 |
+
|
389 |
+
[chorus]
|
390 |
+
Every road you take, I'll be one step behind
|
391 |
+
Every dream you chase, I'm reaching for the light
|
392 |
+
You can't fight this feeling now
|
393 |
+
I won't back down
|
394 |
+
I'm the whisper in the wind, the shadow by your side
|
395 |
+
The warmth you feel within when you can't hide
|
396 |
+
You know you can't deny it now
|
397 |
+
I won't back down
|
398 |
+
|
399 |
+
[verse]
|
400 |
+
They might say I'm foolish, chasing after you
|
401 |
+
But they don't feel this love the way we do
|
402 |
+
My heart beats only for you, can't you see?
|
403 |
+
I won't let you slip away from me
|
404 |
"""
|
405 |
],
|
406 |
[
|