KingNish committed · Commit e6c9d72 · verified · 1 Parent(s): 310fd77

Update app.py

Files changed (1)
  1. app.py +11 -37
app.py CHANGED
@@ -15,6 +15,8 @@ subprocess.run(
     "pip install flash-attn --no-build-isolation",
     env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
     shell=True,
+    capture_output=True, # Capture output for debugging
+    text=True # Decode output as text
 )
 
 from huggingface_hub import snapshot_download
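
With `capture_output=True` and `text=True`, the pip output is collected on the returned `CompletedProcess` as decoded strings instead of streaming to the console. A minimal sketch of how that result could be inspected; binding the return value to a variable named `result` is illustrative, the commit itself does not show whether it is used:

```python
import subprocess

# Sketch only: mirrors the call in the diff, but binds the result so the
# captured pip output can be checked. The variable name is hypothetical.
result = subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
    capture_output=True,  # collect stdout/stderr on the CompletedProcess
    text=True,            # decode the captured bytes as str
)
if result.returncode != 0:
    print("flash-attn install failed:", result.stderr)
```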
@@ -75,16 +77,9 @@ device = "cuda:0"
 model = AutoModelForCausalLM.from_pretrained(
     "m-a-p/YuE-s1-7B-anneal-en-cot",
     torch_dtype=torch.float16,
-    attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
+    attn_implementation="flash_attention_2",
+    load_in_4bit=True # Or load_in_8bit=True
 ).to(device)
-# assistant_model = AutoModelForCausalLM.from_pretrained(
-#     "m-a-p/YuE-s2-1B-general",
-#     torch_dtype=torch.float16,
-#     attn_implementation="flash_attention_2", # To enable flashattn, you have to install flash-attn
-# ).to(device)
-# assistant_model = torch.compile(assistant_model)
-# model = torch.compile(model)
-# assistant_model.eval()
 model.eval()
 
 basic_model_config = './xcodec_mini_infer/final_ckpt/config.yaml'
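
For reference, `load_in_4bit=True` requires `bitsandbytes`, and recent transformers releases document the same load through a `BitsAndBytesConfig`, with device placement handled by `device_map` rather than a later `.to()` call (bitsandbytes-quantized models generally cannot be moved with `.to(device)`). A hedged sketch of that equivalent form, not part of the commit:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Sketch under the assumption that bitsandbytes and flash-attn are installed;
# quantization_config is the documented replacement for load_in_4bit=True.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # 4-bit weights, fp16 compute
)
model = AutoModelForCausalLM.from_pretrained(
    "m-a-p/YuE-s1-7B-anneal-en-cot",
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
    quantization_config=quant_config,
    device_map="cuda:0",  # placement handled here instead of .to(device)
)
model.eval()
```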
@@ -130,7 +125,7 @@ def generate_music(
         raise FileNotFoundError("Please offer audio prompt filepath using '--audio_prompt_path', when you enable 'use_audio_prompt'!")
     cuda_idx = cuda_idx
     max_new_tokens = max_new_tokens * 100
-
+
     with tempfile.TemporaryDirectory() as output_dir:
         stage1_output_dir = os.path.join(output_dir, f"stage1")
         os.makedirs(stage1_output_dir, exist_ok=True)
@@ -234,7 +229,7 @@ def generate_music(
                 pad_token_id=mmtokenizer.eoa,
                 logits_processor=LogitsProcessorList([BlockTokenRangeProcessor(0, 32002), BlockTokenRangeProcessor(32016, 32016)]),
                 guidance_scale=guidance_scale,
-                use_cache=True,
+                use_cache=True, # KV Caching is enabled here!
                 top_k=50,
                 num_beams=1
             )
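
The new comment only documents existing behaviour: with `use_cache=True`, `generate()` reuses past key/value states so each decoding step attends only over the newly produced token. A tiny self-contained illustration; the `gpt2` checkpoint is a stand-in for the example and is not something this app loads:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in model purely to illustrate the use_cache flag.
tok = AutoTokenizer.from_pretrained("gpt2")
lm = AutoModelForCausalLM.from_pretrained("gpt2")

ids = tok("KV caching means", return_tensors="pt").input_ids
out = lm.generate(ids, max_new_tokens=20, use_cache=True)  # past key/values reused per step
print(tok.decode(out[0]))
```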
@@ -268,14 +263,14 @@ def generate_music(
             instrumentals.append(instrumentals_ids)
         vocals = np.concatenate(vocals, axis=1)
         instrumentals = np.concatenate(instrumentals, axis=1)
-
+
         vocal_save_path = os.path.join(stage1_output_dir, f"vocal_{random_id}".replace('.', '@') + '.npy')
         inst_save_path = os.path.join(stage1_output_dir, f"instrumental_{random_id}".replace('.', '@') + '.npy')
         np.save(vocal_save_path, vocals)
         np.save(inst_save_path, instrumentals)
         stage1_output_set.append(vocal_save_path)
         stage1_output_set.append(inst_save_path)
-
+
 
         print("Converting to Audio...")
 
@@ -374,7 +369,7 @@ def generate_music(
             cutoff_freq=5500.0
         )
         print("All process Done")
-
+
         # Load the final audio file and return the numpy array
         final_audio, sr = torchaudio.load(final_output_path)
         return (sr, final_audio.squeeze().numpy())
@@ -402,7 +397,7 @@ with gr.Blocks() as demo:
             <div style="display:flex;column-gap:4px;">
                 <a href="https://github.com/multimodal-art-projection/YuE">
                     <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
-                </a>
+                </a>
                 <a href="https://map-yue.github.io">
                     <img src='https://img.shields.io/badge/Project-Page-green'>
                 </a>
@@ -418,32 +413,11 @@ with gr.Blocks() as demo:
 
         with gr.Column():
             num_segments = gr.Number(label="Number of Segments", value=2, interactive=True)
-            max_new_tokens = gr.Slider(label="Duration of song", minimum=1, maximum=30, step=1, value=5,
+            max_new_tokens = gr.Slider(label="Duration of song", minimum=1, maximum=30, step=1, value=15,
                                        interactive=True)
             submit_btn = gr.Button("Submit")
             music_out = gr.Audio(label="Audio Result")
 
-    # gr.Examples(
-    #     examples=[
-    #         ["Rap, Hip-Hop, Street Vibes, Tough, Piercing Vocals, Piano, Synthesizer, Clear Male Vocals",
-    #          """[verse]
-    # Woke up in the morning, sun is shining bright
-    # Chasing all my dreams, gotta get my mind right
-    # City lights are fading, but my vision's clear
-    # Got my team beside me, no room for fear
-    # Walking through the streets, beats inside my head
-    # Every step I take, closer to the bread
-    # People passing by, they don't understand
-    # Building up my future with my own two hands
-    # """],
-    #     ],
-    #     inputs=[genre_txt, lyrics_txt],
-    #     outputs=[music_out],
-    #     cache_examples=True,
-    #     cache_mode="eager",
-    #     fn=infer
-    # )
-
     gr.Examples(
         examples=[
         [
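
Elsewhere in the diff, `generate_music()` multiplies `max_new_tokens` by 100, so the slider's new default of 15 appears to translate to roughly 1500 new tokens per run. A trivial sketch of that scaling; the helper name is made up for illustration:

```python
# Hypothetical helper mirroring the max_new_tokens * 100 scaling shown in the diff.
def duration_to_tokens(duration: int) -> int:
    return duration * 100

print(duration_to_tokens(15))  # 1500 -> the slider's new default
print(duration_to_tokens(30))  # 3000 -> the slider's maximum
```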
 