ginipick committed on
Commit
c8a3a02
·
verified ·
1 Parent(s): 460b152

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -15
app.py CHANGED
@@ -62,18 +62,31 @@ def analyze_lyrics(lyrics):
62
  def calculate_generation_params(lyrics):
63
  sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
64
 
65
- # ๊ธฐ๋ณธ ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ
66
- base_tokens_per_line = 200
67
- verse_tokens = section_lines['verse'] * base_tokens_per_line
68
- chorus_tokens = section_lines['chorus'] * (base_tokens_per_line * 1.5) # ์ฝ”๋Ÿฌ์Šค๋Š” 50% ๋” ๋งŽ์€ ํ† ํฐ
69
- bridge_tokens = section_lines['bridge'] * base_tokens_per_line
70
 
71
- # ์ด ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ
72
- total_tokens = int(verse_tokens + chorus_tokens + bridge_tokens)
 
 
 
 
 
 
73
 
74
- # ์„น์…˜ ๊ธฐ๋ฐ˜ ์„ธ๊ทธ๋จผํŠธ ์ˆ˜ ๊ณ„์‚ฐ
75
- num_segments = max(2, min(4, total_sections))
 
76
 
 
 
 
 
 
 
 
 
77
  # ํ† ํฐ ์ˆ˜ ์ œํ•œ
78
  max_tokens = min(32000, max(3000, total_tokens))
79
 
@@ -81,9 +94,19 @@ def calculate_generation_params(lyrics):
81
  'max_tokens': max_tokens,
82
  'num_segments': num_segments,
83
  'sections': sections,
84
- 'section_lines': section_lines
 
85
  }
86
 
 
 
 
 
 
 
 
 
 
87
  # ์–ธ์–ด ๊ฐ์ง€ ๋ฐ ๋ชจ๋ธ ์„ ํƒ ํ•จ์ˆ˜
88
  def detect_and_select_model(text):
89
  if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text): # ํ•œ๊ธ€
@@ -239,6 +262,13 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
239
  logging.info(f"Selected model: {model_path}")
240
  logging.info(f"Lyrics analysis: {params}")
241
 
 
 
 
 
 
 
 
242
  # ์‹ค์ œ ์‚ฌ์šฉํ•  ํŒŒ๋ผ๋ฏธํ„ฐ
243
  actual_num_segments = config['num_segments']
244
  actual_max_tokens = config['max_tokens']
@@ -253,7 +283,7 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
253
  os.makedirs(output_dir, exist_ok=True)
254
  empty_output_folder(output_dir)
255
 
256
- # ๊ธฐ๋ณธ ๋ช…๋ น์–ด ๊ตฌ์„ฑ (๋ถˆํ•„์š”ํ•œ ๋งค๊ฐœ๋ณ€์ˆ˜ ์ œ๊ฑฐ)
257
  command = [
258
  "python", "infer.py",
259
  "--stage1_model", model_path,
@@ -264,9 +294,16 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
264
  "--stage2_batch_size", str(config['batch_size']),
265
  "--output_dir", output_dir,
266
  "--cuda_idx", "0",
267
- "--max_new_tokens", str(actual_max_tokens)
 
268
  ]
269
 
 
 
 
 
 
 
270
  # GPU๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ์—๋งŒ ์ถ”๊ฐ€ ์˜ต์…˜ ์ ์šฉ
271
  if torch.cuda.is_available():
272
  command.append("--disable_offload_model")
@@ -311,7 +348,11 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
311
  # ๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ
312
  last_mp3 = get_last_mp3_file(output_dir)
313
  if last_mp3:
 
314
  logging.info(f"Generated audio file: {last_mp3}")
 
 
 
315
  return last_mp3
316
  else:
317
  logging.warning("No output audio file generated")
@@ -334,7 +375,16 @@ def main():
334
  with gr.Blocks() as demo:
335
  with gr.Column():
336
  gr.Markdown("# YuE: Open Music Foundation Models for Full-Song Generation (Multi-Language Support)")
337
-
 
 
 
 
 
 
 
 
 
338
 
339
  with gr.Row():
340
  with gr.Column():
@@ -365,6 +415,9 @@ def main():
365
  value=4000,
366
  interactive=False
367
  )
 
 
 
368
  submit_btn = gr.Button("Generate Music", variant="primary")
369
  music_out = gr.Audio(label="Generated Audio")
370
 
@@ -385,7 +438,6 @@ Don't let this moment fade, hold me close tonight
385
  With you here beside me, everything's alright
386
  Can't imagine life alone, don't want to let you go
387
  Stay with me forever, let our love just flow
388
-
389
  """
390
  ],
391
  # ํ•œ๊ตญ์–ด ์˜ˆ์ œ
@@ -402,7 +454,6 @@ Stay with me forever, let our love just flow
402
  ๋‘๋ ค์›€์€ ์—†์–ด ๋„ˆ์™€ ํ•จ๊ป˜๋ผ๋ฉด
403
  ์˜์›ํžˆ ๊ณ„์†๋  ์šฐ๋ฆฌ์˜ ๋…ธ๋ž˜
404
  ์ด ์ˆœ๊ฐ„์„ ๊ธฐ์–ตํ•ด forever
405
-
406
  """
407
  ]
408
  ],
@@ -412,7 +463,24 @@ Stay with me forever, let our love just flow
412
  # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
413
  initialize_system()
414
 
 
 
 
 
 
 
 
 
 
 
 
415
  # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
 
 
 
 
 
 
416
  submit_btn.click(
417
  fn=infer,
418
  inputs=[genre_txt, lyrics_txt, num_segments, max_new_tokens],
 
62
  def calculate_generation_params(lyrics):
63
  sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
64
 
65
+ # ๊ธฐ๋ณธ ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ (1ํ† ํฐ โ‰ˆ 0.02์ดˆ ๊ธฐ์ค€)
66
+ seconds_per_line = 3 # ํ•œ ์ค„๋‹น ํ‰๊ท  3์ดˆ
67
+ target_duration = 0 # ๋ชฉํ‘œ ๊ธธ์ด (์ดˆ)
 
 
68
 
69
+ # ๊ฐ ์„น์…˜๋ณ„ ์‹œ๊ฐ„ ๊ณ„์‚ฐ
70
+ for section_type in ['verse', 'chorus', 'bridge']:
71
+ lines = section_lines[section_type]
72
+ if section_type == 'chorus':
73
+ # ์ฝ”๋Ÿฌ์Šค๋Š” ๋” ๊ธด ์‹œ๊ฐ„ ํ• ๋‹น
74
+ target_duration += lines * seconds_per_line * 1.5
75
+ else:
76
+ target_duration += lines * seconds_per_line
77
 
78
+ # ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ (1์ดˆ๋‹น ์•ฝ 50ํ† ํฐ)
79
+ tokens_per_second = 50
80
+ total_tokens = int(target_duration * tokens_per_second)
81
 
82
+ # ์„น์…˜ ๊ธฐ๋ฐ˜ ์„ธ๊ทธ๋จผํŠธ ์ˆ˜ ๊ณ„์‚ฐ
83
+ if target_duration > 180: # 3๋ถ„ ์ด์ƒ
84
+ num_segments = 4
85
+ elif target_duration > 120: # 2๋ถ„ ์ด์ƒ
86
+ num_segments = 3
87
+ else:
88
+ num_segments = 2
89
+
90
  # ํ† ํฐ ์ˆ˜ ์ œํ•œ
91
  max_tokens = min(32000, max(3000, total_tokens))
92
 
 
94
  'max_tokens': max_tokens,
95
  'num_segments': num_segments,
96
  'sections': sections,
97
+ 'section_lines': section_lines,
98
+ 'estimated_duration': target_duration
99
  }
100
 
101
+ def get_audio_duration(file_path):
102
+ try:
103
+ import librosa
104
+ duration = librosa.get_duration(path=file_path)
105
+ return duration
106
+ except Exception as e:
107
+ logging.error(f"Failed to get audio duration: {e}")
108
+ return None
109
+
110
  # ์–ธ์–ด ๊ฐ์ง€ ๋ฐ ๋ชจ๋ธ ์„ ํƒ ํ•จ์ˆ˜
111
  def detect_and_select_model(text):
112
  if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text): # ํ•œ๊ธ€
 
262
  logging.info(f"Selected model: {model_path}")
263
  logging.info(f"Lyrics analysis: {params}")
264
 
265
+ # ์ฝ”๋Ÿฌ์Šค ์„น์…˜ ํ™•์ธ
266
+ has_chorus = params['sections']['chorus'] > 0
267
+ estimated_duration = params.get('estimated_duration', 60) # ๊ธฐ๋ณธ๊ฐ’ 60์ดˆ
268
+
269
+ logging.info(f"Estimated duration: {estimated_duration} seconds")
270
+ logging.info(f"Has chorus sections: {has_chorus}")
271
+
272
  # ์‹ค์ œ ์‚ฌ์šฉํ•  ํŒŒ๋ผ๋ฏธํ„ฐ
273
  actual_num_segments = config['num_segments']
274
  actual_max_tokens = config['max_tokens']
 
283
  os.makedirs(output_dir, exist_ok=True)
284
  empty_output_folder(output_dir)
285
 
286
+ # ๊ธฐ๋ณธ ๋ช…๋ น์–ด ๊ตฌ์„ฑ
287
  command = [
288
  "python", "infer.py",
289
  "--stage1_model", model_path,
 
294
  "--stage2_batch_size", str(config['batch_size']),
295
  "--output_dir", output_dir,
296
  "--cuda_idx", "0",
297
+ "--max_new_tokens", str(actual_max_tokens),
298
+ "--keep_intermediate"
299
  ]
300
 
301
+ if has_chorus:
302
+ command.extend([
303
+ "--segment_duration", str(int(estimated_duration / actual_num_segments)),
304
+ "--enhance_chorus"
305
+ ])
306
+
307
  # GPU๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ์—๋งŒ ์ถ”๊ฐ€ ์˜ต์…˜ ์ ์šฉ
308
  if torch.cuda.is_available():
309
  command.append("--disable_offload_model")
 
348
  # ๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ
349
  last_mp3 = get_last_mp3_file(output_dir)
350
  if last_mp3:
351
+ duration = get_audio_duration(last_mp3)
352
  logging.info(f"Generated audio file: {last_mp3}")
353
+ if duration:
354
+ logging.info(f"Audio duration: {duration:.2f} seconds")
355
+ logging.info(f"Expected duration: {estimated_duration} seconds")
356
  return last_mp3
357
  else:
358
  logging.warning("No output audio file generated")
 
375
  with gr.Blocks() as demo:
376
  with gr.Column():
377
  gr.Markdown("# YuE: Open Music Foundation Models for Full-Song Generation (Multi-Language Support)")
378
+ gr.HTML("""
379
+ <div style="display:flex;column-gap:4px;">
380
+ <a href="https://github.com/multimodal-art-projection/YuE">
381
+ <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
382
+ </a>
383
+ <a href="https://map-yue.github.io">
384
+ <img src='https://img.shields.io/badge/Project-Page-green'>
385
+ </a>
386
+ </div>
387
+ """)
388
 
389
  with gr.Row():
390
  with gr.Column():
 
415
  value=4000,
416
  interactive=False
417
  )
418
+ with gr.Row():
419
+ duration_info = gr.Label(label="Estimated Duration")
420
+ sections_info = gr.Label(label="Section Information")
421
  submit_btn = gr.Button("Generate Music", variant="primary")
422
  music_out = gr.Audio(label="Generated Audio")
423
 
 
438
  With you here beside me, everything's alright
439
  Can't imagine life alone, don't want to let you go
440
  Stay with me forever, let our love just flow
 
441
  """
442
  ],
443
  # ํ•œ๊ตญ์–ด ์˜ˆ์ œ
 
454
  ๋‘๋ ค์›€์€ ์—†์–ด ๋„ˆ์™€ ํ•จ๊ป˜๋ผ๋ฉด
455
  ์˜์›ํžˆ ๊ณ„์†๋  ์šฐ๋ฆฌ์˜ ๋…ธ๋ž˜
456
  ์ด ์ˆœ๊ฐ„์„ ๊ธฐ์–ตํ•ด forever
 
457
  """
458
  ]
459
  ],
 
463
  # ์‹œ์Šคํ…œ ์ดˆ๊ธฐํ™”
464
  initialize_system()
465
 
466
+ def update_info(lyrics):
467
+ if not lyrics:
468
+ return "No lyrics entered", "No sections detected"
469
+ params = calculate_generation_params(lyrics)
470
+ duration = params.get('estimated_duration', 0)
471
+ sections = params['sections']
472
+ return (
473
+ f"{duration:.1f} seconds",
474
+ f"Verses: {sections['verse']}, Chorus: {sections['chorus']}, Bridge: {sections['bridge']}"
475
+ )
476
+
477
  # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
478
+ lyrics_txt.change(
479
+ fn=update_info,
480
+ inputs=[lyrics_txt],
481
+ outputs=[duration_info, sections_info]
482
+ )
483
+
484
  submit_btn.click(
485
  fn=infer,
486
  inputs=[genre_txt, lyrics_txt, num_segments, max_new_tokens],