Update app.py
app.py
CHANGED
@@ -62,18 +62,31 @@ def analyze_lyrics(lyrics):
 def calculate_generation_params(lyrics):
     sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
 
-    # Calculate the base token counts
-
-
-    chorus_tokens = section_lines['chorus'] * (base_tokens_per_line * 1.5)  # choruses get 50% more tokens
-    bridge_tokens = section_lines['bridge'] * base_tokens_per_line
+    # Base token budget (assuming 1 token ≈ 0.02 s)
+    seconds_per_line = 3  # 3 seconds per lyric line on average
+    target_duration = 0   # target duration in seconds
 
-    #
-
+    # Accumulate duration per section type
+    for section_type in ['verse', 'chorus', 'bridge']:
+        lines = section_lines[section_type]
+        if section_type == 'chorus':
+            # Allocate more time to choruses
+            target_duration += lines * seconds_per_line * 1.5
+        else:
+            target_duration += lines * seconds_per_line
 
-    #
-
+    # Token count (about 50 tokens per second)
+    tokens_per_second = 50
+    total_tokens = int(target_duration * tokens_per_second)
 
+    # Segment count based on the estimated duration
+    if target_duration > 180:  # over 3 minutes
+        num_segments = 4
+    elif target_duration > 120:  # over 2 minutes
+        num_segments = 3
+    else:
+        num_segments = 2
+
     # Clamp the token count
     max_tokens = min(32000, max(3000, total_tokens))
 
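The arithmetic in this hunk is easy to trace by hand. Below is a minimal standalone sketch; analyze_lyrics itself is not shown in this diff, so count_section_lines is a hypothetical stand-in that assumes lyrics use bracketed section tags such as [verse] and [chorus]:

import re

def count_section_lines(lyrics):
    # Hypothetical stand-in for the app's analyze_lyrics helper (not shown in
    # this diff); assumes bracketed section tags such as [verse] / [chorus].
    counts = {'verse': 0, 'chorus': 0, 'bridge': 0}
    current = None
    for line in lyrics.strip().splitlines():
        tag = re.match(r'\[(verse|chorus|bridge)\]', line.strip(), re.IGNORECASE)
        if tag:
            current = tag.group(1).lower()
        elif line.strip() and current:
            counts[current] += 1
    return counts

# Worked example: 8 verse lines and 6 chorus lines.
section_lines = count_section_lines("[verse]\n" + "la\n" * 8 + "[chorus]\n" + "la\n" * 6)
assert section_lines == {'verse': 8, 'chorus': 6, 'bridge': 0}

target_duration = (section_lines['verse'] * 3            # 24 s
                   + section_lines['chorus'] * 3 * 1.5   # 27 s (chorus weighted 1.5x)
                   + section_lines['bridge'] * 3)        # 51 s in total
total_tokens = int(target_duration * 50)                 # 51 * 50 = 2550 tokens
max_tokens = min(32000, max(3000, total_tokens))         # the floor of 3000 applies
num_segments = 2                                         # 51 s is below the 120 s threshold

So short lyrics are padded up to the 3000-token floor, while only songs estimated at over two minutes get more than two segments.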
@@ -81,9 +94,19 @@ def calculate_generation_params(lyrics):
         'max_tokens': max_tokens,
         'num_segments': num_segments,
         'sections': sections,
-        'section_lines': section_lines
+        'section_lines': section_lines,
+        'estimated_duration': target_duration
     }
 
+def get_audio_duration(file_path):
+    try:
+        import librosa
+        duration = librosa.get_duration(path=file_path)
+        return duration
+    except Exception as e:
+        logging.error(f"Failed to get audio duration: {e}")
+        return None
+
 # Language detection and model selection
 def detect_and_select_model(text):
     if re.search(r'[\u3131-\u318E\uAC00-\uD7A3]', text):  # Hangul
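Two notes on this hunk. librosa.get_duration(path=...) uses the keyword spelling introduced in librosa 0.10 (older releases used filename=), and the broad try/except also absorbs a version mismatch. The Hangul test in detect_and_select_model combines two Unicode ranges; a quick illustration:

import re

# The character classes used by detect_and_select_model:
#   \u3131-\u318E  Hangul compatibility jamo (e.g. ㄱ, ㅏ)
#   \uAC00-\uD7A3  precomposed Hangul syllables (가 through 힣)
HANGUL = re.compile(r'[\u3131-\u318E\uAC00-\uD7A3]')

print(bool(HANGUL.search("이 순간을 기억해 forever")))  # True  -> Korean model
print(bool(HANGUL.search("Stay with me forever")))      # False -> default model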
@@ -239,6 +262,13 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     logging.info(f"Selected model: {model_path}")
     logging.info(f"Lyrics analysis: {params}")
 
+    # Check whether the lyrics contain chorus sections
+    has_chorus = params['sections']['chorus'] > 0
+    estimated_duration = params.get('estimated_duration', 60)  # default: 60 seconds
+
+    logging.info(f"Estimated duration: {estimated_duration} seconds")
+    logging.info(f"Has chorus sections: {has_chorus}")
+
     # Parameters actually used for generation
     actual_num_segments = config['num_segments']
     actual_max_tokens = config['max_tokens']
@@ -253,7 +283,7 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     os.makedirs(output_dir, exist_ok=True)
     empty_output_folder(output_dir)
 
-    # Build the base command
+    # Build the base command
     command = [
         "python", "infer.py",
         "--stage1_model", model_path,
@@ -264,9 +294,16 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         "--stage2_batch_size", str(config['batch_size']),
         "--output_dir", output_dir,
         "--cuda_idx", "0",
-        "--max_new_tokens", str(actual_max_tokens)
+        "--max_new_tokens", str(actual_max_tokens),
+        "--keep_intermediate"
     ]
 
+    if has_chorus:
+        command.extend([
+            "--segment_duration", str(int(estimated_duration / actual_num_segments)),
+            "--enhance_chorus"
+        ])
+
     # Extra options only when a GPU is available
     if torch.cuda.is_available():
         command.append("--disable_offload_model")
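The flags --keep_intermediate, --segment_duration, and --enhance_chorus belong to the repository's bundled infer.py; that script is not shown here, so treat them as app-specific rather than standard YuE options. The assembled list is presumably executed without a shell, along these lines (a sketch, not the app's actual call site):

import logging
import subprocess

def run_inference(command):
    # Sketch only: passing the argument list directly (no shell=True)
    # avoids quoting problems in model paths and output directories.
    result = subprocess.run(command, capture_output=True, text=True)
    if result.returncode != 0:
        logging.error(f"infer.py failed: {result.stderr}")
        return False
    return True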
@@ -311,7 +348,11 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
     # Handle the results
     last_mp3 = get_last_mp3_file(output_dir)
     if last_mp3:
+        duration = get_audio_duration(last_mp3)
         logging.info(f"Generated audio file: {last_mp3}")
+        if duration:
+            logging.info(f"Audio duration: {duration:.2f} seconds")
+            logging.info(f"Expected duration: {estimated_duration} seconds")
         return last_mp3
     else:
         logging.warning("No output audio file generated")
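Since the measured and expected durations are now logged side by side, a purely hypothetical follow-up (not in this commit) would be to warn on large mismatches:

# Hypothetical extension: flag generations that deviate from the
# lyric-based estimate by more than 20%.
if duration and estimated_duration:
    if abs(duration - estimated_duration) / estimated_duration > 0.2:
        logging.warning(
            f"Duration mismatch: got {duration:.1f}s, expected {estimated_duration}s"
        )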
@@ -334,7 +375,16 @@ def main():
     with gr.Blocks() as demo:
         with gr.Column():
             gr.Markdown("# YuE: Open Music Foundation Models for Full-Song Generation (Multi-Language Support)")
-
+            gr.HTML("""
+                <div style="display:flex;column-gap:4px;">
+                    <a href="https://github.com/multimodal-art-projection/YuE">
+                        <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
+                    </a>
+                    <a href="https://map-yue.github.io">
+                        <img src='https://img.shields.io/badge/Project-Page-green'>
+                    </a>
+                </div>
+            """)
 
         with gr.Row():
             with gr.Column():
@@ -365,6 +415,9 @@
                 value=4000,
                 interactive=False
             )
+            with gr.Row():
+                duration_info = gr.Label(label="Estimated Duration")
+                sections_info = gr.Label(label="Section Information")
             submit_btn = gr.Button("Generate Music", variant="primary")
             music_out = gr.Audio(label="Generated Audio")
 
@@ -385,7 +438,6 @@ Don't let this moment fade, hold me close tonight
 With you here beside me, everything's alright
 Can't imagine life alone, don't want to let you go
 Stay with me forever, let our love just flow
-
 """
 ],
 # Korean example
@@ -402,7 +454,6 @@ Stay with me forever, let our love just flow
 두려움은 없어 너와 함께라면
 영원히 계속될 우리의 노래
 이 순간을 기억해 forever
-
 """
 ]
 ],
@@ -412,7 +463,24 @@
     # System initialization
     initialize_system()
 
+    def update_info(lyrics):
+        if not lyrics:
+            return "No lyrics entered", "No sections detected"
+        params = calculate_generation_params(lyrics)
+        duration = params.get('estimated_duration', 0)
+        sections = params['sections']
+        return (
+            f"{duration:.1f} seconds",
+            f"Verses: {sections['verse']}, Chorus: {sections['chorus']}, Bridge: {sections['bridge']}"
+        )
+
     # Event handlers
+    lyrics_txt.change(
+        fn=update_info,
+        inputs=[lyrics_txt],
+        outputs=[duration_info, sections_info]
+    )
+
     submit_btn.click(
         fn=infer,
         inputs=[genre_txt, lyrics_txt, num_segments, max_new_tokens],
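Because update_info is a plain function, it can be exercised outside Gradio (assuming the module's functions are importable and that analyze_lyrics counts the lines under bracketed section tags):

sample_lyrics = """[verse]
Standing in the light
Waiting for the night
[chorus]
Hold me close tonight
"""
# With the 3 s/line heuristic above: 2 verse lines (6 s) + 1 chorus line
# (4.5 s) -> roughly ("10.5 seconds", "Verses: 1, Chorus: 1, Bridge: 0").
print(update_info(sample_lyrics))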