OpenSUNO

Running

App Files Community

ginipick committed on Jan 29

Commit

dbad390

verified ·

1 Parent(s): bddc80f

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -44

app.py CHANGED Viewed

@@ -62,40 +62,43 @@ def analyze_lyrics(lyrics):
 def calculate_generation_params(lyrics):
     sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
-    # 기본 토큰 수 계산 (1토큰 ≈ 0.02초 기준)
-    seconds_per_line = 3  # 한 줄당 평균 3초
-    target_duration = 0  # 목표 길이 (초)
-    # 각 섹션별 시간 계산
-    for section_type in ['verse', 'chorus', 'bridge']:
-        lines = section_lines[section_type]
-        if section_type == 'chorus':
-            # 코러스는 더 긴 시간 할당
-            target_duration += lines * seconds_per_line * 1.5
-        else:
-            target_duration += lines * seconds_per_line
-    # 토큰 수 계산 (1초당 약 50토큰)
     tokens_per_second = 50
-    total_tokens = int(target_duration * tokens_per_second)
     # 섹션 기반 세그먼트 수 계산
-    if target_duration > 180:  # 3분 이상
         num_segments = 4
-    elif target_duration > 120:  # 2분 이상
         num_segments = 3
-    else:
         num_segments = 2
-    # 토큰 수 제한
-    max_tokens = min(32000, max(3000, total_tokens))
     return {
         'max_tokens': max_tokens,
         'num_segments': num_segments,
         'sections': sections,
         'section_lines': section_lines,
-        'estimated_duration': target_duration
     }
 def get_audio_duration(file_path):
@@ -122,30 +125,41 @@ def optimize_model_selection(lyrics, genre):
     model_path = detect_and_select_model(lyrics)
     params = calculate_generation_params(lyrics)
     model_config = {
         "m-a-p/YuE-s1-7B-anneal-en-cot": {
             "max_tokens": params['max_tokens'],
             "temperature": 0.8,
             "batch_size": 8,
             "num_segments": params['num_segments'],
-            "chorus_strength": 1.2 if params['sections']['chorus'] > 0 else 1.0
         },
         "m-a-p/YuE-s1-7B-anneal-jp-kr-cot": {
             "max_tokens": params['max_tokens'],
             "temperature": 0.7,
             "batch_size": 8,
             "num_segments": params['num_segments'],
-            "chorus_strength": 1.2 if params['sections']['chorus'] > 0 else 1.0
         },
         "m-a-p/YuE-s1-7B-anneal-zh-cot": {
             "max_tokens": params['max_tokens'],
             "temperature": 0.7,
             "batch_size": 8,
             "num_segments": params['num_segments'],
-            "chorus_strength": 1.2 if params['sections']['chorus'] > 0 else 1.0
         }
     }
     return model_path, model_config[model_path], params
 # GPU 설정 최적화
@@ -266,13 +280,16 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         has_chorus = params['sections']['chorus'] > 0
         estimated_duration = params.get('estimated_duration', 60)
         logging.info(f"Estimated duration: {estimated_duration} seconds")
         logging.info(f"Has chorus sections: {has_chorus}")
-        # 실제 사용할 파라미터
-        actual_num_segments = config['num_segments']
-        actual_max_tokens = config['max_tokens']
         logging.info(f"Using segments: {actual_num_segments}, tokens: {actual_max_tokens}")
         # 임시 파일 생성
@@ -283,7 +300,7 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
         os.makedirs(output_dir, exist_ok=True)
         empty_output_folder(output_dir)
-        # 기본 명령어 구성 (지원되는 매개변수만 사용)
         command = [
             "python", "infer.py",
             "--stage1_model", model_path,
@@ -347,6 +364,10 @@ def infer(genre_txt_content, lyrics_txt_content, num_segments, max_new_tokens):
                 if duration:
                     logging.info(f"Audio duration: {duration:.2f} seconds")
                     logging.info(f"Expected duration: {estimated_duration} seconds")
             except Exception as e:
                 logging.warning(f"Failed to get audio duration: {e}")
             return last_mp3
@@ -370,17 +391,8 @@ def main():
     # Gradio 인터페이스
     with gr.Blocks() as demo:
         with gr.Column():
-            gr.Markdown("# YuE: Open Music Foundation Models for Full-Song Generation (Multi-Language Support)")
-            gr.HTML("""
-            <div style="display:flex;column-gap:4px;">
-                <a href="https://github.com/multimodal-art-projection/YuE">
-                    <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
-                </a>
-                <a href="https://map-yue.github.io">
-                    <img src='https://img.shields.io/badge/Project-Page-green'>
-                </a>
-            </div>
-            """)
             with gr.Row():
                 with gr.Column():
@@ -458,18 +470,20 @@ Stay with me forever, let our love just flow
         # 시스템 초기화
         initialize_system()
         def update_info(lyrics):
             if not lyrics:
                 return "No lyrics entered", "No sections detected"
             params = calculate_generation_params(lyrics)
-            duration = params.get('estimated_duration', 0)
             sections = params['sections']
             return (
-                f"{duration:.1f} seconds",
-                f"Verses: {sections['verse']}, Chorus: {sections['chorus']}, Bridge: {sections['bridge']}"
             )
         # 이벤트 핸들러
         lyrics_txt.change(
             fn=update_info,

 def calculate_generation_params(lyrics):
     sections, total_sections, total_lines, section_lines = analyze_lyrics(lyrics)
+    # 기본 시간 계산 (초 단위)
+    time_per_line = {
+        'verse': 4,    # verse는 한 줄당 4초
+        'chorus': 6,   # chorus는 한 줄당 6초 (더 긴 시간 할당)
+        'bridge': 5    # bridge는 한 줄당 5초
+    }
+    # 각 섹션별 예상 시간 계산
+    total_duration = 0
+    for section_type, lines in section_lines.items():
+        total_duration += lines * time_per_line[section_type]
+    # 최소 지속 시간 보장 (60초)
+    total_duration = max(60, total_duration)
+    # 토큰 계산 (1초당 약 50토큰으로 계산)
     tokens_per_second = 50
+    total_tokens = int(total_duration * tokens_per_second)
     # 섹션 기반 세그먼트 수 계산
+    if total_duration > 180:    # 3분 이상
         num_segments = 4
+    elif total_duration > 120:  # 2분 이상
         num_segments = 3
+    else:                      # 2분 미만
         num_segments = 2
+    # 토큰 수 제한 (최소 6000토큰 보장)
+    max_tokens = min(32000, max(6000, total_tokens))
     return {
         'max_tokens': max_tokens,
         'num_segments': num_segments,
         'sections': sections,
         'section_lines': section_lines,
+        'estimated_duration': total_duration,
+        'tokens_per_segment': max_tokens // num_segments
     }
 def get_audio_duration(file_path):
     model_path = detect_and_select_model(lyrics)
     params = calculate_generation_params(lyrics)
+    # 코러스 존재 여부에 따른 설정 조정
+    has_chorus = params['sections']['chorus'] > 0
     model_config = {
         "m-a-p/YuE-s1-7B-anneal-en-cot": {
             "max_tokens": params['max_tokens'],
             "temperature": 0.8,
             "batch_size": 8,
             "num_segments": params['num_segments'],
+            "tokens_per_segment": params['tokens_per_segment'],
+            "estimated_duration": params['estimated_duration']
         },
         "m-a-p/YuE-s1-7B-anneal-jp-kr-cot": {
             "max_tokens": params['max_tokens'],
             "temperature": 0.7,
             "batch_size": 8,
             "num_segments": params['num_segments'],
+            "tokens_per_segment": params['tokens_per_segment'],
+            "estimated_duration": params['estimated_duration']
         },
         "m-a-p/YuE-s1-7B-anneal-zh-cot": {
             "max_tokens": params['max_tokens'],
             "temperature": 0.7,
             "batch_size": 8,
             "num_segments": params['num_segments'],
+            "tokens_per_segment": params['tokens_per_segment'],
+            "estimated_duration": params['estimated_duration']
         }
     }
+    # 코러스가 있는 경우 토큰 수 증가
+    if has_chorus:
+        for config in model_config.values():
+            config['max_tokens'] = int(config['max_tokens'] * 1.5)  # 50% 더 많은 토큰 할당
     return model_path, model_config[model_path], params
 # GPU 설정 최적화
         has_chorus = params['sections']['chorus'] > 0
         estimated_duration = params.get('estimated_duration', 60)
+        # 토큰 수 조정 (코러스가 있는 경우 더 많은 토큰 할당)
+        if has_chorus:
+            actual_max_tokens = int(config['max_tokens'] * 1.5)  # 50% 더 많은 토큰
+            actual_num_segments = max(3, config['num_segments'])  # 최소 3개 세그먼트 보장
+        else:
+            actual_max_tokens = config['max_tokens']
+            actual_num_segments = config['num_segments']
         logging.info(f"Estimated duration: {estimated_duration} seconds")
         logging.info(f"Has chorus sections: {has_chorus}")
         logging.info(f"Using segments: {actual_num_segments}, tokens: {actual_max_tokens}")
         # 임시 파일 생성
         os.makedirs(output_dir, exist_ok=True)
         empty_output_folder(output_dir)
+        # 기본 명령어 구성
         command = [
             "python", "infer.py",
             "--stage1_model", model_path,
                 if duration:
                     logging.info(f"Audio duration: {duration:.2f} seconds")
                     logging.info(f"Expected duration: {estimated_duration} seconds")
+                    # 생성된 음악이 너무 짧은 경우 경고
+                    if duration < estimated_duration * 0.8:  # 예상 길이의 80% 미만인 경우
+                        logging.warning(f"Generated audio is shorter than expected: {duration:.2f}s < {estimated_duration:.2f}s")
             except Exception as e:
                 logging.warning(f"Failed to get audio duration: {e}")
             return last_mp3
     # Gradio 인터페이스
     with gr.Blocks() as demo:
         with gr.Column():
+            gr.Markdown("# Open SUNI: Full-Song Generation (Multi-Language Support)")
             with gr.Row():
                 with gr.Column():
         # 시스템 초기화
         initialize_system()
         def update_info(lyrics):
             if not lyrics:
                 return "No lyrics entered", "No sections detected"
             params = calculate_generation_params(lyrics)
+            duration = params['estimated_duration']
             sections = params['sections']
             return (
+                f"Estimated duration: {duration:.1f} seconds",
+                f"Verses: {sections['verse']}, Chorus: {sections['chorus']} (Expected full length including chorus)"
             )
         # 이벤트 핸들러
         lyrics_txt.change(
             fn=update_info,