Update app.py
app.py CHANGED
@@ -158,6 +158,7 @@ def stage2_generate(model_stage2, prompt, batch_size=16):
     Given a prompt (a numpy array of raw codec ids), upsample using the Stage2 model.
     """
     # Unflatten prompt: assume prompt shape (1, T) and then reformat.
+    print(f"stage2_generate: received prompt with shape: {prompt.shape}")
     codec_ids = codectool.unflatten(prompt, n_quantizer=1)
     codec_ids = codectool.offset_tok_ids(
         codec_ids,
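For orientation, the unflatten call in this hunk reshapes the flat (1, T) id stream into one row per quantizer before token offsets are applied. The helper below is a hypothetical numpy re-implementation for illustration only, assuming the codebooks are interleaved along the time axis; the real codectool.unflatten may differ.

```python
import numpy as np

def unflatten_sketch(prompt: np.ndarray, n_quantizer: int = 1) -> np.ndarray:
    # Illustrative stand-in for codectool.unflatten (assumed behavior):
    # a (1, T) stream interleaving n_quantizer codebooks,
    # [q0_t0, q1_t0, q0_t1, q1_t1, ...], becomes (n_quantizer, T // n_quantizer).
    assert prompt.ndim == 2 and prompt.shape[0] == 1
    assert prompt.shape[1] % n_quantizer == 0
    return prompt.reshape(-1, n_quantizer).T

ids = np.arange(8)[np.newaxis, :]          # shape (1, 8)
print(unflatten_sketch(ids, n_quantizer=2))
# [[0 2 4 6]
#  [1 3 5 7]]
```

With n_quantizer=1, as in the call above, this reduces to a no-op reshape, which is why the (1, T) shape assumption in the comment matters.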
@@ -238,10 +239,20 @@ def stage2_inference(model_stage2, stage1_output_set, stage2_output_dir, batch_s
             print(f"{output_filename} already processed.")
             stage2_result.append(output_filename)
             continue
+
         prompt = np.load(path).astype(np.int32)
-        #
+        # Ensure prompt is 2D.
+        if prompt.ndim == 1:
+            prompt = prompt[np.newaxis, :]
+        print(f"Loaded prompt from {path} with shape: {prompt.shape}")
+
+        # Compute output duration: tokens per second assumed to be 50, only full 6-second segments.
         output_duration = (prompt.shape[-1] // 50) // 6 * 6
+        if output_duration == 0:
+            raise ValueError(f"Output duration computed as 0 for {path}. Prompt length: {prompt.shape[-1]} tokens")
         num_batch = output_duration // 6
+
+        # Process prompt in batches
         if num_batch <= batch_size:
             output = stage2_generate(model_stage2, prompt[:, :output_duration*50], batch_size=num_batch)
         else:
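The duration math added here, in isolation: at the assumed 50 codec tokens per second, the token count is floored to whole seconds and then to a multiple of 6 seconds, so any prompt shorter than 300 tokens now raises instead of silently producing an empty slice downstream. A minimal sketch of that calculation (helper name hypothetical):

```python
def output_duration_seconds(n_tokens: int, tokens_per_sec: int = 50) -> int:
    # Mirrors (prompt.shape[-1] // 50) // 6 * 6 plus the new zero-duration guard.
    duration = (n_tokens // tokens_per_sec) // 6 * 6
    if duration == 0:
        raise ValueError(f"Prompt too short: {n_tokens} tokens is under 6 seconds")
    return duration

print(output_duration_seconds(1000))   # 18: 20s of tokens floors to 18s (three full 6s batches)
print(output_duration_seconds(1499))   # 24: 29.98s floors to 24s
output_duration_seconds(299)           # raises ValueError
```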
@@ -251,16 +262,25 @@ def stage2_inference(model_stage2, stage1_output_set, stage2_output_dir, batch_s
                 start_idx = seg * batch_size * 300
                 end_idx = min((seg + 1) * batch_size * 300, output_duration * 50)
                 current_batch = batch_size if (seg != num_segments - 1 or num_batch % batch_size == 0) else num_batch % batch_size
-                segment = stage2_generate(model_stage2, prompt[:, start_idx:end_idx], batch_size=current_batch)
+                segment_prompt = prompt[:, start_idx:end_idx]
+                if segment_prompt.shape[-1] == 0:
+                    print(f"Warning: empty segment detected for seg {seg}, start {start_idx}, end {end_idx}. Skipping this segment.")
+                    continue
+                segment = stage2_generate(model_stage2, segment_prompt, batch_size=current_batch)
                 segments.append(segment)
+            if len(segments) == 0:
+                raise ValueError(f"No valid segments produced for {path}.")
             output = np.concatenate(segments, axis=0)
+
         # Process any remaining tokens if prompt length not fully used.
         if output_duration * 50 != prompt.shape[-1]:
             ending = stage2_generate(model_stage2, prompt[:, output_duration * 50:], batch_size=1)
             output = np.concatenate([output, ending], axis=0)
-
+
+        # Convert Stage2 output tokens back to numpy using Stage2’s codec manipulator.
         output = codectool_stage2.ids2npy(output)
-
+
+        # Fix any invalid codes
         fixed_output = copy.deepcopy(output)
         for i, line in enumerate(output):
             for j, element in enumerate(line):
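Since each 6-second batch element is 300 tokens (6 s at 50 tokens/s), a full segment spans batch_size * 300 tokens, and only the last segment may carry fewer than batch_size elements. The sketch below reproduces the bookkeeping from this hunk so the indices can be checked without the model; num_segments is computed outside the shown context, so the ceiling division here is an assumption.

```python
def plan_segments(num_batch: int, batch_size: int, total_tokens: int):
    # Assumed: num_segments is the ceiling of num_batch / batch_size.
    num_segments = (num_batch + batch_size - 1) // batch_size
    plan = []
    for seg in range(num_segments):
        start_idx = seg * batch_size * 300
        end_idx = min((seg + 1) * batch_size * 300, total_tokens)
        # The last segment takes the remainder of num_batch, if any.
        current_batch = batch_size if (seg != num_segments - 1 or num_batch % batch_size == 0) else num_batch % batch_size
        plan.append((start_idx, end_idx, current_batch))
    return plan

# 7 six-second batches (42s, 2100 tokens) processed 4 at a time:
print(plan_segments(7, 4, 2100))
# [(0, 1200, 4), (1200, 2100, 3)]
```

For consistent inputs the plan never yields an empty slice, since start_idx stays below num_batch * 300; the new empty-segment guard in the diff protects against prompts whose length disagrees with the precomputed duration.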
@@ -268,6 +288,7 @@ def stage2_inference(model_stage2, stage1_output_set, stage2_output_dir, batch_s
                     counter = Counter(line)
                     most_common = sorted(counter.items(), key=lambda x: x[1], reverse=True)[0][0]
                     fixed_output[i, j] = most_common
+
         np.save(output_filename, fixed_output)
         stage2_result.append(output_filename)
     return stage2_result
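The invalid-code fix replaces each flagged element with the most frequent value in its row. The condition that flags an element sits just above this hunk's context, so the range check below is an assumption; everything else mirrors the Counter logic shown.

```python
import copy
from collections import Counter

import numpy as np

def fix_invalid_codes(output: np.ndarray, lo: int = 0, hi: int = 1023) -> np.ndarray:
    # Replace out-of-range ids with the most common id in the same row.
    # The [lo, hi] validity range is an assumption; the real check is
    # outside the shown hunk context.
    fixed_output = copy.deepcopy(output)
    for i, line in enumerate(output):
        for j, element in enumerate(line):
            if element < lo or element > hi:
                counter = Counter(line)
                most_common = sorted(counter.items(), key=lambda x: x[1], reverse=True)[0][0]
                fixed_output[i, j] = most_common
    return fixed_output

codes = np.array([[5, 5, 7, -1], [9, 2000, 9, 3]])
print(fix_invalid_codes(codes))
# [[5 5 7 5]
#  [9 9 9 3]]
```

Note the Counter is rebuilt for every flagged element and votes over the raw row, invalid values included; hoisting it out of the inner loop would be a cheap optimization when rows are long.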