Fix: Improve spectrogram rendering stability and accuracy
Browse files

Replaced manual time scaling with librosa.time_to_frames to prevent the visualization from freezing at the end of the video. Added a safety check for empty spectrograms to avoid crashes with very short audio clips.
app.py
CHANGED
@@ -505,8 +505,6 @@ def process_audio_to_video(*args, progress=gr.Progress(track_tqdm=True)):
|
|
505 |
|
506 |
text_clips.append(txt_clip)
|
507 |
|
508 |
-
|
509 |
-
|
510 |
N_FFT, HOP_LENGTH, N_BANDS = 2048, 512, 32
|
511 |
MIN_DB, MAX_DB = -80.0, 0.0
|
512 |
|
@@ -529,8 +527,22 @@ def process_audio_to_video(*args, progress=gr.Progress(track_tqdm=True)):
|
|
529 |
if not image_clips:
|
530 |
for i in range(1, 9):
|
531 |
y_pos = int(i * (HEIGHT / 9)); frame[y_pos-1:y_pos, :] = grid_rgb
|
532 |
-
|
533 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
bar_width = WIDTH / N_BANDS
|
535 |
for i in range(N_BANDS):
|
536 |
energy_db = S_mel_db[i, time_idx]
|
|
|
505 |
|
506 |
text_clips.append(txt_clip)
|
507 |
|
|
|
|
|
508 |
N_FFT, HOP_LENGTH, N_BANDS = 2048, 512, 32
|
509 |
MIN_DB, MAX_DB = -80.0, 0.0
|
510 |
|
|
|
527 |
if not image_clips:
|
528 |
for i in range(1, 9):
|
529 |
y_pos = int(i * (HEIGHT / 9)); frame[y_pos-1:y_pos, :] = grid_rgb
|
530 |
+
|
531 |
+
# 1. Safety Check: If the spectrogram has no time frames (e.g., from an extremely short audio file),
|
532 |
+
# return a blank frame immediately to prevent an IndexError.
|
533 |
+
if S_mel_db.shape[1] == 0:
|
534 |
+
return frame
|
535 |
+
|
536 |
+
# 2. Use librosa.time_to_frames to accurately convert the video time `t`
|
537 |
+
# into a spectrogram frame index. This is far more reliable than manual scaling
|
538 |
+
# and solves the problem of missing content on the rightmost side of the video.
|
539 |
+
time_idx = librosa.time_to_frames(t, sr=current_sr, hop_length=HOP_LENGTH)
|
540 |
+
|
541 |
+
# 3. Boundary Protection: Although time_to_frames is accurate, this extra `min`
|
542 |
+
# call acts as a safeguard to ensure the index never exceeds the array's
|
543 |
+
# maximum valid index, preventing any edge-case errors.
|
544 |
+
time_idx = min(time_idx, S_mel_db.shape[1] - 1)
|
545 |
+
|
546 |
bar_width = WIDTH / N_BANDS
|
547 |
for i in range(N_BANDS):
|
548 |
energy_db = S_mel_db[i, time_idx]
|