avans06 committed on
Commit
0c9293b
·
1 Parent(s): c6d7d8f

Fix: Improve spectrogram rendering stability and accuracy

Browse files

Replaced manual time scaling with librosa.time_to_frames to prevent the visualization from freezing at the end of the video. Added a safety check for empty spectrograms to avoid crashes with very short audio clips.

Files changed (1) hide show
  1. app.py +16 -4
app.py CHANGED
@@ -505,8 +505,6 @@ def process_audio_to_video(*args, progress=gr.Progress(track_tqdm=True)):
505
 
506
  text_clips.append(txt_clip)
507
 
508
-
509
-
510
  N_FFT, HOP_LENGTH, N_BANDS = 2048, 512, 32
511
  MIN_DB, MAX_DB = -80.0, 0.0
512
 
@@ -529,8 +527,22 @@ def process_audio_to_video(*args, progress=gr.Progress(track_tqdm=True)):
529
  if not image_clips:
530
  for i in range(1, 9):
531
  y_pos = int(i * (HEIGHT / 9)); frame[y_pos-1:y_pos, :] = grid_rgb
532
-
533
- time_idx = min(int((t / duration) * S_mel_db.shape[1]), S_mel_db.shape[1] - 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
  bar_width = WIDTH / N_BANDS
535
  for i in range(N_BANDS):
536
  energy_db = S_mel_db[i, time_idx]
 
505
 
506
  text_clips.append(txt_clip)
507
 
 
 
508
  N_FFT, HOP_LENGTH, N_BANDS = 2048, 512, 32
509
  MIN_DB, MAX_DB = -80.0, 0.0
510
 
 
527
  if not image_clips:
528
  for i in range(1, 9):
529
  y_pos = int(i * (HEIGHT / 9)); frame[y_pos-1:y_pos, :] = grid_rgb
530
+
531
+ # 1. Safety Check: If the spectrogram has no time frames (e.g., from an extremely short audio file),
532
+ # return a blank frame immediately to prevent an IndexError.
533
+ if S_mel_db.shape[1] == 0:
534
+ return frame
535
+
536
+ # 2. Use librosa.time_to_frames to accurately convert the video time `t`
537
+ # into a spectrogram frame index. This is far more reliable than manual scaling
538
+ # and solves the problem of missing content on the rightmost side of the video.
539
+ time_idx = librosa.time_to_frames(t, sr=current_sr, hop_length=HOP_LENGTH)
540
+
541
+ # 3. Boundary Protection: Although time_to_frames is accurate, this extra `min`
542
+ # call acts as a safeguard to ensure the index never exceeds the array's
543
+ # maximum valid index, preventing any edge-case errors.
544
+ time_idx = min(time_idx, S_mel_db.shape[1] - 1)
545
+
546
  bar_width = WIDTH / N_BANDS
547
  for i in range(N_BANDS):
548
  energy_db = S_mel_db[i, time_idx]