avans06 committed on
Commit
44e9039
·
1 Parent(s): 2dbdd2e

feat(ui): Implement interactive preview for sound sources

Browse files

This commit introduces a live audio preview system, allowing users to instantly audition their selected SoundFont or 8-bit Synthesizer settings without running a full render. This significantly improves the sound design workflow.

Files changed (1) hide show
  1. app.py +148 -5
app.py CHANGED
@@ -536,6 +536,134 @@ def analyze_midi_velocity(midi_path):
536
  print("No notes found in this MIDI.")
537
 
538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  def scale_instrument_velocity(instrument, scale=0.8):
540
  for note in instrument.notes:
541
  note.velocity = max(1, min(127, int(note.velocity * scale)))
@@ -2471,11 +2599,20 @@ if __name__ == "__main__":
2471
  value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
2472
  elem_classes="description-box" # Optional: for CSS styling
2473
  )
2474
- # --- SoundFont Bank with 8-bit option ---
2475
- soundfont_bank = gr.Dropdown(
2476
- [SYNTH_8_BIT_LABEL] + list(soundfonts_dict.keys()),
2477
- label="SoundFont / Synthesizer",
2478
- value=list(soundfonts_dict.keys())[0] if soundfonts_dict else SYNTH_8_BIT_LABEL)
 
 
 
 
 
 
 
 
 
2479
  render_sample_rate = gr.Radio(
2480
  ["16000", "32000", "44100"],
2481
  label="Audio Sample Rate",
@@ -2787,6 +2924,12 @@ if __name__ == "__main__":
2787
  inputs=s8bit_echo_sustain,
2788
  outputs=echo_sustain_settings
2789
  )
 
 
 
 
 
 
2790
 
2791
  # Launch the Gradio app
2792
  app.queue().launch(inbrowser=True, debug=True)
 
536
  print("No notes found in this MIDI.")
537
 
538
 
539
def _build_preview_midi():
    """Build the short in-memory test melody used for sound-source previews.

    The melody is the opening of the Super Mario Bros. theme, played by a
    single lead instrument. Note times are written in absolute seconds, so
    the ``tempo`` value below is only used to derive the step length.

    Returns:
        A ``pretty_midi.PrettyMIDI`` object containing one instrument.
    """
    midi = pretty_midi.PrettyMIDI()

    # Use a lead instrument. Program 81 (Lead 2, sawtooth) is a good, bright default.
    lead = pretty_midi.Instrument(program=81, is_drum=False, name="Preview Lead")

    # One step is half a beat: at 200 BPM a beat (quarter note) lasts 0.3 s,
    # so a step is an 8th note of 0.15 s.
    tempo = 200.0
    time_per_step = 60.0 / tempo / 2

    # (pitch, duration_in_steps); MIDI pitch 60 = C4 (Middle C).
    melody_data = [
        (76, 1), (76, 2), (76, 2), (72, 1), (76, 2),  # E E E C E
        (79, 4), (67, 4),                             # G G(low)
    ]

    current_time = 0.0
    for pitch, duration_steps in melody_data:
        end_time = current_time + (duration_steps * time_per_step)
        lead.notes.append(
            pretty_midi.Note(
                velocity=120,  # Use a high velocity for a bright, clear sound
                pitch=pitch,
                start=current_time,
                # End slightly early so consecutive notes re-trigger clearly.
                end=end_time - 0.01,
            )
        )
        current_time = end_time

    midi.instruments.append(lead)
    return midi


def preview_sound_source(sound_source_name: str, *args):
    """
    Generates a short audio preview for either a selected SoundFont or the
    8-bit Synthesizer, using the Super Mario Bros. theme as a test melody.

    This function acts as a router:
    - If the 8-bit Synthesizer is selected, it renders with the internal
      `synthesize_8bit_style` function, using the current UI settings.
    - Otherwise the name is looked up in `soundfonts_dict` and the melody is
      rendered through `midi_to_colab_audio` with that SoundFont.

    Args:
        sound_source_name (str): The name of the SoundFont or the 8-bit synth label.
        *args: The current UI setting values, in the same order as
            `ALL_PARAM_KEYS`; they are zipped with those keys to build an
            AppParameters object for the 8-bit synth preview.

    Returns:
        A Gradio-compatible audio tuple (sample_rate, numpy_array), or None
        when rendering fails (the error is printed to the console).

    Raises:
        gr.Error: If the selected SoundFont is unknown or its file is missing.
    """
    srate = 44100  # Use a standard sample rate for all previews.
    preview_midi = _build_preview_midi()

    # CASE 1: 8-bit Synthesizer Preview
    if sound_source_name == SYNTH_8_BIT_LABEL:
        print("Generating preview for: 8-bit Synthesizer")
        try:
            # Create a temporary AppParameters object from the current UI settings
            params = AppParameters(**dict(zip(ALL_PARAM_KEYS, args)))

            # Use the internal synthesizer to render the preview MIDI
            audio_waveform = synthesize_8bit_style(midi_data=preview_midi, fs=srate, params=params)

            # Peak-normalize out of place: in-place division would mutate the
            # synthesizer's array and raises for integer dtypes.
            peak_val = np.max(np.abs(audio_waveform))
            if peak_val > 0:
                audio_waveform = audio_waveform / peak_val

            # Transposed before the int16 conversion; presumably the synth
            # returns (channels, samples) while Gradio wants (samples, channels)
            # -- TODO confirm against synthesize_8bit_style.
            audio_out = (audio_waveform.T * 32767).astype(np.int16)

            print("8-bit preview generated successfully.")
            return (srate, audio_out)

        except Exception as e:
            # Best effort: log and return no audio rather than surface an error.
            print(f"An error occurred during 8-bit preview generation: {e}")
            return None

    # CASE 2: SoundFont Preview
    soundfont_path = soundfonts_dict.get(sound_source_name)
    if not soundfont_path or not os.path.exists(soundfont_path):
        print(f"Preview failed: SoundFont file not found at '{soundfont_path}'")
        raise gr.Error(f"Could not find the SoundFont file for '{sound_source_name}'.")

    try:
        print(f"Generating preview for: {sound_source_name}")
        # Convert the in-memory MIDI object to a binary stream.
        midi_io = io.BytesIO()
        preview_midi.write(midi_io)
        midi_data = midi_io.getvalue()

        # Use the existing rendering function to generate the audio.
        audio_out = midi_to_colab_audio(
            midi_data,
            soundfont_path=soundfont_path,
            sample_rate=srate,
            output_for_gradio=True
        )

        # Only hand a NumPy array to Gradio; anything else is treated as a
        # rendering failure.
        if isinstance(audio_out, np.ndarray):
            print("SoundFont preview generated successfully.")
            return (srate, audio_out)

        print("Preview failed: Rendering function did not return valid audio data.")
        return None

    except Exception as e:
        # Catch any other errors, including from the renderer, and report
        # them on the console instead of propagating them to the UI.
        print(f"An error occurred during SoundFont preview generation: {e}")
        return None
665
+
666
+
667
  def scale_instrument_velocity(instrument, scale=0.8):
668
  for note in instrument.notes:
669
  note.velocity = max(1, min(127, int(note.velocity * scale)))
 
2599
  value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
2600
  elem_classes="description-box" # Optional: for CSS styling
2601
  )
2602
+ # --- SoundFont Bank with Preview Button ---
2603
+ with gr.Row(elem_id="soundfont_selector_row"):
2604
+ soundfont_bank = gr.Dropdown(
2605
+ [SYNTH_8_BIT_LABEL] + list(soundfonts_dict.keys()),
2606
+ label="SoundFont / Synthesizer",
2607
+ value=list(soundfonts_dict.keys())[0] if soundfonts_dict else SYNTH_8_BIT_LABEL,
2608
+ scale=4 # Give the dropdown more space
2609
+ )
2610
+ # The preview button, with a speaker icon for clarity.
2611
+ preview_sf_button = gr.Button("🔊 Preview", scale=1)
2612
+
2613
+ # This audio player is dedicated to playing the preview clips.
2614
+ # It's not interactive, as it's for output only.
2615
+ preview_sf_player = gr.Audio(label="SoundFont Preview", interactive=False, show_label=False)
2616
  render_sample_rate = gr.Radio(
2617
  ["16000", "32000", "44100"],
2618
  label="Audio Sample Rate",
 
2924
  inputs=s8bit_echo_sustain,
2925
  outputs=echo_sustain_settings
2926
  )
2927
+ # --- Event listener for the unified sound source preview button ---
2928
+ preview_sf_button.click(
2929
+ fn=preview_sound_source,
2930
+ inputs=[soundfont_bank] + all_settings_components,
2931
+ outputs=[preview_sf_player]
2932
+ )
2933
 
2934
  # Launch the Gradio app
2935
  app.queue().launch(inbrowser=True, debug=True)