feat(ui): Implement interactive preview for sound sources
Browse files
This commit introduces a live audio preview system that lets users instantly audition the selected SoundFont or the current 8-bit Synthesizer settings without running a full render. This significantly improves the sound design workflow.
app.py
CHANGED
@@ -536,6 +536,134 @@ def analyze_midi_velocity(midi_path):
|
|
536 |
print("No notes found in this MIDI.")
|
537 |
|
538 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
539 |
def scale_instrument_velocity(instrument, scale=0.8):
|
540 |
for note in instrument.notes:
|
541 |
note.velocity = max(1, min(127, int(note.velocity * scale)))
|
@@ -2471,11 +2599,20 @@ if __name__ == "__main__":
|
|
2471 |
value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
|
2472 |
elem_classes="description-box" # Optional: for CSS styling
|
2473 |
)
|
2474 |
-
# --- SoundFont Bank with
|
2475 |
-
|
2476 |
-
|
2477 |
-
|
2478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2479 |
render_sample_rate = gr.Radio(
|
2480 |
["16000", "32000", "44100"],
|
2481 |
label="Audio Sample Rate",
|
@@ -2787,6 +2924,12 @@ if __name__ == "__main__":
|
|
2787 |
inputs=s8bit_echo_sustain,
|
2788 |
outputs=echo_sustain_settings
|
2789 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2790 |
|
2791 |
# Launch the Gradio app
|
2792 |
app.queue().launch(inbrowser=True, debug=True)
|
|
|
536 |
print("No notes found in this MIDI.")
|
537 |
|
538 |
|
539 |
+
def _build_preview_midi():
    """Build the short in-memory test melody shared by every sound preview.

    The melody is the opening of the Super Mario Bros. theme, played on a
    single lead instrument at a fixed, high velocity.

    Returns:
        A ``pretty_midi.PrettyMIDI`` object containing one instrument.
    """
    preview_midi = pretty_midi.PrettyMIDI()

    # Program 81 (Lead 2, sawtooth) is a good, bright default lead.
    instrument = pretty_midi.Instrument(program=81, is_drum=False, name="Preview Lead")

    # At 200 BPM a beat lasts 0.3 s; one step is half a beat (an eighth
    # note), i.e. 0.15 s. Note times are written in absolute seconds, so the
    # tempo is only used to derive the step length.
    tempo = 200.0
    time_per_step = 60.0 / tempo / 2

    # (MIDI pitch, duration in steps); MIDI pitch 60 = C4 (Middle C).
    melody_data = [
        (76, 1), (76, 2), (76, 2), (72, 1), (76, 2),  # E E E C E
        (79, 4), (67, 4),                             # G G(low)
    ]

    current_time = 0.0
    for pitch, duration_steps in melody_data:
        start_time = current_time
        end_time = start_time + (duration_steps * time_per_step)
        instrument.notes.append(
            pretty_midi.Note(
                velocity=120,  # High velocity for a bright, clear sound.
                pitch=pitch,
                start=start_time,
                # End slightly early so repeated pitches re-trigger clearly.
                end=end_time - 0.01,
            )
        )
        current_time = end_time

    preview_midi.instruments.append(instrument)
    return preview_midi


def _render_8bit_preview(preview_midi, srate, args):
    """Render the preview melody with the internal 8-bit synthesizer.

    Args:
        preview_midi: The in-memory MIDI object to render.
        srate: Output sample rate in Hz.
        args: Current UI setting values, paired positionally with
            ``ALL_PARAM_KEYS`` to build an ``AppParameters`` object.

    Returns:
        ``(srate, int16_array)`` on success, or ``None`` if anything fails
        (the error is printed to the console).
    """
    print("Generating preview for: 8-bit Synthesizer")
    try:
        # Temporary parameter object mirroring the current UI settings.
        # NOTE(review): zip() silently truncates if `args` and
        # ALL_PARAM_KEYS differ in length -- confirm the caller keeps them in sync.
        params = AppParameters(**dict(zip(ALL_PARAM_KEYS, args)))

        audio_waveform = synthesize_8bit_style(midi_data=preview_midi, fs=srate, params=params)

        # Peak-normalize. A non-in-place division is used so the array
        # returned by the synthesizer is not mutated and an integer-typed
        # waveform does not raise on true division.
        peak_val = np.max(np.abs(audio_waveform))
        if peak_val > 0:
            audio_waveform = audio_waveform / peak_val

        # Per the original author's note, the synth returns
        # (channels, samples) while Gradio needs (samples, channels).
        audio_out = (audio_waveform.T * 32767).astype(np.int16)

        print("8-bit preview generated successfully.")
        return (srate, audio_out)
    except Exception as e:
        # Deliberate best-effort: a failed preview must not break the UI.
        print(f"An error occurred during 8-bit preview generation: {e}")
        return None


def _render_soundfont_preview(preview_midi, srate, sound_source_name):
    """Render the preview melody through the SoundFont renderer.

    Args:
        preview_midi: The in-memory MIDI object to render.
        srate: Output sample rate in Hz.
        sound_source_name: Key into ``soundfonts_dict`` naming the SoundFont.

    Returns:
        ``(srate, numpy_array)`` on success, or ``None`` if rendering fails
        or does not produce a NumPy array.

    Raises:
        gr.Error: If no existing SoundFont file is registered under
            ``sound_source_name``.
    """
    soundfont_path = soundfonts_dict.get(sound_source_name)
    if not soundfont_path or not os.path.exists(soundfont_path):
        print(f"Preview failed: SoundFont file not found at '{soundfont_path}'")
        raise gr.Error(f"Could not find the SoundFont file for '{sound_source_name}'.")

    try:
        print(f"Generating preview for: {sound_source_name}")

        # Serialize the in-memory MIDI object to bytes for the renderer.
        midi_io = io.BytesIO()
        preview_midi.write(midi_io)
        midi_data = midi_io.getvalue()

        audio_out = midi_to_colab_audio(
            midi_data,
            soundfont_path=soundfont_path,
            sample_rate=srate,
            output_for_gradio=True,
        )

        # Only a NumPy array can be handed to Gradio; anything else is
        # treated as a failed render rather than passed through.
        if isinstance(audio_out, np.ndarray):
            print("SoundFont preview generated successfully.")
            return (srate, audio_out)

        print("Preview failed: Rendering function did not return valid audio data.")
        return None
    except Exception as e:
        # Deliberate best-effort: report renderer errors on the console and
        # return None rather than failing the UI callback.
        print(f"An error occurred during SoundFont preview generation: {e}")
        return None


def preview_sound_source(sound_source_name: str, *args):
    """
    Generate a short audio preview for a SoundFont or the 8-bit Synthesizer.

    A fixed test melody (the Super Mario Bros. theme intro) is built in
    memory and routed to one of two renderers:
    - If ``sound_source_name`` equals ``SYNTH_8_BIT_LABEL``, the internal
      `synthesize_8bit_style` function is used with the current UI settings.
    - Otherwise the name is looked up in ``soundfonts_dict`` and rendered
      via `midi_to_colab_audio`.

    Args:
        sound_source_name (str): The name of the SoundFont or the 8-bit synth label.
        *args: All current UI settings, used to build an AppParameters
               object for the 8-bit synth preview (ignored for SoundFonts).

    Returns:
        A Gradio-compatible audio tuple (sample_rate, numpy_array), or None
        if rendering failed (details are printed to the console).

    Raises:
        gr.Error: If a SoundFont is requested but its file cannot be found.
    """
    srate = 44100  # Use a standard sample rate for all previews.
    preview_midi = _build_preview_midi()

    if sound_source_name == SYNTH_8_BIT_LABEL:
        return _render_8bit_preview(preview_midi, srate, args)
    return _render_soundfont_preview(preview_midi, srate, sound_source_name)
|
665 |
+
|
666 |
+
|
667 |
def scale_instrument_velocity(instrument, scale=0.8):
|
668 |
for note in instrument.notes:
|
669 |
note.velocity = max(1, min(127, int(note.velocity * scale)))
|
|
|
2599 |
value=RENDER_TYPE_DESCRIPTIONS["Render as-is"], # Set initial value
|
2600 |
elem_classes="description-box" # Optional: for CSS styling
|
2601 |
)
|
2602 |
+
# --- SoundFont Bank with Preview Button ---
|
2603 |
+
with gr.Row(elem_id="soundfont_selector_row"):
|
2604 |
+
soundfont_bank = gr.Dropdown(
|
2605 |
+
[SYNTH_8_BIT_LABEL] + list(soundfonts_dict.keys()),
|
2606 |
+
label="SoundFont / Synthesizer",
|
2607 |
+
value=list(soundfonts_dict.keys())[0] if soundfonts_dict else SYNTH_8_BIT_LABEL,
|
2608 |
+
scale=4 # Give the dropdown more space
|
2609 |
+
)
|
2610 |
+
# The preview button, with a speaker icon for clarity.
|
2611 |
+
preview_sf_button = gr.Button("🔊 Preview", scale=1)
|
2612 |
+
|
2613 |
+
# This audio player is dedicated to playing the preview clips.
|
2614 |
+
# It's not interactive, as it's for output only.
|
2615 |
+
preview_sf_player = gr.Audio(label="SoundFont Preview", interactive=False, show_label=False)
|
2616 |
render_sample_rate = gr.Radio(
|
2617 |
["16000", "32000", "44100"],
|
2618 |
label="Audio Sample Rate",
|
|
|
2924 |
inputs=s8bit_echo_sustain,
|
2925 |
outputs=echo_sustain_settings
|
2926 |
)
|
2927 |
+
# --- Event listener for the unified sound source preview button ---
|
2928 |
+
preview_sf_button.click(
|
2929 |
+
fn=preview_sound_source,
|
2930 |
+
inputs=[soundfont_bank] + all_settings_components,
|
2931 |
+
outputs=[preview_sf_player]
|
2932 |
+
)
|
2933 |
|
2934 |
# Launch the Gradio app
|
2935 |
app.queue().launch(inbrowser=True, debug=True)
|