feat(synth): Implement Echo Sustain for plucky envelopes
Browse files
Adds an "Echo Sustain" effect to the 8-bit synthesizer. This feature fills the silent tail of long notes with decaying pulses when using a plucky envelope, preventing them from sounding choppy or empty.
app.py
CHANGED
@@ -148,6 +148,10 @@ class AppParameters:
|
|
148 |
s8bit_fm_modulation_depth: float = 0.0
|
149 |
s8bit_fm_modulation_rate: float = 0.0
|
150 |
s8bit_adaptive_decay: bool = False
|
|
|
|
|
|
|
|
|
151 |
|
152 |
|
153 |
# =================================================================================================
|
@@ -382,7 +386,7 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params
|
|
382 |
envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
|
383 |
if decay_samples > 0:
|
384 |
envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
|
385 |
-
|
386 |
else: # Sustained
|
387 |
envelope = np.linspace(start_amp, 0, num_samples)
|
388 |
|
@@ -418,6 +422,65 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params
|
|
418 |
# Apply envelope to the (potentially combined) waveform
|
419 |
note_waveform *= envelope
|
420 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
start_sample = int(note.start * fs)
|
422 |
end_sample = start_sample + num_samples
|
423 |
if end_sample > waveform.shape[1]:
|
@@ -2290,13 +2353,15 @@ if __name__ == "__main__":
|
|
2290 |
with gr.Accordion("▶️ Configure Global Settings (for both Single File and Batch)", open=True):
|
2291 |
with gr.Row():
|
2292 |
with gr.Column(scale=1):
|
2293 |
-
gr.Markdown("### Transcription Settings"
|
|
|
|
|
2294 |
# --- Transcription Method Selector ---
|
2295 |
transcription_method = gr.Radio(["General Purpose", "Piano-Specific"], label="Audio Transcription Method", value="General Purpose",
|
2296 |
info="Choose 'General Purpose' for most music (vocals, etc.). Choose 'Piano-Specific' only for solo piano recordings.")
|
2297 |
# --- Stereo Processing Checkbox ---
|
2298 |
enable_stereo_processing = gr.Checkbox(label="Enable Stereo Transcription", value=False,
|
2299 |
-
info="
|
2300 |
|
2301 |
# --- Vocal Separation Checkboxes ---
|
2302 |
with gr.Group():
|
@@ -2504,7 +2569,7 @@ if __name__ == "__main__":
|
|
2504 |
info="Controls vibrato continuity across notes. Low values (0) reset vibrato on each note (bouncy). High values (1) create a smooth, connected 'singing' vibrato."
|
2505 |
)
|
2506 |
# --- New accordion for advanced effects ---
|
2507 |
-
with gr.Accordion("Advanced Synthesis & FX", open=
|
2508 |
s8bit_noise_level = gr.Slider(
|
2509 |
0.0, 1.0, value=0.0, step=0.05,
|
2510 |
label="Noise Level",
|
@@ -2525,6 +2590,30 @@ if __name__ == "__main__":
|
|
2525 |
label="FM Rate",
|
2526 |
info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
|
2527 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2528 |
|
2529 |
# Create a dictionary mapping key names to the actual Gradio components
|
2530 |
ui_component_map = locals()
|
@@ -2611,6 +2700,12 @@ if __name__ == "__main__":
|
|
2611 |
inputs=render_type,
|
2612 |
outputs=render_type_info
|
2613 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2614 |
|
2615 |
# Launch the Gradio app
|
2616 |
app.queue().launch(inbrowser=True, debug=True)
|
|
|
148 |
s8bit_fm_modulation_depth: float = 0.0
|
149 |
s8bit_fm_modulation_rate: float = 0.0
|
150 |
s8bit_adaptive_decay: bool = False
|
151 |
+
s8bit_echo_sustain: bool = False
|
152 |
+
s8bit_echo_rate_hz: float = 5.0
|
153 |
+
s8bit_echo_decay_factor: float = 0.6
|
154 |
+
s8bit_echo_trigger_threshold: float = 2.5
|
155 |
|
156 |
|
157 |
# =================================================================================================
|
|
|
386 |
envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
|
387 |
if decay_samples > 0:
|
388 |
envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
|
389 |
+
|
390 |
else: # Sustained
|
391 |
envelope = np.linspace(start_amp, 0, num_samples)
|
392 |
|
|
|
422 |
# Apply envelope to the (potentially combined) waveform
|
423 |
note_waveform *= envelope
|
424 |
|
425 |
+
# =========================================================================
|
426 |
+
# === Echo Sustain Logic for Long Plucky Notes (Now works correctly) ===
|
427 |
+
# =========================================================================
|
428 |
+
# This feature fills the silent tail of long notes with decaying echoes.
|
429 |
+
# It is applied only for Plucky envelopes and after the main envelope has been applied.
|
430 |
+
if params.s8bit_envelope_type == 'Plucky (AD Envelope)' and params.s8bit_echo_sustain and num_samples > 0:
|
431 |
+
|
432 |
+
# The duration of the initial pluck is determined by its decay time.
|
433 |
+
initial_pluck_duration_s = params.s8bit_decay_time_s
|
434 |
+
initial_pluck_samples = int(initial_pluck_duration_s * fs)
|
435 |
+
|
436 |
+
# Check if the note is long enough to even need echoes.
|
437 |
+
if num_samples > initial_pluck_samples * params.s8bit_echo_trigger_threshold: # Only trigger if there's significant empty space.
|
438 |
+
|
439 |
+
# Calculate the properties of the echoes.
|
440 |
+
echo_delay_samples = int(fs / params.s8bit_echo_rate_hz)
|
441 |
+
if echo_delay_samples > 0: # Prevent infinite loops
|
442 |
+
echo_amplitude = start_amp * params.s8bit_echo_decay_factor
|
443 |
+
|
444 |
+
# Start placing echoes after the first pluck has finished.
|
445 |
+
current_sample_offset = initial_pluck_samples
|
446 |
+
|
447 |
+
while current_sample_offset < num_samples:
|
448 |
+
# Ensure there's space for a new echo.
|
449 |
+
if current_sample_offset + echo_delay_samples <= num_samples:
|
450 |
+
|
451 |
+
# Create a very short, plucky envelope for the echo.
|
452 |
+
echo_attack_samples = min(int(0.002 * fs), echo_delay_samples) # 2ms attack
|
453 |
+
echo_decay_samples = echo_delay_samples - echo_attack_samples
|
454 |
+
|
455 |
+
if echo_decay_samples > 0:
|
456 |
+
# Create the small echo envelope shape.
|
457 |
+
echo_envelope = np.zeros(echo_delay_samples)
|
458 |
+
echo_envelope[:echo_attack_samples] = np.linspace(0, echo_amplitude, echo_attack_samples)
|
459 |
+
echo_envelope[echo_attack_samples:] = np.linspace(echo_amplitude, 0, echo_decay_samples)
|
460 |
+
|
461 |
+
# Create a temporary waveform for the echo and apply the envelope.
|
462 |
+
# It reuses the main note's frequency; the oscillator phase is recomputed from scratch for each echo.
|
463 |
+
# Re-calculating phase here is simpler than tracking, for additive synthesis
|
464 |
+
phase_inc_echo = 2 * np.pi * freq / fs
|
465 |
+
phase_echo = np.cumsum(np.full(echo_delay_samples, phase_inc_echo))
|
466 |
+
|
467 |
+
if params.s8bit_waveform_type == 'Square':
|
468 |
+
echo_waveform_segment = signal.square(phase_echo, duty=params.s8bit_pulse_width)
|
469 |
+
elif params.s8bit_waveform_type == 'Sawtooth':
|
470 |
+
echo_waveform_segment = signal.sawtooth(phase_echo)
|
471 |
+
else: # Triangle
|
472 |
+
echo_waveform_segment = signal.sawtooth(phase_echo, width=0.5)
|
473 |
+
|
474 |
+
# Add the enveloped echo on top of the already-enveloped main waveform
|
475 |
+
note_waveform[current_sample_offset : current_sample_offset + echo_delay_samples] += echo_waveform_segment * echo_envelope
|
476 |
+
|
477 |
+
# Prepare for the next echo.
|
478 |
+
echo_amplitude *= params.s8bit_echo_decay_factor
|
479 |
+
|
480 |
+
current_sample_offset += echo_delay_samples
|
481 |
+
# --- END of Echo Sustain Logic ---
|
482 |
+
|
483 |
+
|
484 |
start_sample = int(note.start * fs)
|
485 |
end_sample = start_sample + num_samples
|
486 |
if end_sample > waveform.shape[1]:
|
|
|
2353 |
with gr.Accordion("▶️ Configure Global Settings (for both Single File and Batch)", open=True):
|
2354 |
with gr.Row():
|
2355 |
with gr.Column(scale=1):
|
2356 |
+
gr.Markdown("### Transcription Settings\n"
|
2357 |
+
"> _**Note:** This entire section is for audio-to-MIDI conversion. All settings here are ignored if a MIDI file is uploaded._"
|
2358 |
+
)
|
2359 |
# --- Transcription Method Selector ---
|
2360 |
transcription_method = gr.Radio(["General Purpose", "Piano-Specific"], label="Audio Transcription Method", value="General Purpose",
|
2361 |
info="Choose 'General Purpose' for most music (vocals, etc.). Choose 'Piano-Specific' only for solo piano recordings.")
|
2362 |
# --- Stereo Processing Checkbox ---
|
2363 |
enable_stereo_processing = gr.Checkbox(label="Enable Stereo Transcription", value=False,
|
2364 |
+
info="For stereo audio files only. When enabled, transcribes left and right channels independently, then merges them. Note: This will double the transcription time.")
|
2365 |
|
2366 |
# --- Vocal Separation Checkboxes ---
|
2367 |
with gr.Group():
|
|
|
2569 |
info="Controls vibrato continuity across notes. Low values (0) reset vibrato on each note (bouncy). High values (1) create a smooth, connected 'singing' vibrato."
|
2570 |
)
|
2571 |
# --- New accordion for advanced effects ---
|
2572 |
+
with gr.Accordion("Advanced Synthesis & FX", open=True):
|
2573 |
s8bit_noise_level = gr.Slider(
|
2574 |
0.0, 1.0, value=0.0, step=0.05,
|
2575 |
label="Noise Level",
|
|
|
2590 |
label="FM Rate",
|
2591 |
info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
|
2592 |
)
|
2593 |
+
# This outer group ensures the checkbox and its settings are visually linked.
|
2594 |
+
with gr.Group():
|
2595 |
+
s8bit_echo_sustain = gr.Checkbox(
|
2596 |
+
value=True, # Enabled by default in the UI. NOTE(review): AppParameters.s8bit_echo_sustain defaults to False - confirm which default is intended.
|
2597 |
+
label="Enable Echo Sustain for Long Notes",
|
2598 |
+
info="For 'Plucky' envelope only. Fills the silent tail of long, sustained notes with quiet, repeating pulses. Fixes 'choppy' sound on long piano notes."
|
2599 |
+
)
|
2600 |
+
# This inner group contains the sliders and is controlled by the checkbox above.
|
2601 |
+
with gr.Group(visible=True) as echo_sustain_settings:
|
2602 |
+
s8bit_echo_rate_hz = gr.Slider(
|
2603 |
+
1.0, 20.0, value=5.0, step=0.5,
|
2604 |
+
label="Echo Rate (Hz)",
|
2605 |
+
info="How many echoes (pulses) per second. Higher values create a faster, 'tremolo'-like effect."
|
2606 |
+
)
|
2607 |
+
s8bit_echo_decay_factor = gr.Slider(
|
2608 |
+
0.1, 0.95, value=0.45, step=0.05,
|
2609 |
+
label="Echo Decay Factor",
|
2610 |
+
info="How quickly the echoes fade. A value of 0.6 means each echo is 60% of the previous one's volume. Lower is faster."
|
2611 |
+
)
|
2612 |
+
s8bit_echo_trigger_threshold = gr.Slider(
|
2613 |
+
1.1, 30.0, value=20, step=0.1,
|
2614 |
+
label="Echo Trigger Threshold (x Decay Time)",
|
2615 |
+
info="Controls how long a note must be to trigger echoes. This value is a multiplier of the 'Decay Time'. Example: If 'Decay Time' is 0.1s and this threshold is set to 10.0, only notes longer than 1.0s (0.1 * 10.0) will produce echoes."
|
2616 |
+
)
|
2617 |
|
2618 |
# Create a dictionary mapping key names to the actual Gradio components
|
2619 |
ui_component_map = locals()
|
|
|
2700 |
inputs=render_type,
|
2701 |
outputs=render_type_info
|
2702 |
)
|
2703 |
+
# --- New event listener for the Echo Sustain UI ---
|
2704 |
+
s8bit_echo_sustain.change(
|
2705 |
+
fn=lambda x: gr.update(visible=x), # A simple lambda function to update visibility.
|
2706 |
+
inputs=s8bit_echo_sustain,
|
2707 |
+
outputs=echo_sustain_settings
|
2708 |
+
)
|
2709 |
|
2710 |
# Launch the Gradio app
|
2711 |
app.queue().launch(inbrowser=True, debug=True)
|