feat(synth): Enhance 8-bit synthesizer with anti-aliasing and progress tracking
This commit introduces a major overhaul of the 8-bit synthesizer to significantly improve its audio quality and provide better user feedback during rendering.
The primary motivations were to reduce sonic harshness caused by digital aliasing and to offer real-time progress updates for a better user experience, especially on long tracks.
1. Advanced Anti-Aliasing and Audio Quality Engine:
Implemented a suite of optional, high-quality DSP features to produce a cleaner, less harsh sound. These are controlled by a new "Audio Quality & Anti-Aliasing" section in the UI.
- **Additive Synthesis:** Added a high-quality (but CPU-intensive) band-limited waveform generator to eliminate aliasing at its source.
- **Waveform Edge Smoothing:** A low-cost alternative that applies a configurable sub-millisecond smoothing filter to the edges of standard waveforms.
- **Dynamic Harmonic Filtering:** A new frequency-dependent lowpass filter is applied to each note, intelligently removing excessive high-frequency harmonics while preserving the note's fundamental character.
- **Improved Distortion and Noise:** The distortion algorithm is now a smoother soft-clipping function, and the noise generator includes a lowpass filter to soften its texture.
- **Final Limiter:** A final gain stage with a soft limiter (`tanh`) is applied to prevent digital clipping and manage dynamics.
2. Note-Level Rendering Progress Bar:
The `synthesize_8bit_style` function has been refactored to provide detailed, real-time progress feedback.
The progress bar (`tqdm`) is now applied to the inner **note-processing loop** instead of the outer instrument loop.
This provides a smooth, granular progress update that accurately reflects the CPU workload, which is especially useful for MIDI files with thousands of notes but few instrument tracks.
The progress tracking system has been made robust to work seamlessly in both single-file and batch-processing modes.
@@ -166,12 +166,80 @@ class AppParameters:
|
|
166 |
s8bit_echo_rate_hz: float = 5.0
|
167 |
s8bit_echo_decay_factor: float = 0.6
|
168 |
s8bit_echo_trigger_threshold: float = 2.5
|
169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
|
171 |
# =================================================================================================
|
172 |
# === Helper Functions ===
|
173 |
# =================================================================================================
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
def prepare_soundfonts():
|
176 |
"""
|
177 |
Ensures a default set of SoundFonts are downloaded, then scans the 'src/sf2'
|
@@ -258,7 +326,7 @@ def prepare_soundfonts():
|
|
258 |
# =================================================================================================
|
259 |
# === 8-bit Style Synthesizer (Stereo Enabled) ===
|
260 |
# =================================================================================================
|
261 |
-
def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params: AppParameters):
|
262 |
"""
|
263 |
Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
|
264 |
This function generates waveforms manually instead of using a synthesizer like FluidSynth.
|
@@ -266,6 +334,7 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params
|
|
266 |
Instruments are panned based on their order in the MIDI file.
|
267 |
Instrument 1 -> Left, Instrument 2 -> Right.
|
268 |
Now supports graded levels for smoothing and vibrato continuity.
|
|
|
269 |
"""
|
270 |
total_duration = midi_data.get_end_time()
|
271 |
# Initialize a stereo waveform buffer (2 channels: Left, Right)
|
@@ -277,7 +346,13 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params
|
|
277 |
osc_phase = {}
|
278 |
# Vibrato phase tracking
|
279 |
vibrato_phase = 0.0
|
|
|
|
|
|
|
280 |
|
|
|
|
|
|
|
281 |
for i, instrument in enumerate(midi_data.instruments):
|
282 |
# --- Panning Logic ---
|
283 |
# Default to center-panned mono
|
@@ -293,218 +368,274 @@ def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params
|
|
293 |
elif i == 1: # Right
|
294 |
pan_l, pan_r = 0.0, 1.0
|
295 |
# Other instruments remain centered
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
297 |
osc_phase[i] = 0.0 # Independent phase tracking for each instrument
|
298 |
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
num_samples = int(note_duration * fs)
|
303 |
-
if num_samples <= 0:
|
304 |
-
continue
|
305 |
-
|
306 |
-
t = np.arange(num_samples) / fs
|
307 |
-
|
308 |
-
# --- Graded Continuous Vibrato ---
|
309 |
-
# This now interpolates between a fully reset vibrato and a fully continuous one.
|
310 |
-
# Use accumulated phase to avoid vibrato reset per note
|
311 |
-
vib_phase_inc = 2 * np.pi * params.s8bit_vibrato_rate / fs
|
312 |
-
per_note_vib_phase = 2 * np.pi * params.s8bit_vibrato_rate * t
|
313 |
-
continuous_vib_phase = vibrato_phase + np.arange(num_samples) * vib_phase_inc
|
314 |
-
|
315 |
-
# Weighted average of the two phase types
|
316 |
-
final_vib_phase = (
|
317 |
-
per_note_vib_phase * (1 - params.s8bit_continuous_vibrato_level) +
|
318 |
-
continuous_vib_phase * params.s8bit_continuous_vibrato_level
|
319 |
-
)
|
320 |
-
vibrato_lfo = params.s8bit_vibrato_depth * np.sin(final_vib_phase)
|
321 |
-
|
322 |
-
# Update the global vibrato phase for the next note
|
323 |
-
if num_samples > 0:
|
324 |
-
vibrato_phase = (continuous_vib_phase[-1] + vib_phase_inc) % (2 * np.pi)
|
325 |
|
326 |
-
|
327 |
-
|
328 |
-
|
|
|
|
|
|
|
|
|
329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
330 |
# --- Waveform Generation (Main Oscillator with phase continuity) ---
|
331 |
phase_inc = 2 * np.pi * (modulated_freq + vibrato_lfo) / fs
|
332 |
phase = osc_phase[i] + np.cumsum(phase_inc)
|
333 |
if num_samples > 0:
|
334 |
osc_phase[i] = phase[-1] % (2 * np.pi) # Store last phase
|
335 |
-
|
336 |
if params.s8bit_waveform_type == 'Square':
|
337 |
note_waveform = signal.square(phase, duty=params.s8bit_pulse_width)
|
338 |
elif params.s8bit_waveform_type == 'Sawtooth':
|
339 |
note_waveform = signal.sawtooth(phase)
|
340 |
-
else: # Triangle
|
341 |
note_waveform = signal.sawtooth(phase, width=0.5)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
|
349 |
-
|
350 |
-
|
351 |
-
|
352 |
-
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
# --- Noise & Distortion Simulation (White Noise) ---
|
358 |
-
if params.s8bit_noise_level > 0:
|
359 |
-
note_waveform += np.random.uniform(-1, 1, num_samples) * params.s8bit_noise_level
|
360 |
-
|
361 |
-
# --- Distortion (Wave Shaping) ---
|
362 |
-
if params.s8bit_distortion_level > 0:
|
363 |
# Using a tanh function for a smoother, "warmer" distortion
|
364 |
note_waveform = np.tanh(note_waveform * (1 + params.s8bit_distortion_level * 5))
|
365 |
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
# Apply the truncated decay curve.
|
395 |
-
envelope[attack_samples : attack_samples + num_samples_to_take] = ideal_decay_curve[:num_samples_to_take]
|
396 |
-
|
397 |
-
# --- Original Decay Logic (Fallback) ---
|
398 |
-
else:
|
399 |
-
decay_samples = min(int(params.s8bit_decay_time_s * fs), num_samples - attack_samples)
|
400 |
envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
#
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
if
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
echo_amplitude = start_amp * params.s8bit_echo_decay_factor
|
457 |
-
|
458 |
-
# Start placing echoes after the first pluck has finished.
|
459 |
-
current_sample_offset = initial_pluck_samples
|
460 |
|
461 |
-
|
462 |
-
|
463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
464 |
|
465 |
-
# Create a
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
elif params.s8bit_waveform_type == 'Sawtooth':
|
484 |
-
echo_waveform_segment = signal.sawtooth(phase_echo)
|
485 |
-
else: # Triangle
|
486 |
-
echo_waveform_segment = signal.sawtooth(phase_echo, width=0.5)
|
487 |
-
|
488 |
-
# Add the enveloped echo on top of the already-enveloped main waveform
|
489 |
-
note_waveform[current_sample_offset : current_sample_offset + echo_delay_samples] += echo_waveform_segment * echo_envelope
|
490 |
-
|
491 |
-
# Prepare for the next echo.
|
492 |
-
echo_amplitude *= params.s8bit_echo_decay_factor
|
493 |
-
|
494 |
-
current_sample_offset += echo_delay_samples
|
495 |
-
# --- END of Echo Sustain Logic ---
|
496 |
-
|
497 |
-
|
498 |
-
start_sample = int(note.start * fs)
|
499 |
-
end_sample = start_sample + num_samples
|
500 |
-
if end_sample > waveform.shape[1]:
|
501 |
-
end_sample = waveform.shape[1]
|
502 |
-
note_waveform = note_waveform[:end_sample-start_sample]
|
503 |
-
|
504 |
-
# Add the mono note waveform to the stereo buffer with panning
|
505 |
-
waveform[0, start_sample:end_sample] += note_waveform * pan_l
|
506 |
-
waveform[1, start_sample:end_sample] += note_waveform * pan_r
|
507 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
508 |
return waveform # Returns a (2, N) numpy array
|
509 |
|
510 |
|
@@ -922,7 +1053,7 @@ def TranscribeGeneralAudio(input_file, onset_threshold, frame_threshold, minimum
|
|
922 |
# === Stage 2: MIDI Transformation and Rendering Function ===
|
923 |
# =================================================================================================
|
924 |
|
925 |
-
def Render_MIDI(*, input_midi_path: str, params: AppParameters):
|
926 |
"""
|
927 |
Processes and renders a MIDI file according to user-defined settings.
|
928 |
Can render using SoundFonts or a custom 8-bit synthesizer.
|
@@ -1152,7 +1283,7 @@ def Render_MIDI(*, input_midi_path: str, params: AppParameters):
|
|
1152 |
midi_data_for_synth = pretty_midi.PrettyMIDI(midi_to_render_path)
|
1153 |
# Synthesize the waveform
|
1154 |
# --- Passing new FX parameters to the synthesis function ---
|
1155 |
-
audio = synthesize_8bit_style(midi_data=midi_data_for_synth, fs=srate, params=params)
|
1156 |
# Normalize and prepare for Gradio
|
1157 |
peak_val = np.max(np.abs(audio))
|
1158 |
if peak_val > 0:
|
@@ -1649,7 +1780,7 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
|
|
1649 |
print(f"Proceeding to render MIDI file: {os.path.basename(midi_path_for_rendering)}")
|
1650 |
|
1651 |
# Call the rendering function, Pass dictionaries directly to Render_MIDI
|
1652 |
-
results_tuple = Render_MIDI(input_midi_path=midi_path_for_rendering, params=params)
|
1653 |
|
1654 |
# --- Final Audio Merging Logic ---
|
1655 |
stems_to_merge = []
|
@@ -1751,6 +1882,32 @@ def run_single_file_pipeline(input_file_path: str, timestamp: str, params: AppPa
|
|
1751 |
# === Gradio UI Wrappers ===
|
1752 |
# =================================================================================================
|
1753 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1754 |
# --- Thin wrapper for batch processing ---
|
1755 |
def batch_process_files(input_files, progress=gr.Progress(track_tqdm=True), *args):
|
1756 |
"""
|
@@ -1782,19 +1939,18 @@ def batch_process_files(input_files, progress=gr.Progress(track_tqdm=True), *arg
|
|
1782 |
input_path = file_obj.name
|
1783 |
filename = os.path.basename(input_path)
|
1784 |
|
1785 |
-
# ---
|
1786 |
-
#
|
1787 |
-
#
|
1788 |
-
|
1789 |
-
|
1790 |
-
|
1791 |
-
|
1792 |
-
|
1793 |
-
|
1794 |
-
progress(i / total_files, desc=f"Processing {os.path.basename(input_path)} ({i+1}/{total_files})")
|
1795 |
|
1796 |
# --- Pass the batch_timestamp to the pipeline ---
|
1797 |
-
results, _ = run_single_file_pipeline(input_path, batch_timestamp, copy.copy(params), progress=
|
1798 |
|
1799 |
if results:
|
1800 |
if results.get("final_audio_path"):
|
@@ -2558,7 +2714,13 @@ if __name__ == "__main__":
|
|
2558 |
transcribe_other_or_accompaniment = gr.Checkbox(label="Transcribe Accompaniment", value=True)
|
2559 |
|
2560 |
gr.Markdown("#### 3. Audio Merging Targets")
|
2561 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
2562 |
with gr.Row():
|
2563 |
merge_vocals_to_render = gr.Checkbox(label="Merge Vocals", value=False)
|
2564 |
# These two will be hidden/shown dynamically
|
@@ -2792,6 +2954,7 @@ if __name__ == "__main__":
|
|
2792 |
label="FM Rate",
|
2793 |
info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
|
2794 |
)
|
|
|
2795 |
# This outer group ensures the checkbox and its settings are visually linked.
|
2796 |
with gr.Group():
|
2797 |
s8bit_echo_sustain = gr.Checkbox(
|
@@ -2816,6 +2979,39 @@ if __name__ == "__main__":
|
|
2816 |
label="Echo Trigger Threshold (x Decay Time)",
|
2817 |
info="Controls how long a note must be to trigger echoes. This value is a multiplier of the 'Decay Time'. Example: If 'Decay Time' is 0.1s and this threshold is set to 10.0, only notes longer than 1.0s (0.1 * 10.0) will produce echoes."
|
2818 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2819 |
|
2820 |
# Create a dictionary mapping key names to the actual Gradio components
|
2821 |
ui_component_map = locals()
|
@@ -2930,6 +3126,12 @@ if __name__ == "__main__":
|
|
2930 |
inputs=[soundfont_bank] + all_settings_components,
|
2931 |
outputs=[preview_sf_player]
|
2932 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
2933 |
|
2934 |
# Launch the Gradio app
|
2935 |
app.queue().launch(inbrowser=True, debug=True)
|
|
|
166 |
s8bit_echo_rate_hz: float = 5.0
|
167 |
s8bit_echo_decay_factor: float = 0.6
|
168 |
s8bit_echo_trigger_threshold: float = 2.5
|
169 |
+
|
170 |
+
# --- Anti-Aliasing & Quality Parameters ---
|
171 |
+
s8bit_enable_anti_aliasing: bool = True # Main toggle for all new quality features
|
172 |
+
s8bit_use_additive_synthesis: bool = False # High-quality but CPU-intensive waveform generation
|
173 |
+
s8bit_edge_smoothing_ms: float = 0.5 # Mild smoothing for standard waveforms (0 to disable)
|
174 |
+
s8bit_noise_lowpass_hz: float = 9000.0 # Lowpass filter frequency for noise
|
175 |
+
s8bit_harmonic_lowpass_factor: float = 12.0 # Multiplier for frequency-dependent lowpass filter
|
176 |
+
s8bit_final_gain: float = 0.8 # Final gain/limiter level to prevent clipping
|
177 |
|
178 |
# =================================================================================================
|
179 |
# === Helper Functions ===
|
180 |
# =================================================================================================
|
181 |
|
182 |
+
def one_pole_lowpass(x, cutoff_hz, fs):
    """Apply a causal one-pole lowpass filter to a 1-D signal.

    Implements the recurrence ``y[n] = y[n-1] + alpha * (x[n] - y[n-1])``
    starting from a zero state (so ``y[0] = alpha * x[0]``), where
    ``alpha = dt / (rc + dt)``, ``dt = 1 / fs`` and
    ``rc = 1 / (2 * pi * cutoff_hz)``.

    Args:
        x: 1-D array-like of samples.
        cutoff_hz: Cutoff frequency in Hz.
        fs: Sample rate in Hz.

    Returns:
        ``x`` itself (unfiltered) when ``cutoff_hz`` is non-positive or at or
        above ``fs / 2``. Otherwise a new float array of the same length;
        an empty input yields an empty array instead of raising.
    """
    # Local import keeps this helper self-contained regardless of how the
    # module imports scipy at the top of the file.
    from scipy.signal import lfilter

    if cutoff_hz <= 0 or cutoff_hz >= fs / 2:
        return x

    # Work in floating point so integer input is not truncated sample by sample.
    samples = np.asarray(x, dtype=float)
    if samples.size == 0:
        return samples

    dt = 1.0 / fs
    rc = 1.0 / (2 * np.pi * cutoff_hz)
    alpha = dt / (rc + dt)

    # Same recurrence as the explicit loop, rewritten as
    # y[n] = alpha * x[n] + (1 - alpha) * y[n-1] and evaluated in compiled code.
    return lfilter([alpha], [1.0, alpha - 1.0], samples)
|
194 |
+
|
195 |
+
def smooth_square_or_saw(note_waveform, fs, smooth_ms=0.6):
    """Soften sharp waveform edges with a short, normalized triangular kernel.

    Args:
        note_waveform: 1-D array of samples.
        fs: Sample rate in Hz.
        smooth_ms: Nominal smoothing time in milliseconds. The triangular
            kernel has ``2 * int(fs * smooth_ms / 1000) - 1`` taps (at least
            one). Values ``<= 0`` disable smoothing.

    Returns:
        ``note_waveform`` itself when smoothing is disabled or the input is
        empty; otherwise a new array with exactly as many samples as the
        input, including when the input is shorter than the kernel.
    """
    if smooth_ms <= 0:
        return note_waveform

    num_samples = len(note_waveform)
    if num_samples == 0:
        # np.convolve rejects empty input; there is nothing to smooth anyway.
        return note_waveform

    kernel_len = max(1, int(fs * (smooth_ms / 1000.0)))
    # Convolving two boxcars gives a triangle of length 2 * kernel_len - 1.
    kernel = np.convolve(np.ones(kernel_len), np.ones(kernel_len))
    kernel = kernel / kernel.sum()

    # mode='same' would return max(len(signal), len(kernel)) samples, which
    # makes very short notes grow. Take the centered slice of the full
    # convolution instead; for inputs at least as long as the kernel this is
    # the same result as mode='same'.
    full = np.convolve(note_waveform, kernel, mode='full')
    start = kernel_len - 1
    return full[start:start + num_samples]
|
206 |
+
|
207 |
+
def additive_bandlimited_waveform(wave_type, freq, t, fs, max_harmonics_cap=200):
    """Build a band-limited waveform by summing sine harmonics.

    The number of harmonics is ``int((fs / 2) // freq)``, capped at
    ``max_harmonics_cap``. Each harmonic ``k`` contributes
    ``sin(2*pi*k*freq*t) / k``.

    Args:
        wave_type: ``'Sawtooth'`` sums every harmonic and scales by
            ``-2/pi``; any other value sums odd harmonics only and scales
            by ``4/pi`` (square wave).
        freq: Fundamental frequency in Hz.
        t: Array of sample times in seconds.
        fs: Sample rate in Hz.
        max_harmonics_cap: Upper bound on the highest harmonic number used.

    Returns:
        Array shaped like ``t``, clipped to ``[-1, 1]``. All zeros when not
        even the fundamental fits below ``fs / 2``.
    """
    highest_harmonic = int((fs / 2.0) // freq)
    if highest_harmonic < 1:
        return np.zeros_like(t)
    highest_harmonic = min(highest_harmonic, max_harmonics_cap)

    # Sawtooth uses every harmonic; square uses the odd ones only.
    if wave_type == 'Sawtooth':
        stride, scale = 1, -(2 / np.pi)
    else:
        stride, scale = 2, (4 / np.pi)

    partial_sum = np.zeros_like(t)
    for k in range(1, highest_harmonic + 1, stride):
        partial_sum += np.sin(2 * np.pi * k * freq * t) / k

    # The truncated Fourier series overshoots near the edges; clamp to [-1, 1].
    return np.clip(scale * partial_sum, -1.0, 1.0)
|
236 |
+
|
237 |
+
def safe_tanh_distortion(x, strength):
    """Soft-clip ``x`` with a tanh curve whose drive grows with ``strength``.

    The drive factor is ``1 + 4 * strength``. The output is divided by
    ``tanh(drive)`` so that an input of exactly 1.0 maps to 1.0.

    Args:
        x: Scalar or array of samples.
        strength: Distortion amount; 0 gives a drive of 1.

    Returns:
        ``tanh(x * drive) / tanh(drive)``.
    """
    gain = strength * 4.0 + 1.0
    shaped = np.tanh(gain * x)
    return shaped / np.tanh(gain)
|
242 |
+
|
243 |
def prepare_soundfonts():
|
244 |
"""
|
245 |
Ensures a default set of SoundFonts are downloaded, then scans the 'src/sf2'
|
|
|
326 |
# =================================================================================================
|
327 |
# === 8-bit Style Synthesizer (Stereo Enabled) ===
|
328 |
# =================================================================================================
|
329 |
+
def synthesize_8bit_style(*, midi_data: pretty_midi.PrettyMIDI, fs: int, params: AppParameters, progress: gr.Progress = None):
|
330 |
"""
|
331 |
Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
|
332 |
This function generates waveforms manually instead of using a synthesizer like FluidSynth.
|
|
|
334 |
Instruments are panned based on their order in the MIDI file.
|
335 |
Instrument 1 -> Left, Instrument 2 -> Right.
|
336 |
Now supports graded levels for smoothing and vibrato continuity.
|
337 |
+
This enhanced version includes advanced anti-aliasing and quality features to produce a cleaner, less harsh sound.
|
338 |
"""
|
339 |
total_duration = midi_data.get_end_time()
|
340 |
# Initialize a stereo waveform buffer (2 channels: Left, Right)
|
|
|
346 |
osc_phase = {}
|
347 |
# Vibrato phase tracking
|
348 |
vibrato_phase = 0.0
|
349 |
+
|
350 |
+
# Retrieve anti-aliasing settings, using getattr for backward compatibility
|
351 |
+
use_aa = getattr(params, 's8bit_enable_anti_aliasing', False)
|
352 |
|
353 |
+
# --- Move progress tracking to the note level ---
|
354 |
+
# 1. First, collect all notes from all instruments into a single list.
|
355 |
+
all_notes_with_instrument_info = []
|
356 |
for i, instrument in enumerate(midi_data.instruments):
|
357 |
# --- Panning Logic ---
|
358 |
# Default to center-panned mono
|
|
|
368 |
elif i == 1: # Right
|
369 |
pan_l, pan_r = 0.0, 1.0
|
370 |
# Other instruments remain centered
|
371 |
+
|
372 |
+
# Store each note along with its parent instrument index and panning info
|
373 |
+
for note in instrument.notes:
|
374 |
+
all_notes_with_instrument_info.append({'note': note, 'instrument_index': i, 'pan_l': pan_l, 'pan_r': pan_r})
|
375 |
+
|
376 |
+
# Initialize oscillator phase for each instrument
|
377 |
osc_phase[i] = 0.0 # Independent phase tracking for each instrument
|
378 |
|
379 |
+
# 2. Create an iterable for the main note-processing loop.
|
380 |
+
notes_iterable = all_notes_with_instrument_info
|
381 |
+
total_notes = len(notes_iterable)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
|
383 |
+
# 3. Wrap this new iterable with tqdm if a progress object is available.
|
384 |
+
if progress and hasattr(progress, 'tqdm'):
|
385 |
+
notes_iterable = progress.tqdm(
|
386 |
+
notes_iterable,
|
387 |
+
desc="Synthesizing Notes...",
|
388 |
+
total=total_notes
|
389 |
+
)
|
390 |
|
391 |
+
# 4. The main loop now iterates over individual notes, not instruments.
|
392 |
+
for item in notes_iterable:
|
393 |
+
note = item['note']
|
394 |
+
i = item['instrument_index']
|
395 |
+
pan_l = item['pan_l']
|
396 |
+
pan_r = item['pan_r']
|
397 |
+
|
398 |
+
freq = pretty_midi.note_number_to_hz(note.pitch)
|
399 |
+
note_duration = note.end - note.start
|
400 |
+
num_samples = int(note_duration * fs)
|
401 |
+
if num_samples <= 0:
|
402 |
+
continue
|
403 |
+
|
404 |
+
t = np.arange(num_samples) / fs
|
405 |
+
|
406 |
+
# --- Graded Continuous Vibrato ---
|
407 |
+
# This now interpolates between a fully reset vibrato and a fully continuous one.
|
408 |
+
# Use accumulated phase to avoid vibrato reset per note
|
409 |
+
vib_phase_inc = 2 * np.pi * params.s8bit_vibrato_rate / fs
|
410 |
+
per_note_vib_phase = 2 * np.pi * params.s8bit_vibrato_rate * t
|
411 |
+
continuous_vib_phase = vibrato_phase + np.arange(num_samples) * vib_phase_inc
|
412 |
+
|
413 |
+
# Weighted average of the two phase types
|
414 |
+
final_vib_phase = (
|
415 |
+
per_note_vib_phase * (1 - params.s8bit_continuous_vibrato_level) +
|
416 |
+
continuous_vib_phase * params.s8bit_continuous_vibrato_level
|
417 |
+
)
|
418 |
+
vibrato_lfo = params.s8bit_vibrato_depth * np.sin(final_vib_phase)
|
419 |
+
|
420 |
+
# Update the global vibrato phase for the next note
|
421 |
+
if num_samples > 0:
|
422 |
+
vibrato_phase = (continuous_vib_phase[-1] + vib_phase_inc) % (2 * np.pi)
|
423 |
+
|
424 |
+
# --- Waveform Generation with FM ---
|
425 |
+
fm_lfo = params.s8bit_fm_modulation_depth * np.sin(2 * np.pi * params.s8bit_fm_modulation_rate * t)
|
426 |
+
modulated_freq = freq * (1 + fm_lfo)
|
427 |
+
|
428 |
+
# --- Waveform Generation (with Anti-Aliasing options) ---
|
429 |
+
use_additive = use_aa and getattr(params, 's8bit_use_additive_synthesis', False)
|
430 |
+
if use_additive and params.s8bit_waveform_type in ['Square', 'Sawtooth']:
|
431 |
+
note_waveform = additive_bandlimited_waveform(params.s8bit_waveform_type, freq, t, fs)
|
432 |
+
else:
|
433 |
# --- Waveform Generation (Main Oscillator with phase continuity) ---
|
434 |
phase_inc = 2 * np.pi * (modulated_freq + vibrato_lfo) / fs
|
435 |
phase = osc_phase[i] + np.cumsum(phase_inc)
|
436 |
if num_samples > 0:
|
437 |
osc_phase[i] = phase[-1] % (2 * np.pi) # Store last phase
|
438 |
+
|
439 |
if params.s8bit_waveform_type == 'Square':
|
440 |
note_waveform = signal.square(phase, duty=params.s8bit_pulse_width)
|
441 |
elif params.s8bit_waveform_type == 'Sawtooth':
|
442 |
note_waveform = signal.sawtooth(phase)
|
443 |
+
else: # Triangle (less prone to aliasing)
|
444 |
note_waveform = signal.sawtooth(phase, width=0.5)
|
445 |
+
|
446 |
+
if use_aa and params.s8bit_waveform_type in ['Square', 'Sawtooth']:
|
447 |
+
edge_smooth_ms = getattr(params, 's8bit_edge_smoothing_ms', 0.5)
|
448 |
+
note_waveform = smooth_square_or_saw(note_waveform, fs, smooth_ms=edge_smooth_ms)
|
449 |
+
|
450 |
+
# --- Bass Boost (Sub-Octave Oscillator) ---
|
451 |
+
if params.s8bit_bass_boost_level > 0:
|
452 |
+
bass_freq = freq / 2.0
|
453 |
+
# Only add bass if the frequency is reasonably audible
|
454 |
+
if bass_freq > 20:
|
455 |
+
# Bass uses a simple square wave, no vibrato, for stability
|
456 |
+
bass_phase_inc = 2 * np.pi * bass_freq / fs
|
457 |
+
bass_phase = np.cumsum(np.full(num_samples, bass_phase_inc))
|
458 |
+
bass_sub_waveform = signal.square(bass_phase, duty=0.5)
|
459 |
+
# Mix the main and bass waveforms.
|
460 |
+
# As bass level increases, slightly decrease main waveform volume to prevent clipping.
|
461 |
+
main_level = 1.0 - (0.5 * params.s8bit_bass_boost_level)
|
462 |
+
note_waveform = (note_waveform * main_level) + (bass_sub_waveform * params.s8bit_bass_boost_level)
|
463 |
|
464 |
+
# --- Noise & Distortion (Reordered and Improved) ---
|
465 |
+
if params.s8bit_noise_level > 0:
|
466 |
+
raw_noise = np.random.uniform(-1, 1, num_samples) * params.s8bit_noise_level
|
467 |
+
if use_aa:
|
468 |
+
noise_cutoff = getattr(params, 's8bit_noise_lowpass_hz', 9000.0)
|
469 |
+
raw_noise = one_pole_lowpass(raw_noise, cutoff_hz=noise_cutoff, fs=fs)
|
470 |
+
note_waveform += raw_noise
|
471 |
+
|
472 |
+
# --- Distortion (Wave Shaping) ---
|
473 |
+
if params.s8bit_distortion_level > 0:
|
474 |
+
if use_aa:
|
475 |
+
note_waveform = safe_tanh_distortion(note_waveform, params.s8bit_distortion_level)
|
476 |
+
else: # Original harsher distortion
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
477 |
# Using a tanh function for a smoother, "warmer" distortion
|
478 |
note_waveform = np.tanh(note_waveform * (1 + params.s8bit_distortion_level * 5))
|
479 |
|
480 |
+
# --- ADSR Envelope Generation (with improvements) ---
|
481 |
+
start_amp = note.velocity / 127.0
|
482 |
+
envelope = np.zeros(num_samples)
|
483 |
+
|
484 |
+
min_attack_s = 0.001 # 1 ms minimum attack to prevent clicks
|
485 |
+
if params.s8bit_envelope_type == 'Plucky (AD Envelope)':
|
486 |
+
attack_samples = max(int(min_attack_s * fs), min(int(0.005 * fs), num_samples))
|
487 |
+
|
488 |
+
# --- Adaptive Decay Logic ---
|
489 |
+
# This ensures short staccato notes have the same initial decay rate
|
490 |
+
# as long notes, fixing the perceived low volume issue.
|
491 |
+
if params.s8bit_adaptive_decay:
|
492 |
+
# 1. Calculate the "ideal" number of decay samples based on the user's setting.
|
493 |
+
ideal_decay_samples = int(params.s8bit_decay_time_s * fs)
|
494 |
+
if ideal_decay_samples <= 0:
|
495 |
+
ideal_decay_samples = 1 # Avoid division by zero.
|
496 |
+
|
497 |
+
# 2. Create the full, "ideal" decay curve from peak to zero.
|
498 |
+
ideal_decay_curve = np.linspace(start_amp, 0, ideal_decay_samples)
|
499 |
+
|
500 |
+
# 3. Determine how many decay samples can actually fit in this note's duration.
|
501 |
+
actual_decay_samples = num_samples - attack_samples
|
502 |
+
|
503 |
+
if actual_decay_samples > 0:
|
504 |
+
# 4. Take the initial part of the ideal curve, sized to fit the note.
|
505 |
+
num_samples_to_take = min(len(ideal_decay_curve), actual_decay_samples)
|
506 |
+
|
507 |
+
# Apply the attack portion.
|
|
|
|
|
|
|
|
|
|
|
|
|
508 |
envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
|
509 |
+
# Apply the truncated decay curve.
|
510 |
+
envelope[attack_samples : attack_samples + num_samples_to_take] = ideal_decay_curve[:num_samples_to_take]
|
511 |
+
|
512 |
+
# --- Original Decay Logic (Fallback) ---
|
513 |
+
else:
|
514 |
+
decay_samples = min(int(params.s8bit_decay_time_s * fs), num_samples - attack_samples)
|
515 |
+
envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
|
516 |
+
if decay_samples > 0:
|
517 |
+
envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
|
518 |
+
|
519 |
+
else: # Sustained
|
520 |
+
envelope = np.linspace(start_amp, 0, num_samples)
|
521 |
+
if use_aa and num_samples > 20: # Add a tiny release fade to prevent clicks
|
522 |
+
release_samples = int(min(0.005*fs, num_samples // 10))
|
523 |
+
if release_samples > 0:
|
524 |
+
envelope[-release_samples:] *= np.linspace(1.0, 0.0, release_samples)
|
525 |
+
|
526 |
+
# --- Hybrid Note Smoothing (Proportional with an Absolute Cap) ---
|
527 |
+
# This improved logic calculates the fade duration as a percentage of the note's
|
528 |
+
# length but caps it at a fixed maximum duration. This provides the best of both worlds:
|
529 |
+
# it preserves volume on short notes while ensuring long notes have a crisp attack.
|
530 |
+
if params.s8bit_smooth_notes_level > 0 and num_samples > 10:
|
531 |
+
# 1. Define the maximum allowable fade time in seconds (e.g., 30ms).
|
532 |
+
# This prevents fades from becoming too long on sustained notes.
|
533 |
+
max_fade_duration_s = 0.03
|
534 |
+
|
535 |
+
# 2. Calculate the proportional fade length based on the note's duration.
|
536 |
+
# At level 1.0, this is 10% of the note's start and 10% of its end.
|
537 |
+
fade_percentage = 0.1 * params.s8bit_smooth_notes_level
|
538 |
+
proportional_fade_samples = int(num_samples * fade_percentage)
|
539 |
+
|
540 |
+
# 3. Calculate the absolute maximum fade length in samples.
|
541 |
+
absolute_max_fade_samples = int(fs * max_fade_duration_s)
|
542 |
+
|
543 |
+
# 4. The final fade_samples is the SMALLEST of the three constraints:
|
544 |
+
# a) The proportional length.
|
545 |
+
# b) The absolute maximum length.
|
546 |
+
# c) Half the note's total length (to prevent overlap).
|
547 |
+
fade_samples = min(proportional_fade_samples, absolute_max_fade_samples, num_samples // 2)
|
548 |
+
|
549 |
+
if fade_samples > 0:
|
550 |
+
# Apply a fade-in to the attack portion of the envelope.
|
551 |
+
envelope[:fade_samples] *= np.linspace(0.5, 1.0, fade_samples)
|
552 |
+
# Apply a fade-out to the tail portion of the envelope.
|
553 |
+
envelope[-fade_samples:] *= np.linspace(1.0, 0.0, fade_samples)
|
554 |
+
|
555 |
+
# Apply envelope to the (potentially combined) waveform
|
556 |
+
note_waveform *= envelope
|
557 |
+
|
558 |
+
# =========================================================================
|
559 |
+
# === Echo Sustain Logic for Long Plucky Notes (Now works correctly) ===
|
560 |
+
# =========================================================================
|
561 |
+
# This feature fills the silent tail of long notes with decaying echoes.
|
562 |
+
# It is applied only for Plucky envelopes and after the main envelope has been applied.
|
563 |
+
if params.s8bit_envelope_type == 'Plucky (AD Envelope)' and params.s8bit_echo_sustain and num_samples > 0:
|
564 |
+
|
565 |
+
# The duration of the initial pluck is determined by its decay time.
|
566 |
+
initial_pluck_duration_s = params.s8bit_decay_time_s
|
567 |
+
initial_pluck_samples = int(initial_pluck_duration_s * fs)
|
568 |
|
569 |
+
# Check if the note is long enough to even need echoes.
|
570 |
+
if num_samples > initial_pluck_samples * params.s8bit_echo_trigger_threshold: # Only trigger if there's significant empty space.
|
571 |
+
|
572 |
+
# Calculate the properties of the echoes.
|
573 |
+
echo_delay_samples = int(fs / params.s8bit_echo_rate_hz)
|
574 |
+
if echo_delay_samples > 0: # Prevent infinite loops
|
575 |
+
echo_amplitude = start_amp * params.s8bit_echo_decay_factor
|
576 |
+
|
577 |
+
# Start placing echoes after the first pluck has finished.
|
578 |
+
current_sample_offset = initial_pluck_samples
|
|
|
|
|
|
|
|
|
579 |
|
580 |
+
while current_sample_offset < num_samples:
|
581 |
+
# Ensure there's space for a new echo.
|
582 |
+
if current_sample_offset + echo_delay_samples <= num_samples:
|
583 |
+
|
584 |
+
# Create a very short, plucky envelope for the echo.
|
585 |
+
echo_attack_samples = min(int(0.002 * fs), echo_delay_samples) # 2ms attack
|
586 |
+
echo_decay_samples = echo_delay_samples - echo_attack_samples
|
587 |
+
|
588 |
+
if echo_decay_samples > 0:
|
589 |
+
# Create the small echo envelope shape.
|
590 |
+
echo_envelope = np.zeros(echo_delay_samples)
|
591 |
+
echo_envelope[:echo_attack_samples] = np.linspace(0, echo_amplitude, echo_attack_samples)
|
592 |
+
echo_envelope[echo_attack_samples:] = np.linspace(echo_amplitude, 0, echo_decay_samples)
|
593 |
|
594 |
+
# Create a temporary waveform for the echo and apply the envelope.
|
595 |
+
# It reuses the main note's frequency and oscillator phase.
|
596 |
+
# Re-calculating phase here is simpler than tracking, for additive synthesis
|
597 |
+
phase_inc_echo = 2 * np.pi * freq / fs
|
598 |
+
phase_echo = np.cumsum(np.full(echo_delay_samples, phase_inc_echo))
|
599 |
+
|
600 |
+
if params.s8bit_waveform_type == 'Square':
|
601 |
+
echo_waveform_segment = signal.square(phase_echo, duty=params.s8bit_pulse_width)
|
602 |
+
elif params.s8bit_waveform_type == 'Sawtooth':
|
603 |
+
echo_waveform_segment = signal.sawtooth(phase_echo)
|
604 |
+
else: # Triangle
|
605 |
+
echo_waveform_segment = signal.sawtooth(phase_echo, width=0.5)
|
606 |
+
|
607 |
+
# Add the enveloped echo on top of the already-enveloped main waveform
|
608 |
+
note_waveform[current_sample_offset : current_sample_offset + echo_delay_samples] += echo_waveform_segment * echo_envelope
|
609 |
+
|
610 |
+
# Prepare for the next echo.
|
611 |
+
echo_amplitude *= params.s8bit_echo_decay_factor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
612 |
|
613 |
+
current_sample_offset += echo_delay_samples
|
614 |
+
# --- END of Echo Sustain Logic ---
|
615 |
+
|
616 |
+
# --- Final Processing Stage (Per-Note) ---
|
617 |
+
if use_aa:
|
618 |
+
# 1. Frequency-dependent lowpass filter
|
619 |
+
harm_limit = getattr(params, 's8bit_harmonic_lowpass_factor', 12.0)
|
620 |
+
cutoff = min(fs * 0.45, max(3000.0, freq * harm_limit))
|
621 |
+
note_waveform = one_pole_lowpass(note_waveform, cutoff_hz=cutoff, fs=fs)
|
622 |
+
|
623 |
+
# 2. Final Gain and Soft Limiter
|
624 |
+
final_gain = getattr(params, 's8bit_final_gain', 0.8)
|
625 |
+
note_waveform *= final_gain
|
626 |
+
note_waveform = np.tanh(note_waveform) # Soft clip/limit
|
627 |
+
|
628 |
+
# --- Add to main waveform buffer ---
|
629 |
+
start_sample = int(note.start * fs)
|
630 |
+
end_sample = start_sample + num_samples
|
631 |
+
if end_sample > waveform.shape[1]:
|
632 |
+
end_sample = waveform.shape[1]
|
633 |
+
note_waveform = note_waveform[:end_sample-start_sample]
|
634 |
+
|
635 |
+
# Add the mono note waveform to the stereo buffer with panning
|
636 |
+
waveform[0, start_sample:end_sample] += note_waveform * pan_l
|
637 |
+
waveform[1, start_sample:end_sample] += note_waveform * pan_r
|
638 |
+
|
639 |
return waveform # Returns a (2, N) numpy array
|
640 |
|
641 |
|
|
|
1053 |
# === Stage 2: MIDI Transformation and Rendering Function ===
|
1054 |
# =================================================================================================
|
1055 |
|
1056 |
+
def Render_MIDI(*, input_midi_path: str, params: AppParameters, progress: gr.Progress = None):
|
1057 |
"""
|
1058 |
Processes and renders a MIDI file according to user-defined settings.
|
1059 |
Can render using SoundFonts or a custom 8-bit synthesizer.
|
|
|
1283 |
midi_data_for_synth = pretty_midi.PrettyMIDI(midi_to_render_path)
|
1284 |
# Synthesize the waveform
|
1285 |
# --- Passing new FX parameters to the synthesis function ---
|
1286 |
+
audio = synthesize_8bit_style(midi_data=midi_data_for_synth, fs=srate, params=params, progress=progress)
|
1287 |
# Normalize and prepare for Gradio
|
1288 |
peak_val = np.max(np.abs(audio))
|
1289 |
if peak_val > 0:
|
|
|
1780 |
print(f"Proceeding to render MIDI file: {os.path.basename(midi_path_for_rendering)}")
|
1781 |
|
1782 |
# Call the rendering function, passing the params object and the progress tracker to Render_MIDI
|
1783 |
+
results_tuple = Render_MIDI(input_midi_path=midi_path_for_rendering, params=params, progress=progress)
|
1784 |
|
1785 |
# --- Final Audio Merging Logic ---
|
1786 |
stems_to_merge = []
|
|
|
1882 |
# === Gradio UI Wrappers ===
|
1883 |
# =================================================================================================
|
1884 |
|
1885 |
+
class BatchProgressTracker:
    """
    Progress adapter for one file inside a batch run.

    Wraps the batch-level progress object so that code processing a single
    file can report progress in two ways:

    * calling the tracker directly with a local fraction, which is mapped
      onto the slice of the overall bar that belongs to the current file;
    * calling ``.tqdm(...)``, which delegates to the wrapped object's
      ``tqdm`` method.

    In both cases the description is prefixed with
    ``"(<file number>/<total files>) <filename>: "``.
    """

    def __init__(self, main_progress: "gr.Progress", total_files: int, current_file_index: int, filename: str):
        """
        Args:
            main_progress: The wrapped progress object. It must be callable as
                ``main_progress(fraction, desc=...)`` and expose
                ``tqdm(iterable, desc=..., total=...)``.
            total_files: Number of files in the batch.
            current_file_index: Zero-based index of the file being processed.
            filename: Display name used in progress descriptions.
        """
        self._main_progress = main_progress
        self._total_files = total_files
        self._current_file_index = current_file_index
        self._filename = filename
        # Share of the overall bar owned by a single file; 0 for an empty batch.
        self._progress_per_file = 1 / total_files if total_files > 0 else 0

    def _label(self, desc: str) -> str:
        """Build the '(i/N) filename: desc' description shared by both reporting paths."""
        return f"({self._current_file_index + 1}/{self._total_files}) {self._filename}: {desc}"

    def __call__(self, local_fraction: float, desc: str = ""):
        """
        Report progress for the current file and forward it to the main bar.

        Args:
            local_fraction: Progress within the current file.
            desc: Short description of the current step.
        """
        # Guard the division the same way __init__ guards _progress_per_file,
        # so an empty batch reports 0 instead of raising ZeroDivisionError.
        if self._total_files > 0:
            completed_share = self._current_file_index / self._total_files
        else:
            completed_share = 0.0
        overall_fraction = completed_share + (local_fraction * self._progress_per_file)
        # Update the main progress bar
        self._main_progress(overall_fraction, desc=self._label(desc))

    def tqdm(self, iterable, desc="", total=None):
        """
        Iterate over ``iterable`` through the wrapped progress object's ``tqdm``.

        Args:
            iterable: The items to iterate over.
            desc: Short description of the loop; the file prefix is added to it.
            total: Optional item count, forwarded unchanged.

        Returns:
            Whatever the wrapped object's ``tqdm`` returns.
        """
        return self._main_progress.tqdm(iterable, desc=self._label(desc), total=total)
|
1910 |
+
|
1911 |
# --- Thin wrapper for batch processing ---
|
1912 |
def batch_process_files(input_files, progress=gr.Progress(track_tqdm=True), *args):
|
1913 |
"""
|
|
|
1939 |
input_path = file_obj.name
|
1940 |
filename = os.path.basename(input_path)
|
1941 |
|
1942 |
+
# --- Use the new BatchProgressTracker class ---
|
1943 |
+
# Instead of a simple function, create an instance of our tracker class.
|
1944 |
+
# This object can both update the main progress and has a .tqdm method.
|
1945 |
+
batch_progress_tracker = BatchProgressTracker(
|
1946 |
+
main_progress=progress,
|
1947 |
+
total_files=total_files,
|
1948 |
+
current_file_index=i,
|
1949 |
+
filename=filename
|
1950 |
+
)
|
|
|
1951 |
|
1952 |
# --- Pass the batch_timestamp to the pipeline ---
|
1953 |
+
results, _ = run_single_file_pipeline(input_path, batch_timestamp, copy.copy(params), progress=batch_progress_tracker)
|
1954 |
|
1955 |
if results:
|
1956 |
if results.get("final_audio_path"):
|
|
|
2714 |
transcribe_other_or_accompaniment = gr.Checkbox(label="Transcribe Accompaniment", value=True)
|
2715 |
|
2716 |
gr.Markdown("#### 3. Audio Merging Targets")
|
2717 |
+
gr.Markdown(
|
2718 |
+
"""
|
2719 |
+
_Select which **original, unprocessed** audio stems to merge back into the final output.
|
2720 |
+
This does **not** use the transcribed MIDI; it uses the raw audio from the initial separation.
|
2721 |
+
You can leave all boxes unchecked. This step only affects the final audio file, not the MIDI output._
|
2722 |
+
"""
|
2723 |
+
)
|
2724 |
with gr.Row():
|
2725 |
merge_vocals_to_render = gr.Checkbox(label="Merge Vocals", value=False)
|
2726 |
# These two will be hidden/shown dynamically
|
|
|
2954 |
label="FM Rate",
|
2955 |
info="Frequency Modulation speed. Low values create a slow 'wobble'. High values create fast modulation, resulting in bright, dissonant harmonics."
|
2956 |
)
|
2957 |
+
# --- Echo Sustain Feature Block (Visually Grouped) ---
|
2958 |
# This outer group ensures the checkbox and its settings are visually linked.
|
2959 |
with gr.Group():
|
2960 |
s8bit_echo_sustain = gr.Checkbox(
|
|
|
2979 |
label="Echo Trigger Threshold (x Decay Time)",
|
2980 |
info="Controls how long a note must be to trigger echoes. This value is a multiplier of the 'Decay Time'. Example: If 'Decay Time' is 0.1s and this threshold is set to 10.0, only notes longer than 1.0s (0.1 * 10.0) will produce echoes."
|
2981 |
)
|
2982 |
+
# --- NEW: Accordion for Anti-Aliasing and Quality Settings ---
|
2983 |
+
with gr.Accordion("Audio Quality & Anti-Aliasing (Advanced)", open=False):
|
2984 |
+
s8bit_enable_anti_aliasing = gr.Checkbox(
|
2985 |
+
value=True,
|
2986 |
+
label="Enable All Audio Quality Enhancements",
|
2987 |
+
info="Master toggle for all settings below. Disabling may slightly speed up rendering but can result in harsher, more aliased sound."
|
2988 |
+
)
|
2989 |
+
with gr.Group(visible=True) as anti_aliasing_settings_box:
|
2990 |
+
s8bit_use_additive_synthesis = gr.Checkbox(
|
2991 |
+
value=False,
|
2992 |
+
label="Use Additive Synthesis (High Quality, High CPU)",
|
2993 |
+
info="Generates band-limited waveforms to drastically reduce aliasing (harshness). Slower to render but produces a much cleaner sound. Note: The other anti-aliasing settings below will still apply even if this is disabled."
|
2994 |
+
)
|
2995 |
+
s8bit_edge_smoothing_ms = gr.Slider(
|
2996 |
+
0.0, 2.0, value=0.5, step=0.1,
|
2997 |
+
label="Waveform Edge Smoothing (ms)",
|
2998 |
+
info="Applies a tiny blur to the sharp edges of standard Square/Sawtooth waves to reduce aliasing. A cheap and effective alternative to Additive Synthesis."
|
2999 |
+
)
|
3000 |
+
s8bit_noise_lowpass_hz = gr.Slider(
|
3001 |
+
1000, 20000, value=9000, step=500,
|
3002 |
+
label="Noise Lowpass Filter (Hz)",
|
3003 |
+
info="Applies a lowpass filter to the white noise, making it sound softer and less harsh. Lower values produce a 'darker' noise."
|
3004 |
+
)
|
3005 |
+
s8bit_harmonic_lowpass_factor = gr.Slider(
|
3006 |
+
4.0, 32.0, value=12.0, step=0.5,
|
3007 |
+
label="Harmonic Lowpass Factor",
|
3008 |
+
info="Controls a dynamic lowpass filter. The cutoff frequency is (Note Frequency * this factor). Lower values create a darker, more muted sound."
|
3009 |
+
)
|
3010 |
+
s8bit_final_gain = gr.Slider(
|
3011 |
+
0.1, 1.5, value=0.8, step=0.05,
|
3012 |
+
label="Final Gain / Limiter Level",
|
3013 |
+
info="A final volume adjustment before adding the sound to the mix. Values > 1.0 can introduce soft clipping (distortion)."
|
3014 |
+
)
|
3015 |
|
3016 |
# Create a dictionary mapping key names to the actual Gradio components
|
3017 |
ui_component_map = locals()
|
|
|
3126 |
inputs=[soundfont_bank] + all_settings_components,
|
3127 |
outputs=[preview_sf_player]
|
3128 |
)
|
3129 |
+
# Event listener for the new Anti-Aliasing settings box
|
3130 |
+
s8bit_enable_anti_aliasing.change(
|
3131 |
+
fn=lambda x: gr.update(visible=x),
|
3132 |
+
inputs=s8bit_enable_anti_aliasing,
|
3133 |
+
outputs=anti_aliasing_settings_box
|
3134 |
+
)
|
3135 |
|
3136 |
# Launch the Gradio app
|
3137 |
app.queue().launch(inbrowser=True, debug=True)
|