feat(synth): Add adjustable bass boost to 8-bit synthesizer
Browse files
This commit introduces a bass enhancement feature to the 8-bit synthesizer, allowing users to add more weight and depth to the sound.
app.py
CHANGED
@@ -3,14 +3,14 @@
|
|
3 |
# Merged and Integrated Script for Audio/MIDI Processing and Rendering
|
4 |
#
|
5 |
# This script combines two functionalities:
|
6 |
-
# 1. Transcribing audio
|
7 |
# a) A general-purpose model (basic-pitch by Spotify).
|
8 |
# b) A model specialized for solo piano (ByteDance).
|
9 |
# 2. Applying advanced transformations and re-rendering MIDI files using:
|
10 |
# a) Standard SoundFonts via FluidSynth.
|
11 |
# b) A custom 8-bit style synthesizer for a chiptune sound.
|
12 |
#
|
13 |
-
# The user can upload a WAV, MP3, or MIDI file.
|
14 |
# - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
|
15 |
# - The resulting MIDI (or an uploaded MIDI) can then be processed
|
16 |
# with various effects and rendered into audio.
|
@@ -160,10 +160,11 @@ def prepare_soundfonts():
|
|
160 |
# =================================================================================================
|
161 |
# === 8-bit Style Synthesizer ===
|
162 |
# =================================================================================================
|
163 |
-
def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, fs=44100):
|
164 |
"""
|
165 |
Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
|
166 |
This function generates waveforms manually instead of using a synthesizer like FluidSynth.
|
|
|
167 |
"""
|
168 |
total_duration = midi_data.get_end_time()
|
169 |
waveform = np.zeros(int(total_duration * fs) + fs)
|
@@ -181,7 +182,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
|
|
181 |
# --- Vibrato LFO ---
|
182 |
vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
|
183 |
|
184 |
-
# --- Waveform Generation ---
|
185 |
if waveform_type == 'Square':
|
186 |
note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
|
187 |
elif waveform_type == 'Sawtooth':
|
@@ -189,6 +190,18 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
|
|
189 |
elif waveform_type == 'Triangle':
|
190 |
note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
# --- ADSR Envelope ---
|
193 |
start_amp = note.velocity / 127.0
|
194 |
envelope = np.zeros(num_samples)
|
@@ -204,6 +217,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
|
|
204 |
elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
|
205 |
envelope = np.linspace(start_amp, 0, num_samples)
|
206 |
|
|
|
207 |
note_waveform *= envelope
|
208 |
|
209 |
start_sample = int(note.start * fs)
|
@@ -276,7 +290,7 @@ def TranscribePianoAudio(input_file):
|
|
276 |
|
277 |
def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
|
278 |
"""
|
279 |
-
Transcribes a general audio file
|
280 |
This is suitable for various instruments and vocals.
|
281 |
"""
|
282 |
print('=' * 70)
|
@@ -332,7 +346,8 @@ def Render_MIDI(input_midi_path,
|
|
332 |
render_remove_drums,
|
333 |
# --- 8-bit synth params ---
|
334 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
335 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
|
|
|
336 |
):
|
337 |
"""
|
338 |
Processes and renders a MIDI file according to user-defined settings.
|
@@ -555,6 +570,7 @@ def Render_MIDI(input_midi_path,
|
|
555 |
midi_data_for_synth,
|
556 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
557 |
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
|
|
558 |
fs=srate
|
559 |
)
|
560 |
# Normalize audio
|
@@ -619,7 +635,8 @@ def process_and_render_file(input_file,
|
|
619 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
620 |
# --- 8-bit synth params ---
|
621 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
622 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
|
|
|
623 |
):
|
624 |
"""
|
625 |
Main function to handle file processing. It determines the file type and calls the
|
@@ -662,7 +679,7 @@ def process_and_render_file(input_file,
|
|
662 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
663 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
664 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
665 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth)
|
666 |
|
667 |
print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
|
668 |
print('*' * 70)
|
@@ -705,7 +722,7 @@ if __name__ == "__main__":
|
|
705 |
with app:
|
706 |
gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
|
707 |
gr.Markdown(
|
708 |
-
"**Upload a
|
709 |
"This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
|
710 |
"Based on the work of [asigalov61](https://github.com/asigalov61)."
|
711 |
)
|
@@ -720,7 +737,7 @@ if __name__ == "__main__":
|
|
720 |
# type="filepath" ensures the component returns a string path to the uploaded file.
|
721 |
# The component will show a player for supported audio types (e.g., WAV, MP3).
|
722 |
input_file = gr.Audio(
|
723 |
-
label="Input Audio
|
724 |
type="filepath",
|
725 |
sources=["upload"], waveform_options=waveform_options
|
726 |
)
|
@@ -780,7 +797,8 @@ if __name__ == "__main__":
|
|
780 |
s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
|
781 |
s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
|
782 |
s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
|
783 |
-
|
|
|
784 |
# --- Original Advanced Options (Now tied to Piano-Specific) ---
|
785 |
with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
|
786 |
render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
|
@@ -789,7 +807,7 @@ if __name__ == "__main__":
|
|
789 |
render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
|
790 |
render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
|
791 |
custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
|
792 |
-
merge_misaligned_notes = gr.Slider(-1, 127, value=-1,
|
793 |
render_align = gr.Radio(
|
794 |
["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
|
795 |
label="Align notes to musical bars",
|
@@ -820,7 +838,7 @@ if __name__ == "__main__":
|
|
820 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
821 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
822 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
823 |
-
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
|
824 |
]
|
825 |
all_outputs = [
|
826 |
output_midi_md5, output_midi_title, output_midi_summary,
|
|
|
3 |
# Merged and Integrated Script for Audio/MIDI Processing and Rendering
|
4 |
#
|
5 |
# This script combines two functionalities:
|
6 |
+
# 1. Transcribing audio to MIDI using two methods:
|
7 |
# a) A general-purpose model (basic-pitch by Spotify).
|
8 |
# b) A model specialized for solo piano (ByteDance).
|
9 |
# 2. Applying advanced transformations and re-rendering MIDI files using:
|
10 |
# a) Standard SoundFonts via FluidSynth.
|
11 |
# b) A custom 8-bit style synthesizer for a chiptune sound.
|
12 |
#
|
13 |
+
# The user can upload an audio file (e.g., WAV, MP3) or a MIDI file.
|
14 |
# - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
|
15 |
# - The resulting MIDI (or an uploaded MIDI) can then be processed
|
16 |
# with various effects and rendered into audio.
|
|
|
160 |
# =================================================================================================
|
161 |
# === 8-bit Style Synthesizer ===
|
162 |
# =================================================================================================
|
163 |
+
def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, bass_boost_level, fs=44100):
|
164 |
"""
|
165 |
Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
|
166 |
This function generates waveforms manually instead of using a synthesizer like FluidSynth.
|
167 |
+
Includes an optional sub-octave bass booster with adjustable level.
|
168 |
"""
|
169 |
total_duration = midi_data.get_end_time()
|
170 |
waveform = np.zeros(int(total_duration * fs) + fs)
|
|
|
182 |
# --- Vibrato LFO ---
|
183 |
vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
|
184 |
|
185 |
+
# --- Waveform Generation (Main Oscillator) ---
|
186 |
if waveform_type == 'Square':
|
187 |
note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
|
188 |
elif waveform_type == 'Sawtooth':
|
|
|
190 |
elif waveform_type == 'Triangle':
|
191 |
note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
|
192 |
|
193 |
+
# --- Bass Boost (Sub-Octave Oscillator) ---
|
194 |
+
if bass_boost_level > 0:
|
195 |
+
bass_freq = freq / 2.0
|
196 |
+
# Only add bass if the frequency is reasonably audible
|
197 |
+
if bass_freq > 20:
|
198 |
+
# Bass uses a simple square wave, no vibrato, for stability
|
199 |
+
bass_sub_waveform = signal.square(2 * np.pi * bass_freq * t, duty=0.5)
|
200 |
+
# Mix the main and bass waveforms.
|
201 |
+
# As bass level increases, scale the main waveform down (to 0.5 at full boost) to limit the
# combined amplitude. NOTE: the mixed peak can still exceed 1.0 (up to 1.5 at full boost).
|
202 |
+
main_level = 1.0 - (0.5 * bass_boost_level)
|
203 |
+
note_waveform = (note_waveform * main_level) + (bass_sub_waveform * bass_boost_level)
|
204 |
+
|
205 |
# --- ADSR Envelope ---
|
206 |
start_amp = note.velocity / 127.0
|
207 |
envelope = np.zeros(num_samples)
|
|
|
217 |
elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
|
218 |
envelope = np.linspace(start_amp, 0, num_samples)
|
219 |
|
220 |
+
# Apply envelope to the (potentially combined) waveform
|
221 |
note_waveform *= envelope
|
222 |
|
223 |
start_sample = int(note.start * fs)
|
|
|
290 |
|
291 |
def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
|
292 |
"""
|
293 |
+
Transcribes a general audio file into a MIDI file using basic-pitch.
|
294 |
This is suitable for various instruments and vocals.
|
295 |
"""
|
296 |
print('=' * 70)
|
|
|
346 |
render_remove_drums,
|
347 |
# --- 8-bit synth params ---
|
348 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
349 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
350 |
+
s8bit_bass_boost_level
|
351 |
):
|
352 |
"""
|
353 |
Processes and renders a MIDI file according to user-defined settings.
|
|
|
570 |
midi_data_for_synth,
|
571 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
572 |
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
573 |
+
s8bit_bass_boost_level,
|
574 |
fs=srate
|
575 |
)
|
576 |
# Normalize audio
|
|
|
635 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
636 |
# --- 8-bit synth params ---
|
637 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
638 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
|
639 |
+
s8bit_bass_boost_level
|
640 |
):
|
641 |
"""
|
642 |
Main function to handle file processing. It determines the file type and calls the
|
|
|
679 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
680 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
681 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
682 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level)
|
683 |
|
684 |
print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
|
685 |
print('*' * 70)
|
|
|
722 |
with app:
|
723 |
gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
|
724 |
gr.Markdown(
|
725 |
+
"**Upload a Audio for transcription-then-rendering, or a MIDI for rendering-only.**\n\n"
|
726 |
"This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
|
727 |
"Based on the work of [asigalov61](https://github.com/asigalov61)."
|
728 |
)
|
|
|
737 |
# type="filepath" ensures the component returns a string path to the uploaded file.
|
738 |
# The component will show a player for supported audio types (e.g., WAV, MP3).
|
739 |
input_file = gr.Audio(
|
740 |
+
label="Input Audio or MIDI File",
|
741 |
type="filepath",
|
742 |
sources=["upload"], waveform_options=waveform_options
|
743 |
)
|
|
|
797 |
s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
|
798 |
s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
|
799 |
s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
|
800 |
+
s8bit_bass_boost_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="Bass Boost Level", info="Adjusts the volume of the sub-octave. 0 is off.")
|
801 |
+
|
802 |
# --- Original Advanced Options (Now tied to Piano-Specific) ---
|
803 |
with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
|
804 |
render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
|
|
|
807 |
render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
|
808 |
render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
|
809 |
custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
|
810 |
+
merge_misaligned_notes = gr.Slider(-1, 127, value=-1, label="Time to merge notes in ms (-1 to disable)")
|
811 |
render_align = gr.Radio(
|
812 |
["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
|
813 |
label="Align notes to musical bars",
|
|
|
838 |
render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
|
839 |
render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
|
840 |
s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
|
841 |
+
s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level
|
842 |
]
|
843 |
all_outputs = [
|
844 |
output_midi_md5, output_midi_title, output_midi_summary,
|