Audio-To-MIDI-And-Advanced-Renderer

Running

avans06 committed 13 days ago

Commit

80ab93c

1 Parent(s): adcbc9f

feat(synth): Add adjustable bass boost to 8-bit synthesizer

This commit introduces a bass enhancement feature to the 8-bit synthesizer, allowing users to add more weight and depth to the sound.

Files changed (1) hide show

app.py +31 -13

app.py CHANGED Viewed

@@ -3,14 +3,14 @@
 # Merged and Integrated Script for Audio/MIDI Processing and Rendering
 #
 # This script combines two functionalities:
-# 1. Transcribing audio (WAV/MP3) to MIDI using two methods:
 #    a) A general-purpose model (basic-pitch by Spotify).
 #    b) A model specialized for solo piano (ByteDance).
 # 2. Applying advanced transformations and re-rendering MIDI files using:
 #    a) Standard SoundFonts via FluidSynth.
 #    b) A custom 8-bit style synthesizer for a chiptune sound.
 #
-# The user can upload a WAV, MP3, or MIDI file.
 # - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
 # - The resulting MIDI (or an uploaded MIDI) can then be processed
 #   with various effects and rendered into audio.
@@ -160,10 +160,11 @@ def prepare_soundfonts():
 # =================================================================================================
 # === 8-bit Style Synthesizer ===
 # =================================================================================================
-def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, fs=44100):
     """
     Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
     This function generates waveforms manually instead of using a synthesizer like FluidSynth.
     """
     total_duration = midi_data.get_end_time()
     waveform = np.zeros(int(total_duration * fs) + fs)
@@ -181,7 +182,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
             # --- Vibrato LFO ---
             vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
-            # --- Waveform Generation ---
             if waveform_type == 'Square':
                 note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
             elif waveform_type == 'Sawtooth':
@@ -189,6 +190,18 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
             elif waveform_type == 'Triangle':
                 note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
             # --- ADSR Envelope ---
             start_amp = note.velocity / 127.0
             envelope = np.zeros(num_samples)
@@ -204,6 +217,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
             elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
                 envelope = np.linspace(start_amp, 0, num_samples)
             note_waveform *= envelope
             start_sample = int(note.start * fs)
@@ -276,7 +290,7 @@ def TranscribePianoAudio(input_file):
 def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
     """
-    Transcribes a general audio file (WAV/MP3) into a MIDI file using basic-pitch.
     This is suitable for various instruments and vocals.
     """
     print('=' * 70)
@@ -332,7 +346,8 @@ def Render_MIDI(input_midi_path,
                 render_remove_drums,
                 # --- 8-bit synth params ---
                 s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
-                s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
                 ):
     """
     Processes and renders a MIDI file according to user-defined settings.
@@ -555,6 +570,7 @@ def Render_MIDI(input_midi_path,
                 midi_data_for_synth,
                 s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
                 s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
                 fs=srate
             )
             # Normalize audio
@@ -619,7 +635,8 @@ def process_and_render_file(input_file,
                             render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
                             # --- 8-bit synth params ---
                             s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
-                            s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
                            ):
     """
     Main function to handle file processing. It determines the file type and calls the
@@ -662,7 +679,7 @@ def process_and_render_file(input_file,
                           render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
                           render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
                           s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
-                          s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth)
     print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
     print('*' * 70)
@@ -705,7 +722,7 @@ if __name__ == "__main__":
     with app:
         gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
         gr.Markdown(
-            "**Upload a WAV/MP3 for transcription-then-rendering, or a MIDI for rendering-only.**\n\n"
             "This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
             "Based on the work of [asigalov61](https://github.com/asigalov61)."
         )
@@ -720,7 +737,7 @@ if __name__ == "__main__":
                 # type="filepath" ensures the component returns a string path to the uploaded file.
                 # The component will show a player for supported audio types (e.g., WAV, MP3).
                 input_file = gr.Audio(
-                    label="Input Audio (WAV, MP3) or MIDI File",
                     type="filepath",
                     sources=["upload"], waveform_options=waveform_options
                 )
@@ -780,7 +797,8 @@ if __name__ == "__main__":
                     s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
                     s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
                     s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
                 # --- Original Advanced Options (Now tied to Piano-Specific) ---
                 with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
                     render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
@@ -789,7 +807,7 @@ if __name__ == "__main__":
                     render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
                     render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
                     custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
-                    merge_misaligned_notes = gr.Slider(-1, 127, value=-1, info="Time to merge notes in ms (-1 to disable)")
                     render_align = gr.Radio(
                         ["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
                         label="Align notes to musical bars",
@@ -820,7 +838,7 @@ if __name__ == "__main__":
             render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
             render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
             s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
-            s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
         ]
         all_outputs = [
             output_midi_md5, output_midi_title, output_midi_summary,

 # Merged and Integrated Script for Audio/MIDI Processing and Rendering
 #
 # This script combines two functionalities:
+# 1. Transcribing audio to MIDI using two methods:
 #    a) A general-purpose model (basic-pitch by Spotify).
 #    b) A model specialized for solo piano (ByteDance).
 # 2. Applying advanced transformations and re-rendering MIDI files using:
 #    a) Standard SoundFonts via FluidSynth.
 #    b) A custom 8-bit style synthesizer for a chiptune sound.
 #
+# The user can upload an audio file (e.g., WAV, MP3) or a MIDI file.
 # - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
 # - The resulting MIDI (or an uploaded MIDI) can then be processed
 #   with various effects and rendered into audio.
 # =================================================================================================
 # === 8-bit Style Synthesizer ===
 # =================================================================================================
+def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, bass_boost_level, fs=44100):
     """
     Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
     This function generates waveforms manually instead of using a synthesizer like FluidSynth.
+    Includes an optional sub-octave bass booster with adjustable level.
     """
     total_duration = midi_data.get_end_time()
     waveform = np.zeros(int(total_duration * fs) + fs)
             # --- Vibrato LFO ---
             vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
+            # --- Waveform Generation (Main Oscillator) ---
             if waveform_type == 'Square':
                 note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
             elif waveform_type == 'Sawtooth':
             elif waveform_type == 'Triangle':
                 note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
+            # --- Bass Boost (Sub-Octave Oscillator) ---
+            if bass_boost_level > 0:
+                bass_freq = freq / 2.0
+                # Only add bass if the frequency is reasonably audible
+                if bass_freq > 20:
+                    # Bass uses a simple square wave, no vibrato, for stability
+                    bass_sub_waveform = signal.square(2 * np.pi * bass_freq * t, duty=0.5)
+                    # Mix the main and bass waveforms.
+                    # As bass level increases, lower the main waveform volume to limit the combined peak
+                    # (it can still reach 1 + 0.5 * bass_boost_level, so this reduces rather than prevents clipping).
+                    main_level = 1.0 - (0.5 * bass_boost_level)
+                    note_waveform = (note_waveform * main_level) + (bass_sub_waveform * bass_boost_level)
             # --- ADSR Envelope ---
             start_amp = note.velocity / 127.0
             envelope = np.zeros(num_samples)
             elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
                 envelope = np.linspace(start_amp, 0, num_samples)
+            # Apply envelope to the (potentially combined) waveform
             note_waveform *= envelope
             start_sample = int(note.start * fs)
 def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
     """
+    Transcribes a general audio file into a MIDI file using basic-pitch.
     This is suitable for various instruments and vocals.
     """
     print('=' * 70)
                 render_remove_drums,
                 # --- 8-bit synth params ---
                 s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
+                s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
+                s8bit_bass_boost_level
                 ):
     """
     Processes and renders a MIDI file according to user-defined settings.
                 midi_data_for_synth,
                 s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
                 s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
+                s8bit_bass_boost_level,
                 fs=srate
             )
             # Normalize audio
                             render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
                             # --- 8-bit synth params ---
                             s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
+                            s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
+                            s8bit_bass_boost_level
                            ):
     """
     Main function to handle file processing. It determines the file type and calls the
                           render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
                           render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
                           s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
+                          s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level)
     print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
     print('*' * 70)
     with app:
         gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
         gr.Markdown(
+            "**Upload a Audio for transcription-then-rendering, or a MIDI for rendering-only.**\n\n"
             "This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
             "Based on the work of [asigalov61](https://github.com/asigalov61)."
         )
                 # type="filepath" ensures the component returns a string path to the uploaded file.
                 # The component will show a player for supported audio types (e.g., WAV, MP3).
                 input_file = gr.Audio(
+                    label="Input Audio or MIDI File",
                     type="filepath",
                     sources=["upload"], waveform_options=waveform_options
                 )
                     s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
                     s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
                     s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
+                    s8bit_bass_boost_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="Bass Boost Level", info="Adjusts the volume of the sub-octave. 0 is off.")
                 # --- Original Advanced Options (Now tied to Piano-Specific) ---
                 with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
                     render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
                     render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
                     render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
                     custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
+                    merge_misaligned_notes = gr.Slider(-1, 127, value=-1, label="Time to merge notes in ms (-1 to disable)")
                     render_align = gr.Radio(
                         ["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
                         label="Align notes to musical bars",
             render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
             render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
             s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
+            s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level
         ]
         all_outputs = [
             output_midi_md5, output_midi_title, output_midi_summary,