avans06 committed on
Commit
80ab93c
·
1 Parent(s): adcbc9f

feat(synth): Add adjustable bass boost to 8-bit synthesizer

Browse files

This commit introduces a bass enhancement feature to the 8-bit synthesizer, allowing users to add more weight and depth to the sound.

Files changed (1) hide show
  1. app.py +31 -13
app.py CHANGED
@@ -3,14 +3,14 @@
3
  # Merged and Integrated Script for Audio/MIDI Processing and Rendering
4
  #
5
  # This script combines two functionalities:
6
- # 1. Transcribing audio (WAV/MP3) to MIDI using two methods:
7
  # a) A general-purpose model (basic-pitch by Spotify).
8
  # b) A model specialized for solo piano (ByteDance).
9
  # 2. Applying advanced transformations and re-rendering MIDI files using:
10
  # a) Standard SoundFonts via FluidSynth.
11
  # b) A custom 8-bit style synthesizer for a chiptune sound.
12
  #
13
- # The user can upload a WAV, MP3, or MIDI file.
14
  # - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
15
  # - The resulting MIDI (or an uploaded MIDI) can then be processed
16
  # with various effects and rendered into audio.
@@ -160,10 +160,11 @@ def prepare_soundfonts():
160
  # =================================================================================================
161
  # === 8-bit Style Synthesizer ===
162
  # =================================================================================================
163
- def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, fs=44100):
164
  """
165
  Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
166
  This function generates waveforms manually instead of using a synthesizer like FluidSynth.
 
167
  """
168
  total_duration = midi_data.get_end_time()
169
  waveform = np.zeros(int(total_duration * fs) + fs)
@@ -181,7 +182,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
181
  # --- Vibrato LFO ---
182
  vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
183
 
184
- # --- Waveform Generation ---
185
  if waveform_type == 'Square':
186
  note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
187
  elif waveform_type == 'Sawtooth':
@@ -189,6 +190,18 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
189
  elif waveform_type == 'Triangle':
190
  note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
191
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  # --- ADSR Envelope ---
193
  start_amp = note.velocity / 127.0
194
  envelope = np.zeros(num_samples)
@@ -204,6 +217,7 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
204
  elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
205
  envelope = np.linspace(start_amp, 0, num_samples)
206
 
 
207
  note_waveform *= envelope
208
 
209
  start_sample = int(note.start * fs)
@@ -276,7 +290,7 @@ def TranscribePianoAudio(input_file):
276
 
277
  def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
278
  """
279
- Transcribes a general audio file (WAV/MP3) into a MIDI file using basic-pitch.
280
  This is suitable for various instruments and vocals.
281
  """
282
  print('=' * 70)
@@ -332,7 +346,8 @@ def Render_MIDI(input_midi_path,
332
  render_remove_drums,
333
  # --- 8-bit synth params ---
334
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
335
- s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
 
336
  ):
337
  """
338
  Processes and renders a MIDI file according to user-defined settings.
@@ -555,6 +570,7 @@ def Render_MIDI(input_midi_path,
555
  midi_data_for_synth,
556
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
557
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
 
558
  fs=srate
559
  )
560
  # Normalize audio
@@ -619,7 +635,8 @@ def process_and_render_file(input_file,
619
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
620
  # --- 8-bit synth params ---
621
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
622
- s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
 
623
  ):
624
  """
625
  Main function to handle file processing. It determines the file type and calls the
@@ -662,7 +679,7 @@ def process_and_render_file(input_file,
662
  render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
663
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
664
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
665
- s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth)
666
 
667
  print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
668
  print('*' * 70)
@@ -705,7 +722,7 @@ if __name__ == "__main__":
705
  with app:
706
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
707
  gr.Markdown(
708
- "**Upload a WAV/MP3 for transcription-then-rendering, or a MIDI for rendering-only.**\n\n"
709
  "This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
710
  "Based on the work of [asigalov61](https://github.com/asigalov61)."
711
  )
@@ -720,7 +737,7 @@ if __name__ == "__main__":
720
  # type="filepath" ensures the component returns a string path to the uploaded file.
721
  # The component will show a player for supported audio types (e.g., WAV, MP3).
722
  input_file = gr.Audio(
723
- label="Input Audio (WAV, MP3) or MIDI File",
724
  type="filepath",
725
  sources=["upload"], waveform_options=waveform_options
726
  )
@@ -780,7 +797,8 @@ if __name__ == "__main__":
780
  s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
781
  s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
782
  s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
783
-
 
784
  # --- Original Advanced Options (Now tied to Piano-Specific) ---
785
  with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
786
  render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
@@ -789,7 +807,7 @@ if __name__ == "__main__":
789
  render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
790
  render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
791
  custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
792
- merge_misaligned_notes = gr.Slider(-1, 127, value=-1, info="Time to merge notes in ms (-1 to disable)")
793
  render_align = gr.Radio(
794
  ["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
795
  label="Align notes to musical bars",
@@ -820,7 +838,7 @@ if __name__ == "__main__":
820
  render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
821
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
822
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
823
- s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth
824
  ]
825
  all_outputs = [
826
  output_midi_md5, output_midi_title, output_midi_summary,
 
3
  # Merged and Integrated Script for Audio/MIDI Processing and Rendering
4
  #
5
  # This script combines two functionalities:
6
+ # 1. Transcribing audio to MIDI using two methods:
7
  # a) A general-purpose model (basic-pitch by Spotify).
8
  # b) A model specialized for solo piano (ByteDance).
9
  # 2. Applying advanced transformations and re-rendering MIDI files using:
10
  # a) Standard SoundFonts via FluidSynth.
11
  # b) A custom 8-bit style synthesizer for a chiptune sound.
12
  #
13
+ # The user can upload an audio file (e.g., WAV, MP3) or a MIDI file.
14
  # - If an audio file is uploaded, it is first transcribed to MIDI using the selected method.
15
  # - The resulting MIDI (or an uploaded MIDI) can then be processed
16
  # with various effects and rendered into audio.
 
160
  # =================================================================================================
161
  # === 8-bit Style Synthesizer ===
162
  # =================================================================================================
163
+ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width, vibrato_rate, vibrato_depth, bass_boost_level, fs=44100):
164
  """
165
  Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
166
  This function generates waveforms manually instead of using a synthesizer like FluidSynth.
167
+ Includes an optional sub-octave bass booster with adjustable level.
168
  """
169
  total_duration = midi_data.get_end_time()
170
  waveform = np.zeros(int(total_duration * fs) + fs)
 
182
  # --- Vibrato LFO ---
183
  vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
184
 
185
+ # --- Waveform Generation (Main Oscillator) ---
186
  if waveform_type == 'Square':
187
  note_waveform = signal.square(2 * np.pi * (freq + vibrato_lfo) * t, duty=pulse_width)
188
  elif waveform_type == 'Sawtooth':
 
190
  elif waveform_type == 'Triangle':
191
  note_waveform = signal.sawtooth(2 * np.pi * (freq + vibrato_lfo) * t, width=0.5)
192
 
193
+ # --- Bass Boost (Sub-Octave Oscillator) ---
194
+ if bass_boost_level > 0:
195
+ bass_freq = freq / 2.0
196
+ # Only add bass if the frequency is reasonably audible
197
+ if bass_freq > 20:
198
+ # Bass uses a simple square wave, no vibrato, for stability
199
+ bass_sub_waveform = signal.square(2 * np.pi * bass_freq * t, duty=0.5)
200
+ # Mix the main and bass waveforms.
201
+ # As bass level increases, reduce the main waveform level to limit (not eliminate) the combined peak; the mix can still reach 1 + 0.5 * bass_boost_level.
202
+ main_level = 1.0 - (0.5 * bass_boost_level)
203
+ note_waveform = (note_waveform * main_level) + (bass_sub_waveform * bass_boost_level)
204
+
205
  # --- ADSR Envelope ---
206
  start_amp = note.velocity / 127.0
207
  envelope = np.zeros(num_samples)
 
217
  elif envelope_type == 'Sustained (Full Decay)' and num_samples > 0:
218
  envelope = np.linspace(start_amp, 0, num_samples)
219
 
220
+ # Apply envelope to the (potentially combined) waveform
221
  note_waveform *= envelope
222
 
223
  start_sample = int(note.start * fs)
 
290
 
291
  def TranscribeGeneralAudio(input_file, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool):
292
  """
293
+ Transcribes a general audio file into a MIDI file using basic-pitch.
294
  This is suitable for various instruments and vocals.
295
  """
296
  print('=' * 70)
 
346
  render_remove_drums,
347
  # --- 8-bit synth params ---
348
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
349
+ s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
350
+ s8bit_bass_boost_level
351
  ):
352
  """
353
  Processes and renders a MIDI file according to user-defined settings.
 
570
  midi_data_for_synth,
571
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
572
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
573
+ s8bit_bass_boost_level,
574
  fs=srate
575
  )
576
  # Normalize audio
 
635
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
636
  # --- 8-bit synth params ---
637
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
638
+ s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
639
+ s8bit_bass_boost_level
640
  ):
641
  """
642
  Main function to handle file processing. It determines the file type and calls the
 
679
  render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
680
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
681
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
682
+ s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level)
683
 
684
  print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
685
  print('*' * 70)
 
722
  with app:
723
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
724
  gr.Markdown(
725
+ "**Upload a Audio for transcription-then-rendering, or a MIDI for rendering-only.**\n\n"
726
  "This application combines piano audio transcription with a powerful MIDI transformation and rendering toolkit. "
727
  "Based on the work of [asigalov61](https://github.com/asigalov61)."
728
  )
 
737
  # type="filepath" ensures the component returns a string path to the uploaded file.
738
  # The component will show a player for supported audio types (e.g., WAV, MP3).
739
  input_file = gr.Audio(
740
+ label="Input Audio or MIDI File",
741
  type="filepath",
742
  sources=["upload"], waveform_options=waveform_options
743
  )
 
797
  s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width")
798
  s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
799
  s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
800
+ s8bit_bass_boost_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="Bass Boost Level", info="Adjusts the volume of the sub-octave. 0 is off.")
801
+
802
  # --- Original Advanced Options (Now tied to Piano-Specific) ---
803
  with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
804
  render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
 
807
  render_transpose_to_C4 = gr.Checkbox(label="Transpose entire score to center around C4", value=False)
808
  render_transpose_value = gr.Slider(-12, 12, value=0, step=1, label="Transpose (semitones)")
809
  custom_render_patch = gr.Slider(-1, 127, value=-1, step=1, label="Force MIDI Patch (-1 to disable)")
810
+ merge_misaligned_notes = gr.Slider(-1, 127, value=-1, label="Time to merge notes in ms (-1 to disable)")
811
  render_align = gr.Radio(
812
  ["Do not align", "Start Times", "Start Times and Durations", "Start Times and Split Durations"],
813
  label="Align notes to musical bars",
 
838
  render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
839
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
840
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
841
+ s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level
842
  ]
843
  all_outputs = [
844
  output_midi_md5, output_midi_title, output_midi_summary,