avans06 committed on
Commit
58195d3
·
1 Parent(s): e835c5f

feat(synth): Add auto-recommendation for 8-bit synthesizer parameters

Browse files

This commit introduces a new "Auto-Recommend" feature for the 8-bit synthesizer, designed to help users quickly generate a suitable sound profile for their MIDI files without manual tweaking.

- A new `"Auto-Recommend (Analyze MIDI)"` option has been added to the "Style Preset" dropdown.
- When selected, the main processing function (`process_and_render_file`) now triggers the analysis and recommendation engine.
- The recommended parameters are then used for rendering and are also reflected back onto the UI sliders, allowing users to see and further adjust the generated settings.

Files changed (1) hide show
  1. app.py +572 -197
app.py CHANGED
@@ -166,14 +166,16 @@ def prepare_soundfonts():
166
  # =================================================================================================
167
  def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width,
168
  vibrato_rate, vibrato_depth, bass_boost_level, fs=44100,
169
- smooth_notes=False, continuous_vibrato=False, noise_level=0.0,
170
- distortion_level=0.0, fm_modulation_depth=0.0, fm_modulation_rate=0.0):
 
171
  """
172
  Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
173
  This function generates waveforms manually instead of using a synthesizer like FluidSynth.
174
  Includes an optional sub-octave bass booster with adjustable level.
175
  Instruments are panned based on their order in the MIDI file.
176
  Instrument 1 -> Left, Instrument 2 -> Right.
 
177
  """
178
  total_duration = midi_data.get_end_time()
179
  # Initialize a stereo waveform buffer (2 channels: Left, Right)
@@ -213,26 +215,39 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
213
 
214
  t = np.arange(num_samples) / fs
215
 
216
- # --- Vibrato LFO ---
217
- if continuous_vibrato:
218
- # Use accumulated phase to avoid vibrato reset per note
219
- vib_phase_inc = 2 * np.pi * vibrato_rate / fs
220
- vib_phase_array = vibrato_phase + np.arange(num_samples) * vib_phase_inc
221
- vibrato_phase = (vib_phase_array[-1] + vib_phase_inc) % (2 * np.pi)
222
- vibrato_lfo = vibrato_depth * np.sin(vib_phase_array)
223
- else:
224
- vibrato_lfo = vibrato_depth * np.sin(2 * np.pi * vibrato_rate * t)
225
-
 
 
 
 
 
 
 
 
 
 
 
 
226
  # --- Waveform Generation (Main Oscillator with phase continuity) ---
227
- phase_inc = 2 * np.pi * (freq + vibrato_lfo) / fs
228
  phase = osc_phase[i] + np.cumsum(phase_inc)
229
- osc_phase[i] = phase[-1] % (2 * np.pi) # Store last phase
 
230
 
231
  if waveform_type == 'Square':
232
  note_waveform = signal.square(phase, duty=pulse_width)
233
  elif waveform_type == 'Sawtooth':
234
  note_waveform = signal.sawtooth(phase)
235
- elif waveform_type == 'Triangle':
236
  note_waveform = signal.sawtooth(phase, width=0.5)
237
 
238
  # --- Bass Boost (Sub-Octave Oscillator) ---
@@ -249,49 +264,37 @@ def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s,
249
  main_level = 1.0 - (0.5 * bass_boost_level)
250
  note_waveform = (note_waveform * main_level) + (bass_sub_waveform * bass_boost_level)
251
 
252
- # --- Noise Channel Simulation (White Noise) ---
253
  if noise_level > 0:
254
- noise_waveform = np.random.uniform(-1, 1, num_samples)
255
- note_waveform += noise_waveform * noise_level
256
 
257
  # --- Distortion (Wave Shaping) ---
258
  if distortion_level > 0:
259
- note_waveform = np.sign(note_waveform) * np.abs(note_waveform) ** (1.0 - distortion_level)
260
-
261
- # --- Frequency Modulation (FM) ---
262
- if fm_modulation_depth > 0:
263
- modulated_freq = freq * (1 + fm_modulation_depth * np.sin(2 * np.pi * fm_modulation_rate * t))
264
- phase_inc = 2 * np.pi * modulated_freq / fs
265
- phase = osc_phase[i] + np.cumsum(phase_inc)
266
- osc_phase[i] = phase[-1] % (2 * np.pi) # Store last phase
267
- if waveform_type == 'Square':
268
- note_waveform = signal.square(phase, duty=pulse_width)
269
- elif waveform_type == 'Sawtooth':
270
- note_waveform = signal.sawtooth(phase)
271
- elif waveform_type == 'Triangle':
272
- note_waveform = signal.sawtooth(phase, width=0.5)
273
 
274
  # --- ADSR Envelope ---
275
  start_amp = note.velocity / 127.0
276
  envelope = np.zeros(num_samples)
277
 
278
  if envelope_type == 'Plucky (AD Envelope)':
279
- attack_time_s = 0.005
280
- attack_samples = min(int(attack_time_s * fs), num_samples)
281
  decay_samples = min(int(decay_time_s * fs), num_samples - attack_samples)
282
 
283
  envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
284
  if decay_samples > 0:
285
  envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
286
- elif envelope_type == 'Sustained (Full Decay)':
287
  envelope = np.linspace(start_amp, 0, num_samples)
288
 
289
- if smooth_notes:
290
- # Add short release
291
- release_samples = min(int(0.005 * fs), num_samples)
292
- envelope[-release_samples:] *= np.linspace(1, 0, release_samples)
293
- # Small crossfade (to avoid clicks)
294
- envelope[:min(10, num_samples)] *= np.linspace(0.5, 1, min(10, num_samples))
 
 
295
 
296
  # Apply envelope to the (potentially combined) waveform
297
  note_waveform *= envelope
@@ -572,7 +575,7 @@ def Render_MIDI(input_midi_path,
572
  # --- 8-bit synth params ---
573
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
574
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
575
- s8bit_bass_boost_level, s8bit_smooth_notes, s8bit_continuous_vibrato,
576
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
577
  ):
578
  """
@@ -799,8 +802,8 @@ def Render_MIDI(input_midi_path,
799
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
800
  s8bit_bass_boost_level,
801
  fs=srate,
802
- smooth_notes=s8bit_smooth_notes,
803
- continuous_vibrato=s8bit_continuous_vibrato,
804
  noise_level=s8bit_noise_level,
805
  distortion_level=s8bit_distortion_level,
806
  fm_modulation_depth=s8bit_fm_modulation_depth,
@@ -855,11 +858,190 @@ def Render_MIDI(input_midi_path,
855
 
856
  return new_md5_hash, fn1, output_midi_summary, midi_to_render_path, (srate, audio_out), output_plot, song_description
857
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  # =================================================================================================
859
  # === Main Application Logic ===
860
  # =================================================================================================
861
 
862
- def process_and_render_file(input_file,
 
 
863
  # --- Transcription params ---
864
  enable_stereo_processing,
865
  transcription_method,
@@ -871,7 +1053,7 @@ def process_and_render_file(input_file,
871
  # --- 8-bit synth params ---
872
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
873
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
874
- s8bit_bass_boost_level, s8bit_smooth_notes, s8bit_continuous_vibrato,
875
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
876
  ):
877
  """
@@ -880,8 +1062,8 @@ def process_and_render_file(input_file,
880
  """
881
  start_time = reqtime.time()
882
  if input_file is None:
883
- # Return a list of updates to clear all output fields
884
- return [gr.update(value=None)] * 7
885
 
886
  # The input_file from gr.Audio(type="filepath") is now the direct path (a string),
887
  # not a temporary file object. We no longer need to access the .name attribute.
@@ -890,9 +1072,13 @@ def process_and_render_file(input_file,
890
  print(f"Processing new file: {filename}")
891
 
892
  try:
 
893
  audio_data, native_sample_rate = librosa.load(input_file_path, sr=None, mono=False)
894
  except Exception as e:
895
- raise gr.Error(f"Failed to load audio file: {e}")
 
 
 
896
 
897
  # --- Step 1: Check file type and transcribe if necessary ---
898
  if filename.lower().endswith(('.mid', '.midi', '.kar')):
@@ -907,7 +1093,7 @@ def process_and_render_file(input_file,
907
 
908
  # === STEREO PROCESSING LOGIC ===
909
  if enable_stereo_processing:
910
- if audio_data.ndim != 2 or audio_data.shape[0] != 2:
911
  print("Warning: Audio is not stereo or could not be loaded as stereo. Falling back to mono transcription.")
912
  enable_stereo_processing = False # Disable stereo processing if audio is not stereo
913
 
@@ -929,16 +1115,12 @@ def process_and_render_file(input_file,
929
  print(f"Saved left channel to: {temp_left_wav_path}")
930
  print(f"Saved right channel to: {temp_right_wav_path}")
931
 
932
- print("Transcribing left channel...")
933
  if transcription_method == "General Purpose":
934
  midi_path_left = TranscribeGeneralAudio(temp_left_wav_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool)
935
- else:
936
- midi_path_left = TranscribePianoAudio(temp_left_wav_path)
937
-
938
- print("Transcribing right channel...")
939
- if transcription_method == "General Purpose":
940
  midi_path_right = TranscribeGeneralAudio(temp_right_wav_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool)
941
  else:
 
942
  midi_path_right = TranscribePianoAudio(temp_right_wav_path)
943
 
944
  if midi_path_left and midi_path_right:
@@ -956,48 +1138,111 @@ def process_and_render_file(input_file,
956
  except Exception as e:
957
  print(f"An error occurred during stereo processing: {e}")
958
  raise gr.Error(f"Stereo Processing Failed: {e}")
959
- else:
960
  print("Stereo processing disabled. Using standard mono transcription.")
961
- if audio_data.ndim == 1:
962
- mono_signal = audio_data
963
- else:
964
- mono_signal = np.mean(audio_data, axis=0)
965
-
966
- normalized_mono = normalize_loudness(mono_signal, native_sample_rate)
967
 
968
- temp_mono_wav_path = os.path.join(temp_dir, f"{base_name}_mono.wav")
969
- sf.write(temp_mono_wav_path, normalized_mono, native_sample_rate)
 
 
 
970
 
971
  try:
972
  if transcription_method == "General Purpose":
973
- midi_path_for_rendering = TranscribeGeneralAudio(
974
- temp_mono_wav_path, onset_thresh, frame_thresh, min_note_len,
975
- min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool
976
- )
977
  else: # Piano-Specific
978
- midi_path_for_rendering = TranscribePianoAudio(temp_mono_wav_path)
979
- analyze_midi_velocity(midi_path_for_rendering)
980
  except Exception as e:
981
  print(f"An error occurred during transcription: {e}")
982
  raise gr.Error(f"Transcription Failed: {e}")
983
 
984
  # --- Step 2: Render the MIDI file with selected options ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
985
  print(f"Proceeding to render MIDI file: {os.path.basename(midi_path_for_rendering)}")
986
- # --- Passing new FX parameters to the Render_MIDI function ---
 
 
 
987
  results = Render_MIDI(midi_path_for_rendering,
988
  render_type, soundfont_bank, render_sample_rate,
989
  render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
990
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
991
- s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
992
- s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level,
993
- s8bit_smooth_notes, s8bit_continuous_vibrato,
994
- s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
 
 
 
 
 
 
 
 
 
 
995
  )
996
 
997
  print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
998
  print('*' * 70)
999
 
1000
- return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1001
 
1002
  # =================================================================================================
1003
  # === Gradio UI Setup ===
@@ -1016,40 +1261,38 @@ def update_ui_visibility(transcription_method, soundfont_choice):
1016
  }
1017
 
1018
  # --- Function to apply 8-bit synthesizer presets ---
 
1019
  def apply_8bit_preset(preset_name):
1020
  """
1021
  Takes the name of a preset and returns a dictionary of gr.update objects
1022
- to set the values of the 8-bit synthesizer's UI components.
1023
  """
 
 
 
 
 
 
 
1024
  # If the user selects "Custom" or the preset is not found, do not change the values.
1025
  if preset_name == "Custom" or preset_name not in S8BIT_PRESETS:
1026
- return {
1027
- s8bit_waveform_type: gr.update(),
1028
- s8bit_pulse_width: gr.update(),
1029
- s8bit_envelope_type: gr.update(),
1030
- s8bit_decay_time_s: gr.update(),
1031
- s8bit_vibrato_rate: gr.update(),
1032
- s8bit_vibrato_depth: gr.update(),
1033
- s8bit_smooth_notes: gr.update(),
1034
- s8bit_continuous_vibrato: gr.update(),
1035
- s8bit_bass_boost_level: gr.update()
1036
- }
1037
 
1038
  # Get the settings dictionary for the chosen preset.
1039
  settings = S8BIT_PRESETS[preset_name]
1040
 
1041
- # Return a dictionary that maps each UI component to a gr.update call with the new value.
1042
- return {
1043
- s8bit_waveform_type: gr.update(value=settings['waveform_type']),
1044
- s8bit_pulse_width: gr.update(value=settings['pulse_width']),
1045
- s8bit_envelope_type: gr.update(value=settings['envelope_type']),
1046
- s8bit_decay_time_s: gr.update(value=settings['decay_time_s']),
1047
- s8bit_vibrato_rate: gr.update(value=settings['vibrato_rate']),
1048
- s8bit_vibrato_depth: gr.update(value=settings['vibrato_depth']),
1049
- s8bit_smooth_notes: gr.update(value=settings['smooth_notes']),
1050
- s8bit_continuous_vibrato: gr.update(value=settings['continuous_vibrato']),
1051
- s8bit_bass_boost_level: gr.update(value=settings['bass_boost_level'])
1052
- }
1053
 
1054
  if __name__ == "__main__":
1055
  # Initialize the app: download model (if needed) and apply patches
@@ -1066,146 +1309,268 @@ if __name__ == "__main__":
1066
  print("\nWARNING: No SoundFonts were found or could be downloaded.")
1067
  print("Rendering with SoundFonts will fail. Only the 8-bit synthesizer will be available.")
1068
 
 
 
 
 
1069
  # --- Data structure for 8-bit synthesizer presets ---
1070
  # Comprehensive preset dictionary with new FX parameters for all presets
1071
  # Comprehensive preset dictionary including new JRPG and Handheld classics
1072
  # Note: Vibrato depth is mapped to a representative value on the 0-50 Hz slider.
1073
  S8BIT_PRESETS = {
1074
  # --- Rhythmic & Action ---
1075
- "Rhythm Pop Lead": {
1076
  # Description: A clean, round square wave perfect for the snappy, catchy feel of rhythm games.
1077
- 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18, 'vibrato_rate': 4.5, 'vibrato_depth': 4,
1078
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1079
  },
1080
- "Arcade Brawler Lead": {
1081
  # Description: A gritty sawtooth lead with a hard attack, capturing the high-energy feel of classic fighting games.
1082
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15, 'vibrato_rate': 5.0, 'vibrato_depth': 6,
1083
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.4, 'noise_level': 0.05, 'distortion_level': 0.1, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1084
  },
1085
- "Mega Man (Rockman)": {
1086
  # Description: A thin, sharp square wave lead with fast vibrato, iconic for its driving, heroic melodies.
1087
- 'waveform_type': 'Square', 'pulse_width': 0.2, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15, 'vibrato_rate': 6.0, 'vibrato_depth': 8,
1088
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.05, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1089
  },
1090
- "Kirby's Bubbly Melody": {
1091
  # Description: A soft, round square wave with a bouncy vibrato, creating a cheerful and adorable sound.
1092
- 'waveform_type': 'Square', 'pulse_width': 0.4, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2, 'vibrato_rate': 6.0, 'vibrato_depth': 4,
1093
- 'smooth_notes': True, 'continuous_vibrato': False, 'bass_boost_level': 0.1, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1094
  },
1095
- "Mario (Super Mario Bros)": {
1096
  # Description: A bright square wave with a per-note vibrato, producing the classic bouncy platformer sound.
1097
- 'waveform_type': 'Square', 'pulse_width': 0.3, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.25, 'vibrato_rate': 5.0, 'vibrato_depth': 5,
1098
- 'smooth_notes': True, 'continuous_vibrato': False, 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1099
  },
1100
  # --- Epic & Atmospheric ---
1101
- "Mecha & Tactics Brass": {
1102
  # Description: A powerful, sustained sawtooth emulating the bold, heroic synth-brass of strategy and mecha anime themes.
1103
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4, 'vibrato_rate': 3.5, 'vibrato_depth': 5,
1104
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.5, 'noise_level': 0.1, 'distortion_level': 0.15, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1105
  },
1106
- "Mystic Mana Pad": {
1107
- # Description: A warm, ethereal square wave pad with slow vibrato, capturing a feeling of fantasy and wonder.
1108
- 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5, 'vibrato_rate': 2.5, 'vibrato_depth': 4,
1109
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1110
  },
1111
- "Dragon Quest (Orchestral Feel)": {
1112
  # Description: A pure triangle wave with a long decay, mimicking the grand, orchestral feel of a classical flute or string section.
1113
- 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.6, 'vibrato_rate': 3.0, 'vibrato_depth': 4,
1114
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1115
  },
1116
- "ONI V (Wafu Mystic)": {
1117
  # Description: A solemn triangle wave with a slow, expressive vibrato, evoking the mysterious atmosphere of Japanese folklore.
1118
- 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4, 'vibrato_rate': 3.5, 'vibrato_depth': 3,
1119
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.4, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1120
  },
1121
- "Zelda (NES)": {
1122
  # Description: The classic pure triangle wave lead, perfect for heroic and adventurous overworld themes.
1123
- 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.3, 'vibrato_rate': 4.5, 'vibrato_depth': 4,
1124
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.15, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1125
  },
1126
  # --- JRPG & System Classics ---
1127
- "Falcom Ys (Rock Lead)": {
1128
  # Description: A powerful sawtooth with slight distortion, emulating the driving rock organ and guitar leads of action JRPGs.
1129
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15, 'vibrato_rate': 5.5, 'vibrato_depth': 6,
1130
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.4, 'noise_level': 0.05, 'distortion_level': 0.15, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1131
  },
1132
- "Final Fantasy (Arpeggio)": {
1133
  # Description: A perfect, clean square wave with zero vibrato, creating the iconic, crystal-clear arpeggio sound.
1134
- 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.22, 'vibrato_rate': 5.0, 'vibrato_depth': 0,
1135
- 'smooth_notes': True, 'continuous_vibrato': False, 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1136
  },
1137
- "Castlevania (Akumajō Dracula)": {
1138
  # Description: A sharp square wave with dramatic vibrato, ideal for fast, gothic, and baroque-inspired melodies.
1139
- 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18, 'vibrato_rate': 6.5, 'vibrato_depth': 6,
1140
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.35, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1141
  },
1142
- "Pokémon (Game Boy Classics)": {
1143
  # Description: A full, friendly square wave sound, capturing the cheerful and adventurous spirit of early handheld RPGs.
1144
- 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.22, 'vibrato_rate': 5.0, 'vibrato_depth': 5,
1145
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.25, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1146
  },
1147
  # --- Advanced System Impressions ---
1148
  "Commodore 64 (SID Feel)": {
1149
  # Description: (Impression) Uses high-speed, shallow vibrato to mimic the characteristic "buzzy" texture of the SID chip's PWM.
1150
- 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.25, 'vibrato_rate': 8.0, 'vibrato_depth': 4,
1151
- 'smooth_notes': True, 'continuous_vibrato': False, 'bass_boost_level': 0.2, 'noise_level': 0.05, 'distortion_level': 0.1, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1152
  },
1153
  "Megadrive/Genesis (FM Grit)": {
1154
  # Description: (Impression) Uses FM, distortion, and noise to capture the gritty, metallic, and aggressive tone of the YM2612 chip.
1155
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18, 'vibrato_rate': 0.0, 'vibrato_depth': 0,
1156
- 'smooth_notes': False, 'continuous_vibrato': True, 'bass_boost_level': 0.4, 'noise_level': 0.1, 'distortion_level': 0.2, 'fm_modulation_depth': 0.2, 'fm_modulation_rate': 150
 
 
 
 
1157
  },
1158
- "PC-98 (Touhou Feel)": {
1159
  # Description: (Impression) A very sharp square wave with fast FM, emulating the bright, high-energy leads of Japanese PC games.
1160
- 'waveform_type': 'Square', 'pulse_width': 0.15, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.12, 'vibrato_rate': 7.5, 'vibrato_depth': 7,
1161
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.1, 'fm_modulation_rate': 200
 
 
 
 
1162
  },
1163
  "Roland SC-88 (GM Vibe)": {
1164
  # Description: (Impression) A clean, stable triangle wave with no effects, mimicking the polished, sample-based sounds of General MIDI.
1165
- 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.35, 'vibrato_rate': 0, 'vibrato_depth': 0,
1166
- 'smooth_notes': True, 'continuous_vibrato': False, 'bass_boost_level': 0.1, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1167
  },
1168
  # --- Experimental & Sound FX ---
1169
  "Sci-Fi Energy Field": {
1170
  # Description: (SFX) High-speed vibrato and noise create a constant, shimmering hum suitable for energy shields or force fields.
1171
- 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4, 'vibrato_rate': 10.0, 'vibrato_depth': 3,
1172
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.1, 'noise_level': 0.1, 'distortion_level': 0.0, 'fm_modulation_depth': 0.05, 'fm_modulation_rate': 50
 
 
 
 
1173
  },
1174
  "Industrial Alarm": {
1175
  # Description: (SFX) Extreme vibrato rate on a sawtooth wave produces a harsh, metallic, dissonant alarm sound.
1176
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2, 'vibrato_rate': 15.0, 'vibrato_depth': 8,
1177
- 'smooth_notes': False, 'continuous_vibrato': False, 'bass_boost_level': 0.3, 'noise_level': 0.2, 'distortion_level': 0.3, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1178
  },
1179
  "Laser Charge-Up": {
1180
  # Description: (SFX) Extreme vibrato depth creates a dramatic, rising pitch effect, perfect for sci-fi weapon sounds.
1181
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.3, 'vibrato_rate': 4.0, 'vibrato_depth': 25,
1182
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1183
  },
1184
  "Unstable Machine Core": {
1185
  # Description: (SFX) Maximum depth and distortion create a chaotic, atonal noise, simulating a machine on the verge of exploding.
1186
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5, 'vibrato_rate': 1.0, 'vibrato_depth': 50,
1187
- 'smooth_notes': False, 'continuous_vibrato': True, 'bass_boost_level': 0.5, 'noise_level': 0.3, 'distortion_level': 0.4, 'fm_modulation_depth': 0.5, 'fm_modulation_rate': 10
 
 
 
 
1188
  },
1189
  "Hardcore Gabber Kick": {
1190
  # Description: (Experimental) Maximum bass boost and distortion create an overwhelmingly powerful, clipped kick drum sound.
1191
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.1, 'vibrato_rate': 0, 'vibrato_depth': 0,
1192
- 'smooth_notes': False, 'continuous_vibrato': False, 'bass_boost_level': 0.8, 'noise_level': 0.2, 'distortion_level': 0.5, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1193
  },
1194
  # --- Utility ---
1195
  "Generic Chiptune Loop": {
1196
  # Description: A well-balanced, pleasant square wave lead that serves as a great starting point for custom sounds.
1197
- 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2, 'vibrato_rate': 5.5, 'vibrato_depth': 4,
1198
- 'smooth_notes': True, 'continuous_vibrato': True, 'bass_boost_level': 0.25, 'noise_level': 0.0, 'distortion_level': 0.0, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
1199
  },
1200
- "Dark/Boss Atmosphere": {
1201
- # Description: An aggressive sawtooth with heavy bass and distortion, perfect for tense or menacing background music.
1202
- 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.35, 'vibrato_rate': 7.0, 'vibrato_depth': 12,
1203
- 'smooth_notes': False, 'continuous_vibrato': False, 'bass_boost_level': 0.4, 'noise_level': 0.15, 'distortion_level': 0.25, 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1204
  }
1205
  }
1206
 
1207
  app = gr.Blocks(theme=gr.themes.Base())
1208
-
1209
  with app:
1210
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
1211
  gr.Markdown(
@@ -1348,10 +1713,12 @@ if __name__ == "__main__":
1348
  # - High: Creates fast modulation, resulting in bright, complex, often metallic harmonics and sidebands.
1349
  # =================================================================================
1350
  #
 
 
1351
  with gr.Accordion("8-bit Synthesizer Settings", open=False, visible=False) as synth_8bit_settings:
1352
- # --- ADDED: Preset selector dropdown ---
1353
  s8bit_preset_selector = gr.Dropdown(
1354
- choices=["Custom"] + list(S8BIT_PRESETS.keys()),
1355
  value="Custom",
1356
  label="Style Preset",
1357
  info="Select a preset to auto-fill the settings below. Choose 'Custom' for manual control.\nFor reference and entertainment only. These presets are not guaranteed to be perfectly accurate."
@@ -1360,20 +1727,20 @@ if __name__ == "__main__":
1360
  s8bit_waveform_type = gr.Dropdown(['Square', 'Sawtooth', 'Triangle'], value='Square', label="Waveform Type")
1361
  s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width (Square Wave Only)")
1362
  s8bit_envelope_type = gr.Dropdown(['Plucky (AD Envelope)', 'Sustained (Full Decay)'], value='Plucky (AD Envelope)', label="Envelope Type")
1363
- s8bit_decay_time_s = gr.Slider(0.01, 0.6, value=0.1, step=0.01, label="Decay Time (s)")
1364
  s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
1365
  s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
1366
- s8bit_bass_boost_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="Bass Boost Level", info="Adjusts the volume of the sub-octave. 0 is off.")
1367
- s8bit_smooth_notes = gr.Checkbox(value=True, label="Smooth Notes", info="Applies a tiny fade-in/out to notes to reduce clicking.")
1368
- s8bit_continuous_vibrato = gr.Checkbox(value=True, label="Continuous Vibrato", info="Prevents vibrato from resetting on each note.")
1369
-
1370
  # --- New accordion for advanced effects ---
1371
  with gr.Accordion("Advanced Synthesis & FX", open=False):
1372
  s8bit_noise_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Noise Level", info="Mixes in white noise. Great for percussion or adding 'air'.")
1373
  s8bit_distortion_level = gr.Slider(minimum=0.0, maximum=0.9, value=0.0, step=0.05, label="Distortion Level", info="Applies wave-shaping distortion for a grittier, harsher sound.")
1374
  s8bit_fm_modulation_depth = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="FM Depth", info="Depth of Frequency Modulation. Creates complex, metallic, or bell-like tones.")
1375
  s8bit_fm_modulation_rate = gr.Slider(minimum=0.0, maximum=500.0, value=0.0, step=1.0, label="FM Rate", info="Rate of Frequency Modulation. Higher values create brighter, more complex harmonics.")
1376
-
1377
  # --- Original Advanced Options (Now tied to Piano-Specific) ---
1378
  with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
1379
  render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
@@ -1404,38 +1771,44 @@ if __name__ == "__main__":
1404
  output_midi_summary = gr.Textbox(label="MIDI metadata summary", lines=4)
1405
 
1406
  # Define all input components for the click event, excluding the preset selector which is not a direct input to the final processing.
 
 
1407
  all_inputs = [
1408
- input_file,
1409
- enable_stereo_processing,
1410
- transcription_method,
1411
- onset_threshold, frame_threshold, minimum_note_length, minimum_frequency, maximum_frequency,
1412
- infer_onsets, melodia_trick, multiple_pitch_bends,
1413
- render_type, soundfont_bank, render_sample_rate,
1414
- render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
1415
- render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
1416
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
1417
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level,
1418
- s8bit_smooth_notes, s8bit_continuous_vibrato,
1419
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
1420
  ]
1421
- all_outputs = [
 
 
1422
  output_midi_md5, output_midi_title, output_midi_summary,
1423
  output_midi, output_audio, output_plot, output_song_description
1424
  ]
1425
-
1426
- # Define the output components for the preset updater function.
1427
- s8bit_updater_outputs = [
1428
- s8bit_waveform_type, s8bit_pulse_width, s8bit_envelope_type,
1429
- s8bit_decay_time_s, s8bit_vibrato_rate, s8bit_vibrato_depth,
1430
- s8bit_smooth_notes, s8bit_continuous_vibrato, s8bit_bass_boost_level,
 
1431
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
1432
  ]
1433
 
1434
- # --- Event Handling ---
 
 
 
1435
  submit_btn.click(
1436
  process_and_render_file,
1437
  inputs=all_inputs,
1438
- outputs=all_outputs
1439
  )
1440
 
1441
  # --- Listeners for dynamic UI updates ---
@@ -1450,14 +1823,16 @@ if __name__ == "__main__":
1450
  outputs=[general_transcription_settings, synth_8bit_settings]
1451
  )
1452
 
 
1453
  # --- Event listener for the preset selector ---
1454
  # When the preset dropdown changes, it calls the `apply_8bit_preset` function.
1455
  # The input to the function is the selected preset name.
1456
  # The outputs are all the individual 8-bit setting components that need to be updated.
 
1457
  s8bit_preset_selector.change(
1458
  fn=apply_8bit_preset,
1459
  inputs=[s8bit_preset_selector],
1460
- outputs=s8bit_updater_outputs
1461
  )
1462
 
1463
 
 
166
  # =================================================================================================
167
  def synthesize_8bit_style(midi_data, waveform_type, envelope_type, decay_time_s, pulse_width,
168
  vibrato_rate, vibrato_depth, bass_boost_level, fs=44100,
169
+ smooth_notes_level=0.0, continuous_vibrato_level=0.0,
170
+ noise_level=0.0, distortion_level=0.0,
171
+ fm_modulation_depth=0.0, fm_modulation_rate=0.0):
172
  """
173
  Synthesizes an 8-bit style audio waveform from a PrettyMIDI object.
174
  This function generates waveforms manually instead of using a synthesizer like FluidSynth.
175
  Includes an optional sub-octave bass booster with adjustable level.
176
  Instruments are panned based on their order in the MIDI file.
177
  Instrument 1 -> Left, Instrument 2 -> Right.
178
+ Now supports graded levels for smoothing and vibrato continuity.
179
  """
180
  total_duration = midi_data.get_end_time()
181
  # Initialize a stereo waveform buffer (2 channels: Left, Right)
 
215
 
216
  t = np.arange(num_samples) / fs
217
 
218
+ # --- Graded Continuous Vibrato ---
219
+ # This now interpolates between a fully reset vibrato and a fully continuous one.
220
+ # Use accumulated phase to avoid vibrato reset per note
221
+ vib_phase_inc = 2 * np.pi * vibrato_rate / fs
222
+ per_note_vib_phase = 2 * np.pi * vibrato_rate * t
223
+ continuous_vib_phase = vibrato_phase + np.arange(num_samples) * vib_phase_inc
224
+
225
+ # Weighted average of the two phase types
226
+ final_vib_phase = (
227
+ per_note_vib_phase * (1 - continuous_vibrato_level) +
228
+ continuous_vib_phase * continuous_vibrato_level
229
+ )
230
+ vibrato_lfo = vibrato_depth * np.sin(final_vib_phase)
231
+
232
+ # Update the global vibrato phase for the next note
233
+ if num_samples > 0:
234
+ vibrato_phase = (continuous_vib_phase[-1] + vib_phase_inc) % (2 * np.pi)
235
+
236
+ # --- Waveform Generation with FM ---
237
+ fm_lfo = fm_modulation_depth * np.sin(2 * np.pi * fm_modulation_rate * t)
238
+ modulated_freq = freq * (1 + fm_lfo)
239
+
240
  # --- Waveform Generation (Main Oscillator with phase continuity) ---
241
+ phase_inc = 2 * np.pi * (modulated_freq + vibrato_lfo) / fs
242
  phase = osc_phase[i] + np.cumsum(phase_inc)
243
+ if num_samples > 0:
244
+ osc_phase[i] = phase[-1] % (2 * np.pi) # Store last phase
245
 
246
  if waveform_type == 'Square':
247
  note_waveform = signal.square(phase, duty=pulse_width)
248
  elif waveform_type == 'Sawtooth':
249
  note_waveform = signal.sawtooth(phase)
250
+ else: # Triangle
251
  note_waveform = signal.sawtooth(phase, width=0.5)
252
 
253
  # --- Bass Boost (Sub-Octave Oscillator) ---
 
264
  main_level = 1.0 - (0.5 * bass_boost_level)
265
  note_waveform = (note_waveform * main_level) + (bass_sub_waveform * bass_boost_level)
266
 
267
+ # --- Noise & Distortion Simulation (White Noise) ---
268
  if noise_level > 0:
269
+ note_waveform += np.random.uniform(-1, 1, num_samples) * noise_level
 
270
 
271
  # --- Distortion (Wave Shaping) ---
272
  if distortion_level > 0:
273
+ # Using a tanh function for a smoother, "warmer" distortion
274
+ note_waveform = np.tanh(note_waveform * (1 + distortion_level * 5))
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
  # --- ADSR Envelope ---
277
  start_amp = note.velocity / 127.0
278
  envelope = np.zeros(num_samples)
279
 
280
  if envelope_type == 'Plucky (AD Envelope)':
281
+ attack_samples = min(int(0.005 * fs), num_samples)
 
282
  decay_samples = min(int(decay_time_s * fs), num_samples - attack_samples)
283
 
284
  envelope[:attack_samples] = np.linspace(0, start_amp, attack_samples)
285
  if decay_samples > 0:
286
  envelope[attack_samples:attack_samples+decay_samples] = np.linspace(start_amp, 0, decay_samples)
287
+ else: # Sustained
288
  envelope = np.linspace(start_amp, 0, num_samples)
289
 
290
+ # --- Graded Note Smoothing ---
291
+ # The level controls the length of the fade in/out. Max fade is 10ms.
292
+ if smooth_notes_level > 0 and num_samples > 10:
293
+ fade_length = int(fs * 0.01 * smooth_notes_level)
294
+ fade_samples = min(fade_length, num_samples // 2)
295
+ if fade_samples > 0:
296
+ envelope[:fade_samples] *= np.linspace(0.5, 1.0, fade_samples)
297
+ envelope[-fade_samples:] *= np.linspace(1.0, 0.0, fade_samples)
298
 
299
  # Apply envelope to the (potentially combined) waveform
300
  note_waveform *= envelope
 
575
  # --- 8-bit synth params ---
576
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
577
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
578
+ s8bit_bass_boost_level, s8bit_smooth_notes_level, s8bit_continuous_vibrato_level,
579
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
580
  ):
581
  """
 
802
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
803
  s8bit_bass_boost_level,
804
  fs=srate,
805
+ smooth_notes_level=s8bit_smooth_notes_level,
806
+ continuous_vibrato_level=s8bit_continuous_vibrato_level,
807
  noise_level=s8bit_noise_level,
808
  distortion_level=s8bit_distortion_level,
809
  fm_modulation_depth=s8bit_fm_modulation_depth,
 
858
 
859
  return new_md5_hash, fn1, output_midi_summary, midi_to_render_path, (srate, audio_out), output_plot, song_description
860
 
861
+
862
def analyze_midi_features(midi_data):
    """
    Summarize the notes of a PrettyMIDI object for parameter recommendation.

    Notes from every instrument are pooled together before any statistic is
    computed.

    Args:
        midi_data (pretty_midi.PrettyMIDI): The MIDI data to analyze.

    Returns:
        dict or None: None when the MIDI contains no notes. Otherwise a
            dictionary with the keys:
            'note_count', 'instruments_count', 'duration',
            'note_density' (notes per second, 0 when the duration is 0),
            'avg_velocity', 'pitch_range' (semitones), 'avg_pitch',
            'avg_note_length'.
    """
    # Flatten all instruments into a single list of notes.
    pooled = []
    for track in midi_data.instruments:
        pooled.extend(track.notes)

    total = len(pooled)
    if not total:
        # Nothing to analyze.
        return None

    length_s = midi_data.get_end_time()
    # Guard against a zero-length file before dividing.
    density = total / length_s if length_s != 0 else 0

    # Simple arithmetic means over the pooled notes.
    mean_velocity = sum(n.velocity for n in pooled) / total
    mean_pitch = sum(n.pitch for n in pooled) / total
    mean_length = sum(n.end - n.start for n in pooled) / total

    # Span between the lowest and highest pitch; a lone note has no span.
    if total > 1:
        pitches = [n.pitch for n in pooled]
        span = max(pitches) - min(pitches)
    else:
        span = 0

    summary = {}
    summary['note_count'] = total
    summary['instruments_count'] = len(midi_data.instruments)
    summary['duration'] = length_s
    summary['note_density'] = density
    summary['avg_velocity'] = mean_velocity
    summary['pitch_range'] = span
    summary['avg_pitch'] = mean_pitch
    summary['avg_note_length'] = mean_length
    return summary
911
+
912
def determine_waveform_type(features):
    """
    Pick a waveform for the 8-bit synthesizer from analyzed MIDI features.

    Rules are tried in order and the first match wins:
    - Triangle: low, long, narrow-range material (soft bass lines).
    - Sawtooth: dense or wide-ranging material (intense leads and solos).
    - Square: everything else (the general-purpose default).

    Args:
        features (dict): The dictionary of features from analyze_midi_features.

    Returns:
        str: The recommended waveform type ('Square', 'Sawtooth', or 'Triangle').
    """
    rules = (
        # MIDI pitch 52 is E3 (with middle C = 60 = C4). An average at or below
        # it, combined with notes of at least 0.3 s and a span under one
        # octave, is treated as a simple melodic bass line.
        (
            "Triangle",
            lambda f: f['avg_pitch'] <= 52
            and f['avg_note_length'] >= 0.3
            and f['pitch_range'] < 12,
        ),
        # Six or more notes per second, or a span of 18+ semitones, is treated
        # as an aggressive or complex part that suits the sawtooth's harmonics.
        (
            "Sawtooth",
            lambda f: f['note_density'] >= 6 or f['pitch_range'] >= 18,
        ),
    )

    for waveform, matches in rules:
        if matches(features):
            return waveform

    # No rule matched: fall back to the most versatile waveform.
    return "Square"
939
+
940
def recommend_8bit_params(midi_data, default_preset):
    """
    Recommend 8-bit synthesizer parameters from a rule-based analysis of a MIDI.

    Features are extracted with `analyze_midi_features`; every parameter is
    then derived either from fixed thresholds or from a feature normalized
    into the 0-1 range.

    Args:
        midi_data (pretty_midi.PrettyMIDI): The MIDI data to analyze.
        default_preset (dict): Fallback parameters used when the MIDI has no
            notes to analyze.

    Returns:
        dict: Recommended parameters keyed by 'waveform_type', 'pulse_width',
            'envelope_type', 'decay_time_s', 'vibrato_depth', 'vibrato_rate',
            'smooth_notes_level', 'continuous_vibrato_level', 'noise_level',
            'distortion_level', 'fm_modulation_depth', 'fm_modulation_rate'
            and 'bass_boost_level'. Float values are rounded to 3 decimals.
            When no features are available, a shallow copy of
            `default_preset` is returned instead.
    """
    features = analyze_midi_features(midi_data)
    if features is None:
        # Return a copy, not the caller's object: the fallback is typically an
        # entry of the shared preset table, and handing out the same dict would
        # let any later mutation of the result corrupt that preset.
        return dict(default_preset)

    density = features['note_density']
    velocity = features['avg_velocity']
    pitch_range = features['pitch_range']
    note_length = features['avg_note_length']

    params = {}

    # --- 1. Core timbre ---
    params['waveform_type'] = determine_waveform_type(features)
    if params['waveform_type'] == 'Square':
        # Wide-ranging melodies get a thinner pulse (0.3), others a full one (0.5).
        # NOTE(review): determine_waveform_type only returns 'Square' when
        # pitch_range < 18, so the `> 30` test cannot currently be true and the
        # pulse width is effectively always 0.5. The two thresholds need to be
        # reconciled; behaviour is intentionally left unchanged here.
        params['pulse_width'] = 0.3 if pitch_range > 30 else 0.5
    else:
        # Pulse width has no meaning for Sawtooth/Triangle; use a neutral value.
        params['pulse_width'] = 0.5

    # --- 2. Envelope ---
    # Very dense material (> 10 notes/sec) gets a short plucky envelope.
    is_plucky = density > 10
    params['envelope_type'] = 'Plucky (AD Envelope)' if is_plucky else 'Sustained (Full Decay)'
    params['decay_time_s'] = 0.15 if is_plucky else 0.4

    # --- 3. Vibrato ---
    # Depth grows with average velocity above 60, clamped to [0, 10].
    params['vibrato_depth'] = min(max((velocity - 60) / 20, 0), 10)
    # Rate follows note density in three steps.
    if density > 12:
        params['vibrato_rate'] = 7.0
    elif density > 6:
        params['vibrato_rate'] = 5.0
    else:
        params['vibrato_rate'] = 3.0

    # --- 4. Graded parameters (normalized to 0-1) ---
    # Smoothing ramps up from 3 to 8 notes/sec.
    params['smooth_notes_level'] = min(max((density - 3) / 5.0, 0.0), 1.0)

    # Vibrato continuity ramps *down* from 5 to 10 notes/sec, so sparse
    # (lyrical) material keeps a connected vibrato.
    params['continuous_vibrato_level'] = 1.0 - min(max((density - 5) / 5.0, 0.0), 1.0)

    # Noise ramps from 0.0 to 0.1 as average velocity goes from 50 to 90.
    params['noise_level'] = min(max((velocity - 50) / 40.0, 0.0), 1.0) * 0.1

    # Distortion is stepped: shorter average notes get more of it.
    if note_length < 0.25:
        params['distortion_level'] = 0.1
    elif note_length < 0.5:
        params['distortion_level'] = 0.05
    else:
        params['distortion_level'] = 0.0

    # FM depth/rate scale with a complexity factor: the mean of note density
    # (5-20 notes/sec) and pitch range (15-45 semitones), each mapped to 0-1.
    density_factor = min(max((density - 5) / 15, 0), 1)
    range_factor = min(max((pitch_range - 15) / 30, 0), 1)
    complexity_factor = (density_factor + range_factor) / 2
    params['fm_modulation_depth'] = round(0.3 * complexity_factor, 3)
    params['fm_modulation_rate'] = round(200 * complexity_factor, 1)

    # Bass boost shrinks by 0.15 per extra instrument (to keep dense
    # arrangements from getting muddy) and never drops below 0.2.
    params['bass_boost_level'] = max(0.2, 1.0 - (features['instruments_count'] - 1) * 0.15)

    # Round floats for cleaner output; non-float values pass through untouched.
    return {
        key: round(value, 3) if isinstance(value, float) else value
        for key, value in params.items()
    }
1036
+
1037
+
1038
  # =================================================================================================
1039
  # === Main Application Logic ===
1040
  # =================================================================================================
1041
 
1042
+ def process_and_render_file(input_file,
1043
+ # --- Pass the preset selector value ---
1044
+ s8bit_preset_selector,
1045
  # --- Transcription params ---
1046
  enable_stereo_processing,
1047
  transcription_method,
 
1053
  # --- 8-bit synth params ---
1054
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
1055
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth,
1056
+ s8bit_bass_boost_level, s8bit_smooth_notes_level, s8bit_continuous_vibrato_level,
1057
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
1058
  ):
1059
  """
 
1062
  """
1063
  start_time = reqtime.time()
1064
  if input_file is None:
1065
+ # Return a list of updates to clear all output fields and UI controls
1066
+ return [gr.update(value=None)] * (7 + 13) # 7 results + 13 synth controls
1067
 
1068
  # The input_file from gr.Audio(type="filepath") is now the direct path (a string),
1069
  # not a temporary file object. We no longer need to access the .name attribute.
 
1072
  print(f"Processing new file: {filename}")
1073
 
1074
  try:
1075
+ # Mono=False is required to correctly detect stereo channels
1076
  audio_data, native_sample_rate = librosa.load(input_file_path, sr=None, mono=False)
1077
  except Exception as e:
1078
+ # If loading fails, it might be a MIDI file, which librosa cannot handle.
1079
+ # We will proceed, assuming it's a MIDI, and let pretty_midi handle it later.
1080
+ print(f"Could not load as audio: {e}. Assuming it is a MIDI file.")
1081
+ pass
1082
 
1083
  # --- Step 1: Check file type and transcribe if necessary ---
1084
  if filename.lower().endswith(('.mid', '.midi', '.kar')):
 
1093
 
1094
  # === STEREO PROCESSING LOGIC ===
1095
  if enable_stereo_processing:
1096
+ if 'audio_data' not in locals() or audio_data.ndim != 2 or audio_data.shape[0] != 2:
1097
  print("Warning: Audio is not stereo or could not be loaded as stereo. Falling back to mono transcription.")
1098
  enable_stereo_processing = False # Disable stereo processing if audio is not stereo
1099
 
 
1115
  print(f"Saved left channel to: {temp_left_wav_path}")
1116
  print(f"Saved right channel to: {temp_right_wav_path}")
1117
 
1118
+ print("Transcribing left and right channel...")
1119
  if transcription_method == "General Purpose":
1120
  midi_path_left = TranscribeGeneralAudio(temp_left_wav_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool)
 
 
 
 
 
1121
  midi_path_right = TranscribeGeneralAudio(temp_right_wav_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool)
1122
  else:
1123
+ midi_path_left = TranscribePianoAudio(temp_left_wav_path)
1124
  midi_path_right = TranscribePianoAudio(temp_right_wav_path)
1125
 
1126
  if midi_path_left and midi_path_right:
 
1138
  except Exception as e:
1139
  print(f"An error occurred during stereo processing: {e}")
1140
  raise gr.Error(f"Stereo Processing Failed: {e}")
1141
+ else: # Standard mono transcription
1142
  print("Stereo processing disabled. Using standard mono transcription.")
1143
+ if 'audio_data' in locals():
1144
+ if audio_data.ndim == 1:
1145
+ mono_signal = audio_data
1146
+ else:
1147
+ mono_signal = np.mean(audio_data, axis=0)
 
1148
 
1149
+ normalized_mono = normalize_loudness(mono_signal, native_sample_rate)
1150
+
1151
+ temp_mono_wav_path = os.path.join(temp_dir, f"{base_name}_mono.wav")
1152
+ sf.write(temp_mono_wav_path, normalized_mono, native_sample_rate)
1153
+ input_file_path = temp_mono_wav_path # Use the normalized mono file for transcription
1154
 
1155
  try:
1156
  if transcription_method == "General Purpose":
1157
+ midi_path_for_rendering = TranscribeGeneralAudio(input_file_path, onset_thresh, frame_thresh, min_note_len, min_freq, max_freq, infer_onsets_bool, melodia_trick_bool, multiple_bends_bool)
 
 
 
1158
  else: # Piano-Specific
1159
+ midi_path_for_rendering = TranscribePianoAudio(input_file_path)
 
1160
  except Exception as e:
1161
  print(f"An error occurred during transcription: {e}")
1162
  raise gr.Error(f"Transcription Failed: {e}")
1163
 
1164
  # --- Step 2: Render the MIDI file with selected options ---
1165
+
1166
+ # --- Auto-Recommendation Logic ---
1167
+ # Store the original parameters from the UI sliders into a dictionary.
1168
+ # The keys in this dictionary match the keys returned by recommend_8bit_params.
1169
+ synth_params = {
1170
+ 'waveform_type': s8bit_waveform_type, 'pulse_width': s8bit_pulse_width, 'envelope_type': s8bit_envelope_type,
1171
+ 'decay_time_s': s8bit_decay_time_s, 'vibrato_rate': s8bit_vibrato_rate, 'vibrato_depth': s8bit_vibrato_depth,
1172
+ 'bass_boost_level': s8bit_bass_boost_level, 'smooth_notes_level': s8bit_smooth_notes_level, 'continuous_vibrato_level': s8bit_continuous_vibrato_level,
1173
+ 'noise_level': s8bit_noise_level, 'distortion_level': s8bit_distortion_level,
1174
+ 'fm_modulation_depth': s8bit_fm_modulation_depth, 'fm_modulation_rate': s8bit_fm_modulation_rate,
1175
+ }
1176
+
1177
+ # This variable will hold the values to update the UI sliders
1178
+ ui_updates = {}
1179
+
1180
+ # If the user selected the auto-recommend option, override the parameters
1181
+ if s8bit_preset_selector == "Auto-Recommend (Analyze MIDI)":
1182
+ print("Auto-Recommendation is enabled. Analyzing MIDI features...")
1183
+ try:
1184
+ midi_to_analyze = pretty_midi.PrettyMIDI(midi_path_for_rendering)
1185
+ default_params = S8BIT_PRESETS[FALLBACK_PRESET_NAME]
1186
+ recommended_params = recommend_8bit_params(midi_to_analyze, default_params)
1187
+
1188
+ print("Recommended parameters:", recommended_params)
1189
+ # Both the synthesis parameters and the UI update values are set to the recommendations
1190
+ synth_params.update(recommended_params)
1191
+ ui_updates = recommended_params.copy() # Use a copy for UI updates
1192
+ except Exception as e:
1193
+ print(f"Could not auto-recommend parameters: {e}. Using default values from UI.")
1194
+
1195
  print(f"Proceeding to render MIDI file: {os.path.basename(midi_path_for_rendering)}")
1196
+
1197
+ # --- Correctly pass parameters to Render_MIDI ---
1198
+ # The Render_MIDI function expects positional arguments, not keyword arguments.
1199
+ # We must unpack the values from our synth_params dictionary in the correct order.
1200
  results = Render_MIDI(midi_path_for_rendering,
1201
  render_type, soundfont_bank, render_sample_rate,
1202
  render_with_sustains, merge_misaligned_notes, custom_render_patch, render_align,
1203
  render_transpose_value, render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
1204
+ # Unpack the values from the dictionary as positional arguments
1205
+ synth_params['waveform_type'],
1206
+ synth_params['envelope_type'],
1207
+ synth_params['decay_time_s'],
1208
+ synth_params['pulse_width'],
1209
+ synth_params['vibrato_rate'],
1210
+ synth_params['vibrato_depth'],
1211
+ synth_params['bass_boost_level'],
1212
+ synth_params['smooth_notes_level'],
1213
+ synth_params['continuous_vibrato_level'],
1214
+ synth_params['noise_level'],
1215
+ synth_params['distortion_level'],
1216
+ synth_params['fm_modulation_depth'],
1217
+ synth_params['fm_modulation_rate']
1218
  )
1219
 
1220
  print(f'Total processing time: {(reqtime.time() - start_time):.2f} sec')
1221
  print('*' * 70)
1222
 
1223
+ # --- Prepare the final return value for Gradio ---
1224
+
1225
+ # This list defines the order of UI components to be updated.
1226
+ # IT MUST MATCH THE ORDER IN `s8bit_updater_outputs` IN THE MAIN BLOCK.
1227
+ param_order = [
1228
+ 'waveform_type', 'pulse_width', 'envelope_type', 'decay_time_s', 'vibrato_rate',
1229
+ 'vibrato_depth', 'bass_boost_level', 'smooth_notes_level', 'continuous_vibrato_level',
1230
+ 'noise_level', 'distortion_level', 'fm_modulation_depth', 'fm_modulation_rate'
1231
+ ]
1232
+
1233
+ final_ui_updates = []
1234
+ if ui_updates: # If auto-recommendation was successful
1235
+ # We have new values, so we create a list of these values in the correct order.
1236
+ for param in param_order:
1237
+ final_ui_updates.append(ui_updates.get(param))
1238
+ else:
1239
+ # No auto-recommendation, so we tell Gradio not to change the UI.
1240
+ # We send a gr.update() for each UI component.
1241
+ for _ in param_order:
1242
+ final_ui_updates.append(gr.update())
1243
+
1244
+ # The final return is a combination of the result values and the UI update values.
1245
+ return list(results) + final_ui_updates
1246
 
1247
  # =================================================================================================
1248
  # === Gradio UI Setup ===
 
1261
  }
1262
 
1263
  # --- Function to apply 8-bit synthesizer presets ---
1264
+ # --- This function must be defined before the UI components that use it ---
1265
  def apply_8bit_preset(preset_name):
1266
  """
1267
  Takes the name of a preset and returns a dictionary of gr.update objects
1268
+ to set the values of all 13 of the 8-bit synthesizer's UI components.
1269
  """
1270
+ # --- Use a list of keys for consistent updates ---
1271
+ param_keys = [
1272
+ 'waveform_type', 'pulse_width', 'envelope_type', 'decay_time_s', 'vibrato_rate',
1273
+ 'vibrato_depth', 'bass_boost_level', 'smooth_notes_level', 'continuous_vibrato_level',
1274
+ 'noise_level', 'distortion_level', 'fm_modulation_depth', 'fm_modulation_rate'
1275
+ ]
1276
+
1277
  # If the user selects "Custom" or the preset is not found, do not change the values.
1278
  if preset_name == "Custom" or preset_name not in S8BIT_PRESETS:
1279
+ # When switching to custom, don't change any values, just return empty updates.
1280
+ return {comp: gr.update() for comp in s8bit_ui_components}
 
 
 
 
 
 
 
 
 
1281
 
1282
  # Get the settings dictionary for the chosen preset.
1283
  settings = S8BIT_PRESETS[preset_name]
1284
 
1285
+ # Create a dictionary mapping UI components to their new values from the preset.
1286
+ update_dict = {}
1287
+ for i, key in enumerate(param_keys):
1288
+ component = s8bit_ui_components[i]
1289
+ value = settings.get(key)
1290
+ if value is not None:
1291
+ update_dict[component] = gr.update(value=value)
1292
+ else:
1293
+ update_dict[component] = gr.update()
1294
+ return update_dict
1295
+
 
1296
 
1297
  if __name__ == "__main__":
1298
  # Initialize the app: download model (if needed) and apply patches
 
1309
  print("\nWARNING: No SoundFonts were found or could be downloaded.")
1310
  print("Rendering with SoundFonts will fail. Only the 8-bit synthesizer will be available.")
1311
 
1312
+ # --- Define a constant for the fallback preset name ---
1313
+ # This prevents errors if the preset name is changed in the dictionary.
1314
+ FALLBACK_PRESET_NAME = "Generic Chiptune Loop"
1315
+
1316
  # --- Data structure for 8-bit synthesizer presets ---
1317
  # Comprehensive preset dictionary with new FX parameters for all presets
1318
  # Comprehensive preset dictionary including new JRPG and Handheld classics
1319
  # Note: Vibrato depth is mapped to a representative value on the 0-50 Hz slider.
1320
  S8BIT_PRESETS = {
1321
  # --- Rhythmic & Action ---
1322
+ "Rhythm Pop Lead (Rhythm Tengoku / リズム天国)": {
1323
  # Description: A clean, round square wave perfect for the snappy, catchy feel of rhythm games.
1324
+ 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18,
1325
+ 'vibrato_rate': 4.5, 'vibrato_depth': 4,
1326
+ 'smooth_notes_level': 0.9, # Formerly True -> 1.0; slightly reduced for a bit more attack.
1327
+ 'continuous_vibrato_level': 0.8, # Formerly True -> 1.0; slightly weakened for more defined note transitions.
1328
+ 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0,
1329
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1330
  },
1331
+ "Arcade Brawler Lead (Street Fighter / ストリートファイター)": {
1332
  # Description: A gritty sawtooth lead with a hard attack, capturing the high-energy feel of classic fighting games.
1333
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15,
1334
+ 'vibrato_rate': 5.0, 'vibrato_depth': 6,
1335
+ 'smooth_notes_level': 0.8,
1336
+ 'continuous_vibrato_level': 0.7,
1337
+ 'bass_boost_level': 0.4, 'noise_level': 0.05, 'distortion_level': 0.1,
1338
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1339
  },
1340
+ "Mega Man (Rockman / ロックマン)": {
1341
  # Description: A thin, sharp square wave lead with fast vibrato, iconic for its driving, heroic melodies.
1342
+ 'waveform_type': 'Square', 'pulse_width': 0.2, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15,
1343
+ 'vibrato_rate': 6.0, 'vibrato_depth': 8,
1344
+ 'smooth_notes_level': 0.9,
1345
+ 'continuous_vibrato_level': 0.85,
1346
+ 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.05,
1347
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1348
  },
1349
+ "Kirby's Bubbly Melody (Hoshi no Kirby / 星のカービィ)": {
1350
  # Description: A soft, round square wave with a bouncy vibrato, creating a cheerful and adorable sound.
1351
+ 'waveform_type': 'Square', 'pulse_width': 0.4, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2,
1352
+ 'vibrato_rate': 6.0, 'vibrato_depth': 4,
1353
+ 'smooth_notes_level': 0.85,
1354
+ 'continuous_vibrato_level': 0.3, # Formerly False (0.0); adds a hint of continuity for more liveliness.
1355
+ 'bass_boost_level': 0.1, 'noise_level': 0.0, 'distortion_level': 0.0,
1356
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1357
  },
1358
+ "Mario (Super Mario Bros / スーパーマリオブラザーズ)": {
1359
  # Description: A bright square wave with a per-note vibrato, producing the classic bouncy platformer sound.
1360
+ 'waveform_type': 'Square', 'pulse_width': 0.3, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.25,
1361
+ 'vibrato_rate': 5.0, 'vibrato_depth': 5,
1362
+ 'smooth_notes_level': 0.8,
1363
+ 'continuous_vibrato_level': 0.25,
1364
+ 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0,
1365
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1366
  },
1367
  # --- Epic & Atmospheric ---
1368
+ "Mecha & Tactics Brass (Super Robot Wars / スーパーロボット大戦)": {
1369
  # Description: A powerful, sustained sawtooth emulating the bold, heroic synth-brass of strategy and mecha anime themes.
1370
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4,
1371
+ 'vibrato_rate': 3.5, 'vibrato_depth': 5,
1372
+ 'smooth_notes_level': 0.95,
1373
+ 'continuous_vibrato_level': 0.9,
1374
+ 'bass_boost_level': 0.5, 'noise_level': 0.1, 'distortion_level': 0.15,
1375
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1376
  },
1377
+ "Mystic Mana Pad (Secret of Mana / 聖剣伝説2)": {
1378
+ # Description: A warm, ethereal square wave pad with slow vibrato, capturing a feeling of fantasy and wonder.
1379
+ 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5,
1380
+ 'vibrato_rate': 2.5, 'vibrato_depth': 4,
1381
+ 'smooth_notes_level': 1.0,
1382
+ 'continuous_vibrato_level': 0.95,
1383
+ 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0,
1384
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1385
  },
1386
+ "Dragon Quest (ドラゴンクエスト)": {
1387
  # Description: A pure triangle wave with a long decay, mimicking the grand, orchestral feel of a classical flute or string section.
1388
+ 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.6,
1389
+ 'vibrato_rate': 3.0, 'vibrato_depth': 4,
1390
+ 'smooth_notes_level': 0.9,
1391
+ 'continuous_vibrato_level': 0.9,
1392
+ 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0,
1393
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1394
  },
1395
+ "ONI V (Wafu Mystic / ONI V 隠忍を継ぐ者)": {
1396
  # Description: A solemn triangle wave with a slow, expressive vibrato, evoking the mysterious atmosphere of Japanese folklore.
1397
+ 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4,
1398
+ 'vibrato_rate': 3.5, 'vibrato_depth': 3,
1399
+ 'smooth_notes_level': 0.9,
1400
+ 'continuous_vibrato_level': 0.85,
1401
+ 'bass_boost_level': 0.4, 'noise_level': 0.0, 'distortion_level': 0.0,
1402
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1403
  },
1404
+ "Zelda (The Legend of Zelda / ゼルダの伝説)": {
1405
  # Description: The classic pure triangle wave lead, perfect for heroic and adventurous overworld themes.
1406
+ 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.3,
1407
+ 'vibrato_rate': 4.5, 'vibrato_depth': 4,
1408
+ 'smooth_notes_level': 0.9,
1409
+ 'continuous_vibrato_level': 0.9,
1410
+ 'bass_boost_level': 0.15, 'noise_level': 0.0, 'distortion_level': 0.0,
1411
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1412
  },
1413
  # --- JRPG & System Classics ---
1414
+ "Falcom Ys (Ys / イース)": {
1415
  # Description: A powerful sawtooth with slight distortion, emulating the driving rock organ and guitar leads of action JRPGs.
1416
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.15,
1417
+ 'vibrato_rate': 5.5, 'vibrato_depth': 6,
1418
+ 'smooth_notes_level': 0.85,
1419
+ 'continuous_vibrato_level': 0.8,
1420
+ 'bass_boost_level': 0.4, 'noise_level': 0.05, 'distortion_level': 0.15,
1421
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1422
  },
1423
+ "Final Fantasy (ファイナルファンタジー)": {
1424
  # Description: A perfect, clean square wave with zero vibrato, creating the iconic, crystal-clear arpeggio sound.
1425
+ 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.22,
1426
+ 'vibrato_rate': 5.0, 'vibrato_depth': 0,
1427
+ 'smooth_notes_level': 0.9,
1428
+ 'continuous_vibrato_level': 0.2,
1429
+ 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0,
1430
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1431
  },
1432
+ "Castlevania (Akumajō Dracula / 悪魔城ドラキュラ)": {
1433
  # Description: A sharp square wave with dramatic vibrato, ideal for fast, gothic, and baroque-inspired melodies.
1434
+ 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18,
1435
+ 'vibrato_rate': 6.5, 'vibrato_depth': 6,
1436
+ 'smooth_notes_level': 0.85,
1437
+ 'continuous_vibrato_level': 0.85,
1438
+ 'bass_boost_level': 0.35, 'noise_level': 0.0, 'distortion_level': 0.0,
1439
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1440
  },
1441
+ "Pokémon (Game Boy Classics / ポケットモンスター)": {
1442
  # Description: A full, friendly square wave sound, capturing the cheerful and adventurous spirit of early handheld RPGs.
1443
+ 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.22,
1444
+ 'vibrato_rate': 5.0, 'vibrato_depth': 5,
1445
+ 'smooth_notes_level': 0.9,
1446
+ 'continuous_vibrato_level': 0.9,
1447
+ 'bass_boost_level': 0.25, 'noise_level': 0.0, 'distortion_level': 0.0,
1448
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1449
  },
1450
  # --- Advanced System Impressions ---
1451
  "Commodore 64 (SID Feel)": {
1452
  # Description: (Impression) Uses high-speed, shallow vibrato to mimic the characteristic "buzzy" texture of the SID chip's PWM.
1453
+ 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.25,
1454
+ 'vibrato_rate': 8.0, 'vibrato_depth': 4,
1455
+ 'smooth_notes_level': 0.9,
1456
+ 'continuous_vibrato_level': 0.3,
1457
+ 'bass_boost_level': 0.2, 'noise_level': 0.05, 'distortion_level': 0.1,
1458
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1459
  },
1460
  "Megadrive/Genesis (FM Grit)": {
1461
  # Description: (Impression) Uses FM, distortion, and noise to capture the gritty, metallic, and aggressive tone of the YM2612 chip.
1462
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.18,
1463
+ 'vibrato_rate': 0.0, 'vibrato_depth': 0,
1464
+ 'smooth_notes_level': 0.0,
1465
+ 'continuous_vibrato_level': 0.9,
1466
+ 'bass_boost_level': 0.4, 'noise_level': 0.1, 'distortion_level': 0.2,
1467
+ 'fm_modulation_depth': 0.2, 'fm_modulation_rate': 150
1468
  },
1469
+ "PC-98 (Touhou Feel / 東方Project)": {
1470
  # Description: (Impression) A very sharp square wave with fast FM, emulating the bright, high-energy leads of Japanese PC games.
1471
+ 'waveform_type': 'Square', 'pulse_width': 0.15, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.12,
1472
+ 'vibrato_rate': 7.5, 'vibrato_depth': 7,
1473
+ 'smooth_notes_level': 0.95,
1474
+ 'continuous_vibrato_level': 0.85,
1475
+ 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0,
1476
+ 'fm_modulation_depth': 0.1, 'fm_modulation_rate': 200
1477
  },
1478
  "Roland SC-88 (GM Vibe)": {
1479
  # Description: (Impression) A clean, stable triangle wave with no effects, mimicking the polished, sample-based sounds of General MIDI.
1480
+ 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.35,
1481
+ 'vibrato_rate': 0, 'vibrato_depth': 0,
1482
+ 'smooth_notes_level': 1.0,
1483
+ 'continuous_vibrato_level': 0.0,
1484
+ 'bass_boost_level': 0.1, 'noise_level': 0.0, 'distortion_level': 0.0,
1485
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1486
  },
1487
  # --- Experimental & Sound FX ---
1488
  "Sci-Fi Energy Field": {
1489
  # Description: (SFX) High-speed vibrato and noise create a constant, shimmering hum suitable for energy shields or force fields.
1490
+ 'waveform_type': 'Triangle', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4,
1491
+ 'vibrato_rate': 10.0, 'vibrato_depth': 3,
1492
+ 'smooth_notes_level': 0.85,
1493
+ 'continuous_vibrato_level': 0.9,
1494
+ 'bass_boost_level': 0.1, 'noise_level': 0.1, 'distortion_level': 0.0,
1495
+ 'fm_modulation_depth': 0.05, 'fm_modulation_rate': 50
1496
  },
1497
  "Industrial Alarm": {
1498
  # Description: (SFX) Extreme vibrato rate on a sawtooth wave produces a harsh, metallic, dissonant alarm sound.
1499
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2,
1500
+ 'vibrato_rate': 15.0, 'vibrato_depth': 8,
1501
+ 'smooth_notes_level': 0.0,
1502
+ 'continuous_vibrato_level': 0.0,
1503
+ 'bass_boost_level': 0.3, 'noise_level': 0.2, 'distortion_level': 0.3,
1504
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1505
  },
1506
  "Laser Charge-Up": {
1507
  # Description: (SFX) Extreme vibrato depth creates a dramatic, rising pitch effect, perfect for sci-fi weapon sounds.
1508
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.3,
1509
+ 'vibrato_rate': 4.0, 'vibrato_depth': 25,
1510
+ 'smooth_notes_level': 0.9,
1511
+ 'continuous_vibrato_level': 0.95,
1512
+ 'bass_boost_level': 0.2, 'noise_level': 0.0, 'distortion_level': 0.0,
1513
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1514
  },
1515
  "Unstable Machine Core": {
1516
  # Description: (SFX) Maximum vibrato depth combined with heavy distortion, noise, and FM creates a chaotic, atonal noise, simulating a machine on the verge of exploding.
1517
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5,
1518
+ 'vibrato_rate': 1.0, 'vibrato_depth': 50,
1519
+ 'smooth_notes_level': 0.0,
1520
+ 'continuous_vibrato_level': 0.9,
1521
+ 'bass_boost_level': 0.5, 'noise_level': 0.3, 'distortion_level': 0.4,
1522
+ 'fm_modulation_depth': 0.5, 'fm_modulation_rate': 10
1523
  },
1524
  "Hardcore Gabber Kick": {
1525
  # Description: (Experimental) Heavy bass boost and distortion create an overwhelmingly powerful, clipped kick drum sound.
1526
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.1,
1527
+ 'vibrato_rate': 0, 'vibrato_depth': 0,
1528
+ 'smooth_notes_level': 0.0,
1529
+ 'continuous_vibrato_level': 0.0,
1530
+ 'bass_boost_level': 0.8, 'noise_level': 0.2, 'distortion_level': 0.5,
1531
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1532
  },
1533
  # --- Utility ---
1534
  "Generic Chiptune Loop": {
1535
  # Description: A well-balanced, pleasant square wave lead that serves as a great starting point for custom sounds.
1536
+ 'waveform_type': 'Square', 'pulse_width': 0.25, 'envelope_type': 'Plucky (AD Envelope)', 'decay_time_s': 0.2,
1537
+ 'vibrato_rate': 5.5, 'vibrato_depth': 4,
1538
+ 'smooth_notes_level': 0.9,
1539
+ 'continuous_vibrato_level': 0.85,
1540
+ 'bass_boost_level': 0.25, 'noise_level': 0.0, 'distortion_level': 0.0,
1541
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1542
  },
1543
+ "Dark/Boss Atmosphere (Shin Megami Tensei / 真・女神転生)": {
1544
+ # Description: An aggressive sawtooth, inspired by the dark, rock-infused themes of SMT.
1545
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.35,
1546
+ 'vibrato_rate': 7.0, 'vibrato_depth': 12,
1547
+ 'smooth_notes_level': 0.1,
1548
+ 'continuous_vibrato_level': 0.0,
1549
+ 'bass_boost_level': 0.4, 'noise_level': 0.15, 'distortion_level': 0.25,
1550
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1551
+ },
1552
+ "Modern JRPG Pad (Persona / ペルソナ)": {
1553
+ # Description: A warm, stylish square wave pad, capturing the modern, pop/jazz-infused feel of the Persona series.
1554
+ 'waveform_type': 'Square', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.5,
1555
+ 'vibrato_rate': 2.5, 'vibrato_depth': 4,
1556
+ 'smooth_notes_level': 1.0,
1557
+ 'continuous_vibrato_level': 0.95,
1558
+ 'bass_boost_level': 0.3, 'noise_level': 0.0, 'distortion_level': 0.0,
1559
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1560
+ },
1561
+ "Tactical Brass (Fire Emblem / ファイアーエムブレム)": {
1562
+ # Description: A powerful, sustained sawtooth emulating the bold, heroic synth-brass of Fire Emblem's tactical themes.
1563
+ 'waveform_type': 'Sawtooth', 'pulse_width': 0.5, 'envelope_type': 'Sustained (Full Decay)', 'decay_time_s': 0.4,
1564
+ 'vibrato_rate': 3.5, 'vibrato_depth': 5,
1565
+ 'smooth_notes_level': 0.95,
1566
+ 'continuous_vibrato_level': 0.9,
1567
+ 'bass_boost_level': 0.5, 'noise_level': 0.1, 'distortion_level': 0.15,
1568
+ 'fm_modulation_depth': 0.0, 'fm_modulation_rate': 0.0
1569
  }
1570
  }
1571
 
1572
  app = gr.Blocks(theme=gr.themes.Base())
1573
+
1574
  with app:
1575
  gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>Audio-to-MIDI & Advanced Renderer</h1>")
1576
  gr.Markdown(
 
1713
  # - High: Creates fast modulation, resulting in bright, complex, often metallic harmonics and sidebands.
1714
  # =================================================================================
1715
  #
1716
+ # --- New option for auto-recommendation ---
1717
+ # Define the 8-bit UI components in one place for easy reference
1718
  with gr.Accordion("8-bit Synthesizer Settings", open=False, visible=False) as synth_8bit_settings:
1719
+ # --- Preset selector dropdown ---
1720
  s8bit_preset_selector = gr.Dropdown(
1721
+ choices=["Custom", "Auto-Recommend (Analyze MIDI)"] + list(S8BIT_PRESETS.keys()),
1722
  value="Custom",
1723
  label="Style Preset",
1724
  info="Select a preset to auto-fill the settings below. Choose 'Custom' for manual control.\nFor reference and entertainment only. These presets are not guaranteed to be perfectly accurate."
 
1727
  s8bit_waveform_type = gr.Dropdown(['Square', 'Sawtooth', 'Triangle'], value='Square', label="Waveform Type")
1728
  s8bit_pulse_width = gr.Slider(0.01, 0.99, value=0.5, step=0.01, label="Pulse Width (Square Wave Only)")
1729
  s8bit_envelope_type = gr.Dropdown(['Plucky (AD Envelope)', 'Sustained (Full Decay)'], value='Plucky (AD Envelope)', label="Envelope Type")
1730
+ s8bit_decay_time_s = gr.Slider(0.01, 0.6, value=0.1, step=0.01, label="Decay Time (s)") # Increased max to 0.6 for DQ style
1731
  s8bit_vibrato_rate = gr.Slider(0, 20, value=5, label="Vibrato Rate (Hz)")
1732
  s8bit_vibrato_depth = gr.Slider(0, 50, value=0, label="Vibrato Depth (Hz)")
1733
+ s8bit_bass_boost_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Bass Boost Level", info="Adjusts the volume of the sub-octave. 0 is off.")
1734
+ s8bit_smooth_notes_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Smooth Notes Level", info="Level of fade-in/out to reduce clicks. 0=off, 1=max.")
1735
+ s8bit_continuous_vibrato_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Continuous Vibrato Level", info="Controls vibrato continuity. 0=resets per note, 1=fully continuous.")
1736
+
1737
  # --- New accordion for advanced effects ---
1738
  with gr.Accordion("Advanced Synthesis & FX", open=False):
1739
  s8bit_noise_level = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="Noise Level", info="Mixes in white noise. Great for percussion or adding 'air'.")
1740
  s8bit_distortion_level = gr.Slider(minimum=0.0, maximum=0.9, value=0.0, step=0.05, label="Distortion Level", info="Applies wave-shaping distortion for a grittier, harsher sound.")
1741
  s8bit_fm_modulation_depth = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="FM Depth", info="Depth of Frequency Modulation. Creates complex, metallic, or bell-like tones.")
1742
  s8bit_fm_modulation_rate = gr.Slider(minimum=0.0, maximum=500.0, value=0.0, step=1.0, label="FM Rate", info="Rate of Frequency Modulation. Higher values create brighter, more complex harmonics.")
1743
+
1744
  # --- Original Advanced Options (Now tied to Piano-Specific) ---
1745
  with gr.Accordion("Advanced MIDI Rendering Options", open=False) as advanced_rendering_options:
1746
  render_with_sustains = gr.Checkbox(label="Apply sustain pedal effects (if present)", value=True)
 
1771
  output_midi_summary = gr.Textbox(label="MIDI metadata summary", lines=4)
1772
 
1773
  # Define all input components for the click event, including the preset selector (needed so the handler can detect "Auto-Recommend").
1774
+ # all_inputs now includes the preset selector itself
1775
+ # Inputs for the main processing function
1776
  all_inputs = [
1777
+ input_file, s8bit_preset_selector, enable_stereo_processing,
1778
+ transcription_method, onset_threshold, frame_threshold, minimum_note_length,
1779
+ minimum_frequency, maximum_frequency, infer_onsets, melodia_trick, multiple_pitch_bends,
1780
+ render_type, soundfont_bank, render_sample_rate, render_with_sustains,
1781
+ merge_misaligned_notes, custom_render_patch, render_align, render_transpose_value,
1782
+ render_transpose_to_C4, render_output_as_solo_piano, render_remove_drums,
 
 
1783
  s8bit_waveform_type, s8bit_envelope_type, s8bit_decay_time_s,
1784
  s8bit_pulse_width, s8bit_vibrato_rate, s8bit_vibrato_depth, s8bit_bass_boost_level,
1785
+ s8bit_smooth_notes_level, s8bit_continuous_vibrato_level,
1786
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
1787
  ]
1788
+
1789
+ # Outputs for the main results
1790
+ result_outputs = [
1791
  output_midi_md5, output_midi_title, output_midi_summary,
1792
  output_midi, output_audio, output_plot, output_song_description
1793
  ]
1794
+
1795
+ # The list of 8-bit UI components that can be updated
1796
+ # This MUST be defined after the components themselves are created in the UI.
1797
+ s8bit_ui_components = [
1798
+ s8bit_waveform_type, s8bit_pulse_width, s8bit_envelope_type, s8bit_decay_time_s, s8bit_vibrato_rate,
1799
+ s8bit_vibrato_depth, s8bit_bass_boost_level,
1800
+ s8bit_smooth_notes_level, s8bit_continuous_vibrato_level,
1801
  s8bit_noise_level, s8bit_distortion_level, s8bit_fm_modulation_depth, s8bit_fm_modulation_rate
1802
  ]
1803
 
1804
+ # all_outputs now includes both results AND the UI controls to be updated
1805
+ all_outputs = result_outputs + s8bit_ui_components
1806
+
1807
+ # Event Handling
1808
  submit_btn.click(
1809
  process_and_render_file,
1810
  inputs=all_inputs,
1811
+ outputs=all_outputs # Pass the combined list
1812
  )
1813
 
1814
  # --- Listeners for dynamic UI updates ---
 
1823
  outputs=[general_transcription_settings, synth_8bit_settings]
1824
  )
1825
 
1826
+ # NOTE: this listener fires on every dropdown change; apply_8bit_preset is expected to leave the controls unchanged for "Custom" and "Auto-Recommend" (confirm in its definition).
1827
  # --- Event listener for the preset selector ---
1828
  # When the preset dropdown changes, it calls the `apply_8bit_preset` function.
1829
  # The input to the function is the selected preset name.
1830
  # The outputs are all the individual 8-bit setting components that need to be updated.
1831
+ # This listener is for manual preset selection (e.g., choosing "Mega Man")
1832
  s8bit_preset_selector.change(
1833
  fn=apply_8bit_preset,
1834
  inputs=[s8bit_preset_selector],
1835
+ outputs=s8bit_ui_components # This now correctly targets the new sliders
1836
  )
1837
 
1838