Artificial-superintelligence committed on
Commit
20d8ce9
·
verified ·
1 Parent(s): 375b8bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -113
app.py CHANGED
@@ -3,129 +3,123 @@ import librosa
3
  import soundfile as sf
4
  import numpy as np
5
  import scipy.signal as signal
6
- from scipy.io import wavfile
7
  from io import BytesIO
8
  import tempfile
9
 
10
- def modify_formants(y, sr, formant_shift_factor=1.2):
11
- # Get the power spectrum
12
- D = librosa.stft(y)
13
- S = np.abs(D)
14
-
15
- # Use frame-based processing for LPC
16
- frame_length = 2048
17
- hop_length = 512
18
- frames = librosa.util.frame(y, frame_length=frame_length, hop_length=hop_length)
19
-
20
- # Process each frame
21
- modified_frames = []
22
- for frame in frames.T:
23
- # Calculate LPC coefficients
24
- a = librosa.lpc(frame, order=12)
25
-
26
- # Shift formants
27
- new_a = np.zeros_like(a)
28
- new_a[0] = a[0]
29
- for i in range(1, len(a)):
30
- new_a[i] = a[i] * (formant_shift_factor ** i)
31
-
32
- # Apply modified LPC filter
33
- modified_frame = signal.lfilter([1], new_a, frame)
34
- modified_frames.append(modified_frame)
35
-
36
- # Reconstruct the signal
37
- y_formant = np.concatenate([frame[:hop_length] for frame in modified_frames[:-1]] +
38
- [modified_frames[-1]])
39
-
40
- return librosa.util.normalize(y_formant)
41
 
42
- def enhance_harmonics(y, sr):
43
- # Extract harmonics using harmonic-percussive source separation
44
- y_harmonic = librosa.effects.hpss(y)[0]
 
 
 
 
 
 
 
 
 
 
45
 
46
- # Enhance the harmonics
47
- y_enhanced = y_harmonic * 1.2 + y * 0.3
48
- return librosa.util.normalize(y_enhanced)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def process_audio_advanced(audio_file, settings):
51
- # Load audio
52
- y, sr = librosa.load(audio_file)
53
 
54
- # Pitch shifting with formant preservation
55
- y_shifted = librosa.effects.pitch_shift(
56
- y,
57
- sr=sr,
58
- n_steps=settings['pitch_shift']
59
- )
60
 
61
- # Modify formants
62
- y_formant = modify_formants(
63
- y_shifted,
64
  sr,
65
- settings['formant_shift']
66
  )
67
 
68
- # Enhance harmonics
69
- y_harmonic = enhance_harmonics(y_formant, sr)
70
 
71
- # Apply vocal tract length modification through resampling
72
- y_vtln = librosa.effects.time_stretch(
73
- y_harmonic,
74
- rate=settings['vtln_factor']
75
- )
76
 
77
- # Smooth the output
78
- y_smooth = signal.savgol_filter(y_vtln, 1001, 2)
79
 
80
- # Final normalization
81
- y_final = librosa.util.normalize(y_smooth)
82
 
83
  return y_final, sr
84
 
85
  def create_voice_preset(preset_name):
86
  presets = {
87
  'Young Female': {
88
- 'pitch_shift': 8.0,
89
- 'formant_shift': 1.3,
90
- 'vtln_factor': 1.1,
91
- 'breathiness': 0.3
92
  },
93
  'Mature Female': {
94
- 'pitch_shift': 6.0,
95
- 'formant_shift': 1.2,
96
- 'vtln_factor': 1.05,
97
- 'breathiness': 0.2
98
  },
99
  'Soft Female': {
100
- 'pitch_shift': 7.0,
101
- 'formant_shift': 1.25,
102
- 'vtln_factor': 1.15,
103
- 'breathiness': 0.4
104
  }
105
  }
106
  return presets.get(preset_name)
107
 
108
- def add_breathiness(y, sr, amount=0.3):
109
- # Generate breath noise
110
- noise = np.random.normal(0, 0.01, len(y))
111
- noise_filtered = signal.lfilter([1], [1, -0.98], noise)
112
-
113
- # Mix with original signal
114
- y_breathy = y * (1 - amount) + noise_filtered * amount
115
- return librosa.util.normalize(y_breathy)
116
 
117
- st.title("Advanced Female Voice Converter")
118
-
119
- # File uploader
120
  uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])
121
 
122
  if uploaded_file is not None:
123
- # Save uploaded file temporarily
124
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
125
  tmp_file.write(uploaded_file.getvalue())
126
  tmp_path = tmp_file.name
127
 
128
- # Voice preset selector
129
  preset_name = st.selectbox(
130
  "Select Voice Preset",
131
  ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
@@ -133,10 +127,9 @@ if uploaded_file is not None:
133
 
134
  if preset_name == 'Custom':
135
  settings = {
136
- 'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
137
- 'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
138
- 'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
139
- 'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1)
140
  }
141
  else:
142
  settings = create_voice_preset(preset_name)
@@ -144,16 +137,8 @@ if uploaded_file is not None:
144
  if st.button("Convert Voice"):
145
  with st.spinner("Processing audio..."):
146
  try:
147
- # Process audio
148
  processed_audio, sr = process_audio_advanced(tmp_path, settings)
149
 
150
- # Add breathiness
151
- processed_audio = add_breathiness(
152
- processed_audio,
153
- sr,
154
- settings['breathiness']
155
- )
156
-
157
  # Save to buffer
158
  buffer = BytesIO()
159
  sf.write(buffer, processed_audio, sr, format='WAV')
@@ -173,20 +158,15 @@ if uploaded_file is not None:
173
  st.error(f"Error processing audio: {str(e)}")
174
 
175
  st.markdown("""
176
- ### Voice Conversion Features:
177
- - Pitch shifting with formant preservation
178
- - Harmonic enhancement
179
- - Vocal tract length modification
180
- - Natural breathiness addition
181
- - Multiple voice presets
182
- - Custom parameter controls
183
-
184
  ### Tips for Best Results:
185
- 1. Start with a clear audio recording
186
- 2. Try different presets to find the best match
187
- 3. For custom settings:
188
- - Pitch shift: 6-8 for natural female voice
189
- - Formant shift: 1.1-1.3 for feminine resonance
190
- - Vocal tract length: 1.05-1.15 for realistic results
191
- - Breathiness: 0.2-0.4 for natural sound
 
 
 
192
  """)
 
3
  import soundfile as sf
4
  import numpy as np
5
  import scipy.signal as signal
 
6
  from io import BytesIO
7
  import tempfile
8
 
9
+ def pitch_shift_with_formant_preservation(y, sr, n_steps):
10
+ # Use a smaller frame size for better quality
11
+ frame_length = 1024
12
+ hop_length = 256
13
+
14
+ # Apply pitch shifting with smaller frame size
15
+ y_shifted = librosa.effects.pitch_shift(
16
+ y=y,
17
+ sr=sr,
18
+ n_steps=n_steps,
19
+ bins_per_octave=12,
20
+ res_type='kaiser_fast'
21
+ )
22
+
23
+ return y_shifted
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ def enhance_female_characteristics(y, sr, settings):
26
+ # Extract harmonics more gently
27
+ y_harmonic, y_percussive = librosa.effects.hpss(
28
+ y,
29
+ margin=3.0,
30
+ kernel_size=31
31
+ )
32
+
33
+ # Enhance harmonics subtly
34
+ y_enhanced = y_harmonic * settings['harmonic_boost'] + y * (1 - settings['harmonic_boost'])
35
+
36
+ # Apply subtle EQ to enhance female characteristics
37
+ y_filtered = apply_female_eq(y_enhanced, sr)
38
 
39
+ return y_filtered
40
+
41
+ def apply_female_eq(y, sr):
42
+ # Design filters for female voice enhancement
43
+ # Boost frequencies around 1kHz-2kHz for feminine resonance
44
+ b1, a1 = signal.butter(2, [1000/(sr/2), 2000/(sr/2)], btype='band')
45
+ y_filtered = signal.filtfilt(b1, a1, y)
46
+
47
+ # Slight boost in high frequencies (3kHz-5kHz)
48
+ b2, a2 = signal.butter(2, [3000/(sr/2), 5000/(sr/2)], btype='band')
49
+ y_filtered += 0.3 * signal.filtfilt(b2, a2, y)
50
+
51
+ return librosa.util.normalize(y_filtered)
52
+
53
+ def add_breathiness(y, sr, amount):
54
+ # Generate more natural breath noise
55
+ noise = np.random.normal(0, 0.005, len(y))
56
+
57
+ # Filter the noise to sound more like breath
58
+ b, a = signal.butter(2, 2000/(sr/2), btype='lowpass')
59
+ breath_noise = signal.filtfilt(b, a, noise)
60
+
61
+ # Add filtered noise
62
+ y_breathy = y * (1 - amount) + breath_noise * amount
63
+ return librosa.util.normalize(y_breathy)
64
 
65
  def process_audio_advanced(audio_file, settings):
66
+ # Load audio with a higher sample rate
67
+ y, sr = librosa.load(audio_file, sr=24000)
68
 
69
+ # Remove DC offset
70
+ y = librosa.util.normalize(y - np.mean(y))
 
 
 
 
71
 
72
+ # Apply pitch shifting
73
+ y_shifted = pitch_shift_with_formant_preservation(
74
+ y,
75
  sr,
76
+ settings['pitch_shift']
77
  )
78
 
79
+ # Enhance female characteristics
80
+ y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)
81
 
82
+ # Add breathiness
83
+ if settings['breathiness'] > 0:
84
+ y_enhanced = add_breathiness(y_enhanced, sr, settings['breathiness'])
 
 
85
 
86
+ # Final normalization and cleaning
87
+ y_final = librosa.util.normalize(y_enhanced)
88
 
89
+ # Apply final smoothing
90
+ y_final = signal.savgol_filter(y_final, 1001, 2)
91
 
92
  return y_final, sr
93
 
94
  def create_voice_preset(preset_name):
95
  presets = {
96
  'Young Female': {
97
+ 'pitch_shift': 4.0,
98
+ 'harmonic_boost': 0.3,
99
+ 'breathiness': 0.15
 
100
  },
101
  'Mature Female': {
102
+ 'pitch_shift': 3.0,
103
+ 'harmonic_boost': 0.2,
104
+ 'breathiness': 0.1
 
105
  },
106
  'Soft Female': {
107
+ 'pitch_shift': 3.5,
108
+ 'harmonic_boost': 0.25,
109
+ 'breathiness': 0.2
 
110
  }
111
  }
112
  return presets.get(preset_name)
113
 
114
+ st.title("Improved Female Voice Converter")
 
 
 
 
 
 
 
115
 
 
 
 
116
  uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])
117
 
118
  if uploaded_file is not None:
 
119
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
120
  tmp_file.write(uploaded_file.getvalue())
121
  tmp_path = tmp_file.name
122
 
 
123
  preset_name = st.selectbox(
124
  "Select Voice Preset",
125
  ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
 
127
 
128
  if preset_name == 'Custom':
129
  settings = {
130
+ 'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
131
+ 'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
132
+ 'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05)
 
133
  }
134
  else:
135
  settings = create_voice_preset(preset_name)
 
137
  if st.button("Convert Voice"):
138
  with st.spinner("Processing audio..."):
139
  try:
 
140
  processed_audio, sr = process_audio_advanced(tmp_path, settings)
141
 
 
 
 
 
 
 
 
142
  # Save to buffer
143
  buffer = BytesIO()
144
  sf.write(buffer, processed_audio, sr, format='WAV')
 
158
  st.error(f"Error processing audio: {str(e)}")
159
 
160
  st.markdown("""
 
 
 
 
 
 
 
 
161
  ### Tips for Best Results:
162
+ 1. Use high-quality input audio with clear speech
163
+ 2. Start with presets and adjust if needed
164
+ 3. Keep pitch shift between 3-5 for most natural results
165
+ 4. Use minimal breathiness (0.1-0.2) for realistic sound
166
+ 5. Record in a quiet environment with minimal background noise
167
+
168
+ ### Recommended Settings:
169
+ - For younger female voice: pitch shift 4.0, harmonic boost 0.3
170
+ - For mature female voice: pitch shift 3.0, harmonic boost 0.2
171
+ - For soft female voice: pitch shift 3.5, harmonic boost 0.25
172
  """)