Artificial-superintelligence committed on
Commit
8d24163
·
verified ·
1 Parent(s): 5a29c84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -51
app.py CHANGED
@@ -2,32 +2,113 @@ import streamlit as st
2
  import librosa
3
  import soundfile as sf
4
  import numpy as np
 
 
 
 
 
5
  from io import BytesIO
6
  import tempfile
7
 
8
- def process_audio(audio_file, pitch_factor=8):
9
- # Load the audio file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  y, sr = librosa.load(audio_file)
11
 
12
- # Pitch shift using librosa (female voice typically higher pitch)
13
- y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # Apply some feminine characteristics
16
- # Smooth the audio slightly
17
- y_smooth = librosa.effects.preemphasis(y_shifted)
18
 
19
- # Normalize audio
20
- y_normalized = librosa.util.normalize(y_smooth)
 
 
 
21
 
22
- return y_normalized, sr
 
 
 
 
 
 
23
 
24
- def save_audio(audio_data, sr):
25
- # Save processed audio to BytesIO object
26
- buffer = BytesIO()
27
- sf.write(buffer, audio_data, sr, format='WAV')
28
- return buffer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
- st.title("Voice Changer - Female Voice Conversion")
31
 
32
  # File uploader
33
  uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])
@@ -38,42 +119,68 @@ if uploaded_file is not None:
38
  tmp_file.write(uploaded_file.getvalue())
39
  tmp_path = tmp_file.name
40
 
41
- # Pitch adjustment slider
42
- pitch_factor = st.slider("Pitch Adjustment", 0.0, 12.0, 8.0, 0.5)
43
-
44
- if st.button("Convert to Female Voice"):
45
- # Process the audio
46
- try:
47
- processed_audio, sr = process_audio(tmp_path, pitch_factor)
48
-
49
- # Save processed audio
50
- audio_buffer = save_audio(processed_audio, sr)
51
-
52
- # Create download button
53
- st.audio(audio_buffer, format='audio/wav')
54
-
55
- # Add download button
56
- st.download_button(
57
- label="Download Converted Audio",
58
- data=audio_buffer,
59
- file_name="female_voice.wav",
60
- mime="audio/wav"
61
- )
62
-
63
- except Exception as e:
64
- st.error(f"Error processing audio: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- # Add instructions
67
  st.markdown("""
68
- ### Instructions:
69
- 1. Upload a WAV or MP3 audio file
70
- 2. Adjust the pitch slider (higher values = more feminine voice)
71
- 3. Click 'Convert to Female Voice'
72
- 4. Play the converted audio
73
- 5. Download the result if satisfied
 
74
 
75
- ### Notes:
76
- - Best results with clear audio input
77
- - Recommended pitch adjustment: 6-8 for natural-sounding results
78
- - Larger files may take longer to process
 
 
 
 
79
  """)
 
2
  import librosa
3
  import soundfile as sf
4
  import numpy as np
5
+ import scipy.signal as signal
6
+ from scipy.io import wavfile
7
+ import pyworld as world
8
+ import torch
9
+ import torchaudio
10
  from io import BytesIO
11
  import tempfile
12
 
13
def enhance_harmonics(y, sr):
    """Emphasize the harmonic part of ``y`` and peak-normalize the result.

    The signal is split with ``librosa.effects.hpss``; the harmonic
    component is weighted by 1.2 and mixed with the untouched input
    weighted by 0.3.

    Args:
        y: Audio samples to process.
        sr: Sample rate. Accepted for signature parity with the other
            processing steps; this function does not read it.

    Returns:
        The mixed signal after ``librosa.util.normalize``.
    """
    # hpss yields (harmonic, percussive); only the harmonic half is used.
    harmonic_part, _percussive_part = librosa.effects.hpss(y)
    blended = harmonic_part * 1.2 + y * 0.3
    return librosa.util.normalize(blended)
20
+
21
# Largest pole radius allowed in the synthesis filter. Anything at or beyond
# the unit circle makes the recursive filter diverge.
_MAX_POLE_RADIUS = 0.98


def modify_formants(y, sr, formant_shift_factor=1.2):
    """Re-filter ``y`` through an all-pole filter built from its scaled LPC fit.

    A 12th-order LPC polynomial is estimated from ``y``; coefficient ``i`` is
    multiplied by ``formant_shift_factor ** i``; and ``y`` is passed through
    the resulting all-pole filter ``1 / A'(z)``.

    Scaling coefficient ``i`` by ``factor ** i`` is the substitution
    ``A(z / factor)``, which multiplies every pole radius by ``factor``.
    For ``factor > 1`` that can move poles outside the unit circle, where the
    filter is unstable and its output overflows. Poles whose radius exceeds
    ``_MAX_POLE_RADIUS`` are therefore pulled back onto that radius (angle
    unchanged) before filtering.

    NOTE(review): this scaling changes pole radii, not pole angles, so it
    alters resonance sharpness rather than moving formant frequencies.
    Confirm whether a true frequency shift was intended.

    Args:
        y: Audio samples (passed to ``librosa.lpc``).
        sr: Sample rate. Not used by this function.
        formant_shift_factor: Per-order scaling base for the LPC coefficients.

    Returns:
        The filtered signal after ``librosa.util.normalize``.
    """
    order = 12
    # ``order`` is keyword-only in current librosa releases; passing it by
    # name also works on older versions.
    a = librosa.lpc(y, order=order)

    # Vectorized form of new_a[i] = a[i] * factor**i (index 0 is unchanged
    # because factor**0 == 1).
    new_a = a * np.power(float(formant_shift_factor), np.arange(len(a)))

    # Stability guard: rebuild the polynomial with out-of-range poles clamped.
    poles = np.roots(new_a)
    radii = np.abs(poles)
    outside = radii > _MAX_POLE_RADIUS
    if np.any(outside):
        poles[outside] = poles[outside] * (_MAX_POLE_RADIUS / radii[outside])
        # np.poly returns a monic polynomial; restore the leading coefficient.
        new_a = new_a[0] * np.real(np.poly(poles))

    y_formant = signal.lfilter([1], new_a, y)
    return librosa.util.normalize(y_formant)
39
+
40
# Polynomial order used by the final Savitzky-Golay smoothing pass.
_SMOOTHING_POLYORDER = 2
# Default smoothing window in samples. The previous hard-coded 1001-sample
# window fitted one quadratic across roughly 45 ms of audio at librosa's
# default sample rate, which removes essentially all voice-band content.
_DEFAULT_SMOOTHING_WINDOW = 7


def _fit_savgol_window(requested, n_samples, polyorder):
    """Return an odd window length valid for ``n_samples``, or None if none fits."""
    window = min(int(requested), n_samples)
    if window % 2 == 0:
        window -= 1
    return window if window > polyorder else None


def process_audio_advanced(audio_file, settings):
    """Load an audio file and run it through the voice-conversion chain.

    Steps, in order: pitch shift, ``modify_formants``, ``enhance_harmonics``,
    time stretch, light Savitzky-Golay smoothing, peak normalization.

    Args:
        audio_file: Path or file object accepted by ``librosa.load``.
        settings: Mapping with keys ``'pitch_shift'`` (semitones passed as
            ``n_steps``), ``'formant_shift'`` and ``'vtln_factor'`` (passed
            as the time-stretch ``rate``). An optional ``'smoothing_window'``
            key overrides the smoothing window length in samples.

    Returns:
        Tuple ``(samples, sr)``: the processed signal and the sample rate
        reported by ``librosa.load``.
    """
    y, sr = librosa.load(audio_file)

    # The former piptrack/F0 estimate was never consumed by any later step,
    # so it has been dropped.

    y_shifted = librosa.effects.pitch_shift(
        y,
        sr=sr,
        n_steps=settings['pitch_shift'],
    )

    y_formant = modify_formants(y_shifted, sr, settings['formant_shift'])

    y_harmonic = enhance_harmonics(y_formant, sr)

    # NOTE(review): the UI labels this "vocal tract length", but time_stretch
    # changes playback speed/duration only. Confirm the intended effect.
    y_vtln = librosa.effects.time_stretch(
        y_harmonic,
        rate=settings['vtln_factor'],
    )

    # savgol_filter rejects windows longer than the signal, so the window is
    # clamped to the clip length; smoothing is skipped when nothing fits.
    window = _fit_savgol_window(
        settings.get('smoothing_window', _DEFAULT_SMOOTHING_WINDOW),
        len(y_vtln),
        _SMOOTHING_POLYORDER,
    )
    if window is None:
        y_smooth = y_vtln
    else:
        y_smooth = signal.savgol_filter(y_vtln, window, _SMOOTHING_POLYORDER)

    y_final = librosa.util.normalize(y_smooth)
    return y_final, sr
78
 
79
def create_voice_preset(preset_name):
    """Look up the parameter set for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with keys ``'pitch_shift'``, ``'formant_shift'``,
        ``'vtln_factor'`` and ``'breathiness'``, or ``None`` when the name
        is not a known preset.
    """
    field_names = ('pitch_shift', 'formant_shift', 'vtln_factor', 'breathiness')
    # Values are listed in the same order as field_names.
    preset_values = {
        'Young Female': (8.0, 1.3, 1.1, 0.3),
        'Mature Female': (6.0, 1.2, 1.05, 0.2),
        'Soft Female': (7.0, 1.25, 1.15, 0.4),
    }

    chosen = preset_values.get(preset_name)
    if chosen is None:
        return None
    return dict(zip(field_names, chosen))
101
+
102
def add_breathiness(y, sr, amount=0.3):
    """Blend filtered random noise into ``y`` and peak-normalize the mix.

    Gaussian noise (mean 0, standard deviation 0.01) of the same length as
    ``y`` is passed through a one-pole recursive filter (feedback
    coefficient 0.98) and cross-faded with the input.

    Args:
        y: Audio samples.
        sr: Sample rate. Not used by this function.
        amount: Mix weight of the noise; the input is weighted ``1 - amount``.

    Returns:
        The mixed signal after ``librosa.util.normalize``. The noise comes
        from NumPy's global random state, so the output differs between
        calls unless that state is seeded.
    """
    sample_count = len(y)
    white_noise = np.random.normal(0, 0.01, sample_count)
    breath_noise = signal.lfilter([1], [1, -0.98], white_noise)

    mixed = (1 - amount) * y + amount * breath_noise
    return librosa.util.normalize(mixed)
110
 
111
+ st.title("Advanced Female Voice Converter")
112
 
113
  # File uploader
114
  uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])
 
119
  tmp_file.write(uploaded_file.getvalue())
120
  tmp_path = tmp_file.name
121
 
122
+ # Voice preset selector
123
+ preset_name = st.selectbox(
124
+ "Select Voice Preset",
125
+ ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
126
+ )
127
+
128
+ if preset_name == 'Custom':
129
+ settings = {
130
+ 'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
131
+ 'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
132
+ 'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
133
+ 'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1)
134
+ }
135
+ else:
136
+ settings = create_voice_preset(preset_name)
137
+
138
+ if st.button("Convert Voice"):
139
+ with st.spinner("Processing audio..."):
140
+ try:
141
+ # Process audio
142
+ processed_audio, sr = process_audio_advanced(tmp_path, settings)
143
+
144
+ # Add breathiness
145
+ processed_audio = add_breathiness(
146
+ processed_audio,
147
+ sr,
148
+ settings['breathiness']
149
+ )
150
+
151
+ # Save to buffer
152
+ buffer = BytesIO()
153
+ sf.write(buffer, processed_audio, sr, format='WAV')
154
+
155
+ # Display audio player
156
+ st.audio(buffer, format='audio/wav')
157
+
158
+ # Download button
159
+ st.download_button(
160
+ label="Download Converted Audio",
161
+ data=buffer,
162
+ file_name="female_voice_converted.wav",
163
+ mime="audio/wav"
164
+ )
165
+
166
+ except Exception as e:
167
+ st.error(f"Error processing audio: {str(e)}")
168
 
 
169
  st.markdown("""
170
+ ### Advanced Features:
171
+ - Formant preservation and shifting
172
+ - Harmonic enhancement
173
+ - Vocal tract length normalization
174
+ - Natural breathiness addition
175
+ - Multiple voice presets
176
+ - Custom parameter adjustment
177
 
178
+ ### Tips for Best Results:
179
+ 1. Use high-quality input audio
180
+ 2. Start with presets and adjust if needed
181
+ 3. For custom settings:
182
+ - Pitch shift: 6-8 for natural female voice
183
+ - Formant shift: 1.1-1.3 for feminine resonance
184
+ - Vocal tract length: 1.05-1.15 for realistic results
185
+ - Breathiness: 0.2-0.4 for natural sound
186
  """)