File size: 5,452 Bytes
aaed37a 2158d6f f034b93 aaed37a 2158d6f 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 20d8ce9 f034b93 aaed37a f034b93 20d8ce9 f034b93 2158d6f f034b93 8d24163 f034b93 8d24163 f034b93 8d24163 f034b93 aaed37a 8d24163 20d8ce9 f034b93 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st
def pitch_shift_with_formant_preservation(y, sr, n_steps):
    """Shift the pitch of ``y`` by ``n_steps`` semitones.

    Despite the name, this performs a plain ``librosa.effects.pitch_shift``;
    no separate formant-preservation step is applied.

    Args:
        y: Audio time series (NumPy array).
        sr: Sample rate of ``y`` in Hz.
        n_steps: Number of steps to shift by (12 bins per octave, i.e.
            semitones). May be fractional.

    Returns:
        A new array with the pitch-shifted audio. When ``n_steps`` is zero
        an unmodified copy of ``y`` is returned.
    """
    # A zero shift is a no-op; skip the stretch/resample round trip, which
    # would only add processing artifacts and latency.
    if n_steps == 0:
        return np.array(y, copy=True)

    return librosa.effects.pitch_shift(
        y=y,
        sr=sr,
        n_steps=n_steps,
        bins_per_octave=12,
        res_type='kaiser_fast',
    )
def enhance_female_characteristics(y, sr, settings):
    """Blend in the harmonic component of ``y`` and apply the voice EQ.

    Args:
        y: Audio time series (NumPy array).
        sr: Sample rate of ``y`` in Hz.
        settings: Mapping that provides ``'harmonic_boost'``, the weight
            given to the harmonic component in the blend.

    Returns:
        The result of ``apply_female_eq`` on the blended signal.
    """
    # Harmonic/percussive separation; only the harmonic part is used.
    harmonic, _ = librosa.effects.hpss(y, margin=3.0, kernel_size=31)

    # Cross-fade between the harmonic component and the untouched input.
    boost = settings['harmonic_boost']
    blended = harmonic * boost + y * (1 - boost)

    return apply_female_eq(blended, sr)
def apply_female_eq(y, sr):
    """Combine two band-passed copies of ``y`` and peak-normalize the sum.

    The output is the 1-2 kHz band of ``y`` plus 0.3 times its 3-5 kHz band.

    NOTE(review): the unfiltered input is not mixed back in, so the result
    contains only these two bands -- confirm this is intended rather than a
    boost on top of the original signal.

    Args:
        y: Audio time series (NumPy array).
        sr: Sample rate of ``y`` in Hz.

    Returns:
        The normalized, filtered signal.
    """
    nyquist = sr / 2

    # Second-order Butterworth band-pass, 1 kHz - 2 kHz, zero-phase.
    mid_b, mid_a = signal.butter(
        2, [1000 / nyquist, 2000 / nyquist], btype='band'
    )
    mid_band = signal.filtfilt(mid_b, mid_a, y)

    # Second-order Butterworth band-pass, 3 kHz - 5 kHz, mixed in at 0.3.
    high_b, high_a = signal.butter(
        2, [3000 / nyquist, 5000 / nyquist], btype='band'
    )
    high_band = signal.filtfilt(high_b, high_a, y)

    return librosa.util.normalize(mid_band + 0.3 * high_band)
def add_breathiness(y, sr, amount):
    """Mix low-pass-filtered Gaussian noise into ``y``.

    Args:
        y: Audio time series (NumPy array).
        sr: Sample rate of ``y`` in Hz.
        amount: Mix weight of the noise; the signal is weighted by
            ``1 - amount``.

    Returns:
        The normalized mixture of signal and filtered noise.
    """
    # Zero-mean Gaussian noise (std 0.005), one sample per input sample.
    raw_noise = np.random.normal(0, 0.005, len(y))

    # Second-order Butterworth low-pass at 2 kHz, applied zero-phase.
    lp_b, lp_a = signal.butter(2, 2000 / (sr / 2), btype='lowpass')
    shaped_noise = signal.filtfilt(lp_b, lp_a, raw_noise)

    mixed = y * (1 - amount) + shaped_noise * amount
    return librosa.util.normalize(mixed)
def process_audio_advanced(audio_file, settings):
    """Run the full voice-conversion chain on an audio file.

    Steps: load at 24 kHz, remove the DC offset and normalize, pitch-shift,
    enhance harmonics and apply the EQ, optionally add breathiness,
    normalize, and lightly smooth the result.

    Args:
        audio_file: Path (or other source accepted by ``librosa.load``) of
            the audio to convert.
        settings: Mapping with the keys ``'pitch_shift'``,
            ``'harmonic_boost'`` and ``'breathiness'``.

    Returns:
        A ``(samples, sample_rate)`` tuple with the processed audio.

    Raises:
        ValueError: If the file contains no audio samples.
    """
    # Load and resample to a fixed 24 kHz working rate.
    y, sr = librosa.load(audio_file, sr=24000)
    if y.size == 0:
        raise ValueError("Audio file contains no samples")

    # Remove DC offset, then peak-normalize.
    y = librosa.util.normalize(y - np.mean(y))

    y_shifted = pitch_shift_with_formant_preservation(
        y,
        sr,
        settings['pitch_shift'],
    )

    y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)

    if settings['breathiness'] > 0:
        y_enhanced = add_breathiness(y_enhanced, sr, settings['breathiness'])

    y_final = librosa.util.normalize(y_enhanced)

    # Light Savitzky-Golay smoothing. The previous 1001-sample window spans
    # ~42 ms at 24 kHz; a quadratic fit over that span averages away almost
    # all speech content, and it raised ValueError for clips shorter than
    # the window. Use a short window, clamped to the signal length and kept
    # odd, and skip smoothing when the clip is too short to fit the
    # polynomial.
    polyorder = 2
    window_length = min(7, len(y_final))
    if window_length % 2 == 0:
        window_length -= 1
    if window_length > polyorder:
        y_final = signal.savgol_filter(y_final, window_length, polyorder)

    return y_final, sr
def create_voice_preset(preset_name):
    """Return the settings dict for a named preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A fresh dict with the keys ``'pitch_shift'``, ``'harmonic_boost'``
        and ``'breathiness'``, or ``None`` if the name is not a known preset.
    """
    setting_names = ('pitch_shift', 'harmonic_boost', 'breathiness')
    preset_values = {
        'Young Female': (4.0, 0.3, 0.15),
        'Mature Female': (3.0, 0.2, 0.1),
        'Soft Female': (3.5, 0.25, 0.2),
    }
    presets = {
        name: dict(zip(setting_names, values))
        for name, values in preset_values.items()
    }
    return presets.get(preset_name)
# --- Streamlit UI -----------------------------------------------------------
# Streamlit re-executes this script on every widget interaction, so anything
# done at this level (such as creating files) happens on each rerun.
st.title("Improved Female Voice Converter")
uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )
    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
            'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            tmp_path = None
            try:
                # Write the upload to disk only when a conversion is actually
                # requested, keeping the real extension so mp3 uploads are
                # not mislabeled as .wav.
                suffix = os.path.splitext(uploaded_file.name)[1] or '.wav'
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                # Encode to WAV in memory and hand plain bytes to the
                # widgets so the buffer's read position does not matter.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                # Display audio player
                st.audio(wav_bytes, format='audio/wav')
                # Download button
                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )
            except Exception as e:
                # Top-level UI boundary: report the failure to the user
                # instead of crashing the app.
                st.error(f"Error processing audio: {str(e)}")
            finally:
                # The temp file was created with delete=False; remove it so
                # repeated conversions do not leak files.
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.remove(tmp_path)

st.markdown("""
### Tips for Best Results:
1. Use high-quality input audio with clear speech
2. Start with presets and adjust if needed
3. Keep pitch shift between 3-5 for most natural results
4. Use minimal breathiness (0.1-0.2) for realistic sound
5. Record in a quiet environment with minimal background noise
### Recommended Settings:
- For younger female voice: pitch shift 4.0, harmonic boost 0.3
- For mature female voice: pitch shift 3.0, harmonic boost 0.2
- For soft female voice: pitch shift 3.5, harmonic boost 0.25
""")