Algorithmvoice / app.py
Artificial-superintelligence's picture
Update app.py
20d8ce9 verified
raw
history blame
5.45 kB
import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st
def pitch_shift_with_formant_preservation(y, sr, n_steps):
    """Shift the pitch of an audio signal by ``n_steps`` semitones.

    This is a thin wrapper around ``librosa.effects.pitch_shift``. Despite
    the function name, no separate formant-correction step is performed
    here; the signal is only pitch-shifted.

    Args:
        y: Audio time series to shift.
        sr: Sample rate of ``y`` in Hz.
        n_steps: Number of steps to shift by; with ``bins_per_octave=12``
            one step is one semitone. May be fractional.

    Returns:
        The pitch-shifted audio time series.
    """
    # 'kaiser_fast' selects the resampling backend used by librosa after
    # the time-stretch stage.
    return librosa.effects.pitch_shift(
        y=y,
        sr=sr,
        n_steps=n_steps,
        bins_per_octave=12,
        res_type='kaiser_fast',
    )
def enhance_female_characteristics(y, sr, settings):
    """Blend in the harmonic component of ``y`` and apply the voice EQ.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y`` in Hz.
        settings: Mapping that must contain ``'harmonic_boost'``, the
            weight given to the harmonic component in the blend (the
            unprocessed signal receives ``1 - harmonic_boost``).

    Returns:
        The result of ``apply_female_eq`` applied to the blended signal.
    """
    # Harmonic/percussive separation; only the harmonic part is used.
    harmonic_part, _ = librosa.effects.hpss(y, margin=3.0, kernel_size=31)

    # Cross-fade between the harmonic-only signal and the original.
    boost = settings['harmonic_boost']
    dry_weight = 1 - settings['harmonic_boost']
    blended = harmonic_part * boost + y * dry_weight

    return apply_female_eq(blended, sr)
def apply_female_eq(y, sr, *, presence_gain=1.0, brightness_gain=0.3):
    """Boost the 1-2 kHz and 3-5 kHz bands of ``y`` and peak-normalise.

    Each band is isolated with a second-order Butterworth band-pass run
    forwards and backwards (``filtfilt``), then added on top of the
    unmodified signal. Adding the bands to the dry signal, rather than
    returning the band-pass outputs alone, keeps the content outside the
    two bands (including the voice fundamental) in the result.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y`` in Hz. The Nyquist frequency (``sr / 2``)
            must exceed 5 kHz or filter design raises ``ValueError``.
        presence_gain: Linear gain applied to the 1-2 kHz band before it
            is added to the signal.
        brightness_gain: Linear gain applied to the 3-5 kHz band before it
            is added to the signal.

    Returns:
        The equalised signal, normalised with ``librosa.util.normalize``.
    """
    # 1-2 kHz band.
    b_presence, a_presence = signal.butter(2, [1000, 2000], btype='band', fs=sr)
    presence_band = signal.filtfilt(b_presence, a_presence, y)

    # 3-5 kHz band.
    b_bright, a_bright = signal.butter(2, [3000, 5000], btype='band', fs=sr)
    brightness_band = signal.filtfilt(b_bright, a_bright, y)

    # Dry signal plus weighted bands: a boost, not a band-pass replacement.
    y_eq = y + presence_gain * presence_band + brightness_gain * brightness_band
    return librosa.util.normalize(y_eq)
def add_breathiness(y, sr, amount):
    """Mix low-pass-filtered Gaussian noise into ``y``.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y`` in Hz.
        amount: Mix weight of the noise; the input signal is weighted by
            ``1 - amount``.

    Returns:
        The mixed signal, normalised with ``librosa.util.normalize``.
        The output is not deterministic because the noise is drawn from
        NumPy's global random generator.
    """
    n_samples = len(y)

    # Zero-mean Gaussian noise with standard deviation 0.005.
    white_noise = np.random.normal(0, 0.005, n_samples)

    # Second-order Butterworth low-pass at 2 kHz, applied forwards and
    # backwards so the noise is shaped without phase shift.
    lp_b, lp_a = signal.butter(2, 2000/(sr/2), btype='lowpass')
    shaped_noise = signal.filtfilt(lp_b, lp_a, white_noise)

    mixed = y * (1 - amount) + shaped_noise * amount
    return librosa.util.normalize(mixed)
def process_audio_advanced(audio_file, settings):
    """Run the full voice-conversion chain on an audio file.

    Steps: load at 24 kHz, remove the DC offset and normalise, pitch-shift,
    enhance harmonics and EQ, optionally add breath noise, normalise, and
    apply a very light smoothing pass.

    Args:
        audio_file: Path (or other source accepted by ``librosa.load``)
            of the audio to process.
        settings: Mapping with the keys ``'pitch_shift'``,
            ``'harmonic_boost'`` and ``'breathiness'``.

    Returns:
        A ``(samples, sample_rate)`` tuple with the processed audio and
        the sample rate it was processed at (24000).
    """
    y, sr = librosa.load(audio_file, sr=24000)

    # Remove DC offset, then bring the peak to full scale.
    y = librosa.util.normalize(y - np.mean(y))

    y_shifted = pitch_shift_with_formant_preservation(
        y,
        sr,
        settings['pitch_shift'],
    )

    y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)

    if settings['breathiness'] > 0:
        y_enhanced = add_breathiness(y_enhanced, sr, settings['breathiness'])

    y_final = librosa.util.normalize(y_enhanced)

    # Light Savitzky-Golay smoothing. The window must stay short: a
    # 1001-sample window (~42 ms at 24 kHz) behaves as a low-pass with a
    # cutoff of only a few tens of Hz and wipes out the speech content.
    smoothing_window = 5
    smoothing_polyorder = 2
    # savgol_filter (default mode='interp') raises if the signal is shorter
    # than the window, so very short clips skip the smoothing pass.
    if len(y_final) >= smoothing_window:
        y_final = signal.savgol_filter(
            y_final, smoothing_window, smoothing_polyorder
        )
        # Smoothing can move the peak slightly; restore full-scale peak so
        # the written file neither clips nor loses level.
        y_final = librosa.util.normalize(y_final)

    return y_final, sr
def create_voice_preset(preset_name):
    """Return the settings dict for a named preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with the keys ``'pitch_shift'``, ``'harmonic_boost'``
        and ``'breathiness'``, or ``None`` when the name is not a known
        preset.
    """
    setting_keys = ('pitch_shift', 'harmonic_boost', 'breathiness')
    # Values are listed in the same order as ``setting_keys``.
    preset_values = {
        'Young Female': (4.0, 0.3, 0.15),
        'Mature Female': (3.0, 0.2, 0.1),
        'Soft Female': (3.5, 0.25, 0.2),
    }

    chosen = preset_values.get(preset_name)
    if chosen is None:
        return None
    return dict(zip(setting_keys, chosen))
# --- Streamlit page: upload, choose settings, convert, play/download ---
st.title("Improved Female Voice Converter")
uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )

    if preset_name == 'Custom':
        # Manual controls; defaults match the 'Young Female' preset.
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
            'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            # Keep the upload's real extension so an MP3 is not written
            # to disk under a '.wav' name.
            suffix = os.path.splitext(uploaded_file.name)[1] or '.wav'
            tmp_path = None
            try:
                # The temp file is created only when a conversion is
                # requested. Streamlit re-executes this script on widget
                # interaction, so writing it at upload time would create
                # a new file on every rerun.
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                # Encode to WAV in memory and hand the raw bytes to the
                # widgets, so nothing depends on the buffer's position.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                # Display audio player
                st.audio(wav_bytes, format='audio/wav')

                # Download button
                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )
            except Exception as e:
                # UI boundary: surface any processing failure to the user.
                st.error(f"Error processing audio: {str(e)}")
            finally:
                # delete=False means the file must be removed explicitly.
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.remove(tmp_path)
# Static usage guidance rendered as Markdown: recording tips plus the
# pitch-shift / harmonic-boost values used by the three built-in presets.
st.markdown("""
### Tips for Best Results:
1. Use high-quality input audio with clear speech
2. Start with presets and adjust if needed
3. Keep pitch shift between 3-5 for most natural results
4. Use minimal breathiness (0.1-0.2) for realistic sound
5. Record in a quiet environment with minimal background noise
### Recommended Settings:
- For younger female voice: pitch shift 4.0, harmonic boost 0.3
- For mature female voice: pitch shift 3.0, harmonic boost 0.2
- For soft female voice: pitch shift 3.5, harmonic boost 0.25
""")