import streamlit as st import librosa import soundfile as sf import numpy as np import scipy.signal as signal from io import BytesIO import tempfile def pitch_shift_with_formant_preservation(y, sr, n_steps): # Use a smaller frame size for better quality frame_length = 1024 hop_length = 256 # Apply pitch shifting with smaller frame size y_shifted = librosa.effects.pitch_shift( y=y, sr=sr, n_steps=n_steps, bins_per_octave=12, res_type='kaiser_fast' ) return y_shifted def enhance_female_characteristics(y, sr, settings): # Extract harmonics more gently y_harmonic, y_percussive = librosa.effects.hpss( y, margin=3.0, kernel_size=31 ) # Enhance harmonics subtly y_enhanced = y_harmonic * settings['harmonic_boost'] + y * (1 - settings['harmonic_boost']) # Apply subtle EQ to enhance female characteristics y_filtered = apply_female_eq(y_enhanced, sr) return y_filtered def apply_female_eq(y, sr): # Design filters for female voice enhancement # Boost frequencies around 1kHz-2kHz for feminine resonance b1, a1 = signal.butter(2, [1000/(sr/2), 2000/(sr/2)], btype='band') y_filtered = signal.filtfilt(b1, a1, y) # Slight boost in high frequencies (3kHz-5kHz) b2, a2 = signal.butter(2, [3000/(sr/2), 5000/(sr/2)], btype='band') y_filtered += 0.3 * signal.filtfilt(b2, a2, y) return librosa.util.normalize(y_filtered) def add_breathiness(y, sr, amount): # Generate more natural breath noise noise = np.random.normal(0, 0.005, len(y)) # Filter the noise to sound more like breath b, a = signal.butter(2, 2000/(sr/2), btype='lowpass') breath_noise = signal.filtfilt(b, a, noise) # Add filtered noise y_breathy = y * (1 - amount) + breath_noise * amount return librosa.util.normalize(y_breathy) def process_audio_advanced(audio_file, settings): # Load audio with a higher sample rate y, sr = librosa.load(audio_file, sr=24000) # Remove DC offset y = librosa.util.normalize(y - np.mean(y)) # Apply pitch shifting y_shifted = pitch_shift_with_formant_preservation( y, sr, settings['pitch_shift'] ) # Enhance female characteristics y_enhanced = enhance_female_characteristics(y_shifted, sr, settings) # Add breathiness if settings['breathiness'] > 0: y_enhanced = add_breathiness(y_enhanced, sr, settings['breathiness']) # Final normalization and cleaning y_final = librosa.util.normalize(y_enhanced) # Apply final smoothing y_final = signal.savgol_filter(y_final, 1001, 2) return y_final, sr def create_voice_preset(preset_name): presets = { 'Young Female': { 'pitch_shift': 4.0, 'harmonic_boost': 0.3, 'breathiness': 0.15 }, 'Mature Female': { 'pitch_shift': 3.0, 'harmonic_boost': 0.2, 'breathiness': 0.1 }, 'Soft Female': { 'pitch_shift': 3.5, 'harmonic_boost': 0.25, 'breathiness': 0.2 } } return presets.get(preset_name) st.title("Improved Female Voice Converter") uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3']) if uploaded_file is not None: with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file: tmp_file.write(uploaded_file.getvalue()) tmp_path = tmp_file.name preset_name = st.selectbox( "Select Voice Preset", ['Young Female', 'Mature Female', 'Soft Female', 'Custom'] ) if preset_name == 'Custom': settings = { 'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5), 'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05), 'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05) } else: settings = create_voice_preset(preset_name) if st.button("Convert Voice"): with st.spinner("Processing audio..."): try: processed_audio, sr = process_audio_advanced(tmp_path, settings) # Save to buffer buffer = BytesIO() sf.write(buffer, processed_audio, sr, format='WAV') # Display audio player st.audio(buffer, format='audio/wav') # Download button st.download_button( label="Download Converted Audio", data=buffer, file_name="female_voice_converted.wav", mime="audio/wav" ) except Exception as e: st.error(f"Error processing audio: {str(e)}") st.markdown(""" ### Tips for Best Results: 1. Use high-quality input audio with clear speech 2. Start with presets and adjust if needed 3. Keep pitch shift between 3-5 for most natural results 4. Use minimal breathiness (0.1-0.2) for realistic sound 5. Record in a quiet environment with minimal background noise ### Recommended Settings: - For younger female voice: pitch shift 4.0, harmonic boost 0.3 - For mature female voice: pitch shift 3.0, harmonic boost 0.2 - For soft female voice: pitch shift 3.5, harmonic boost 0.25 """)