"""Streamlit app that runs an uploaded voice recording through a chain of
pitch, formant, harmonic, tempo and breathiness effects."""

import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st
from scipy.io import wavfile  # noqa: F401  (not used below; kept from the original import list)

# Analysis parameters for the frame-based LPC processing.
_FRAME_LENGTH = 2048
_HOP_LENGTH = 512
_LPC_ORDER = 12

# Poles are clamped just inside the unit circle so the synthesis filter
# can never diverge, even if the LPC estimate is numerically marginal.
_MAX_POLE_RADIUS = 0.999

# Savitzky-Golay smoothing of the final waveform. The window must stay short:
# a window of ~1000 samples spans ~45 ms at librosa's default 22 050 Hz and
# attenuates nearly the whole speech band.
_DEFAULT_SMOOTHING_WINDOW = 7
_SMOOTHING_POLYORDER = 2


def _shift_lpc_poles(lpc_coeffs, formant_shift_factor):
    """Return all-pole filter coefficients with resonances moved in frequency.

    The poles of ``1 / A(z)`` are found, the angle of every complex pole is
    multiplied by ``formant_shift_factor`` (angle is proportional to resonance
    frequency) and the radius (bandwidth) is kept, clamped below 1 for
    stability. Real poles are left where they are so conjugate symmetry, and
    therefore real-valued coefficients, is preserved.
    """
    poles = np.roots(lpc_coeffs)
    radius = np.minimum(np.abs(poles), _MAX_POLE_RADIUS)
    angle = np.angle(poles)

    is_complex = np.abs(poles.imag) > 1e-8
    # Stay strictly below Nyquist so a conjugate pair cannot collapse/wrap.
    max_angle = np.pi * (1.0 - 1e-6)
    shifted_angle = np.clip(angle * formant_shift_factor, -max_angle, max_angle)
    angle = np.where(is_complex, shifted_angle, angle)

    shifted_poles = radius * np.exp(1j * angle)
    # np.poly returns a monic polynomial (leading coefficient 1), matching the
    # leading 1 of the LPC polynomial; it yields a scalar for an empty root set.
    return np.atleast_1d(np.real(np.poly(shifted_poles)))


def _shift_frame_formants(frame, formant_shift_factor):
    """Re-synthesise one frame with its LPC envelope shifted in frequency."""
    if not np.any(frame):
        # Digital silence: nothing to model, and LPC on all-zeros is ill-posed.
        return np.zeros(len(frame))

    coeffs = librosa.lpc(frame, order=_LPC_ORDER)
    if not np.all(np.isfinite(coeffs)):
        # Ill-conditioned estimate: pass the frame through unmodified.
        return frame

    # Inverse-filter to get the excitation (residual), then drive the shifted
    # envelope with it. With a factor of 1 this reproduces the input frame.
    excitation = signal.lfilter(coeffs, [1.0], frame)
    return signal.lfilter(
        [1.0], _shift_lpc_poles(coeffs, formant_shift_factor), excitation
    )


def modify_formants(y, sr, formant_shift_factor=1.2):
    """Shift the formants of a mono signal by ``formant_shift_factor``.

    The signal is processed in overlapping frames: each frame's LPC envelope
    is moved in frequency and the frames are recombined by Hann-windowed
    overlap-add.

    Args:
        y: 1-D floating-point audio signal.
        sr: Sample rate. Unused by the algorithm; kept for interface
            compatibility.
        formant_shift_factor: Multiplier applied to resonance frequencies
            (> 1 raises formants, 1 leaves them unchanged).

    Returns:
        Peak-normalised signal with the same length as ``y``.
    """
    y = np.asarray(y)
    n_samples = len(y)
    if n_samples <= _LPC_ORDER:
        # Too short to fit the LPC model; hand back the (normalised) input.
        return librosa.util.normalize(y) if n_samples else y.copy()

    # Zero-pad so the frames tile the signal exactly: short clips still get
    # one full frame and no tail samples are dropped.
    n_hops = int(np.ceil(max(0, n_samples - _FRAME_LENGTH) / _HOP_LENGTH))
    padded = np.zeros(_FRAME_LENGTH + n_hops * _HOP_LENGTH, dtype=np.float64)
    padded[:n_samples] = y

    window = signal.get_window("hann", _FRAME_LENGTH)
    output = np.zeros_like(padded)
    window_sum = np.zeros_like(padded)

    for start in range(0, len(padded) - _FRAME_LENGTH + 1, _HOP_LENGTH):
        stop = start + _FRAME_LENGTH
        shifted = _shift_frame_formants(padded[start:stop], formant_shift_factor)
        output[start:stop] += window * shifted
        window_sum[start:stop] += window

    # Divide by the accumulated window so the overlap-add gain is 1 everywhere,
    # including the edges where fewer frames overlap.
    output /= np.maximum(window_sum, 1e-8)

    out_dtype = y.dtype if np.issubdtype(y.dtype, np.floating) else np.float64
    y_formant = output[:n_samples].astype(out_dtype, copy=False)
    return librosa.util.normalize(y_formant)


def enhance_harmonics(y, sr):
    """Boost the harmonic component of ``y`` and return it peak-normalised.

    Args:
        y: 1-D floating-point audio signal.
        sr: Sample rate. Unused; kept for interface compatibility.
    """
    # Harmonic part from harmonic-percussive source separation.
    y_harmonic = librosa.effects.hpss(y)[0]

    # Emphasise the harmonics while keeping some of the full signal.
    y_enhanced = y_harmonic * 1.2 + y * 0.3
    return librosa.util.normalize(y_enhanced)


def _smooth_waveform(y, window_length):
    """Apply Savitzky-Golay smoothing with a window clamped to valid values.

    ``savgol_filter`` needs an odd window (on older SciPy) that is larger
    than the polynomial order and no longer than the signal; when that cannot
    be satisfied the signal is returned unchanged.
    """
    window_length = min(int(window_length), len(y))
    if window_length % 2 == 0:
        window_length -= 1
    if window_length <= _SMOOTHING_POLYORDER:
        return y
    return signal.savgol_filter(y, window_length, _SMOOTHING_POLYORDER)


def process_audio_advanced(audio_file, settings):
    """Run the full conversion chain on an audio file.

    Args:
        audio_file: Path (or anything ``librosa.load`` accepts) of the input.
        settings: Dict with keys ``'pitch_shift'`` (semitones),
            ``'formant_shift'`` (frequency multiplier) and ``'vtln_factor'``
            (time-stretch rate). An optional ``'smoothing_window'`` (samples)
            overrides the final smoothing window.

    Returns:
        Tuple ``(samples, sample_rate)`` with peak-normalised mono audio.
    """
    # librosa.load resamples to its default rate and mixes down to mono.
    y, sr = librosa.load(audio_file)

    # Plain pitch shift (time-stretch + resample). This moves the formants
    # along with the pitch; it does not preserve them by itself.
    y_shifted = librosa.effects.pitch_shift(
        y,
        sr=sr,
        n_steps=settings['pitch_shift']
    )

    # Modify formants
    y_formant = modify_formants(
        y_shifted,
        sr,
        settings['formant_shift']
    )

    # Enhance harmonics
    y_harmonic = enhance_harmonics(y_formant, sr)

    # time_stretch changes duration only (rate > 1 shortens the clip).
    # NOTE(review): this is the step driven by 'vtln_factor', but it is a
    # tempo change rather than a vocal-tract-length warp - confirm the intent.
    y_vtln = librosa.effects.time_stretch(
        y_harmonic,
        rate=settings['vtln_factor']
    )

    # Light smoothing of the output waveform.
    y_smooth = _smooth_waveform(
        y_vtln,
        settings.get('smoothing_window', _DEFAULT_SMOOTHING_WINDOW)
    )

    # Final normalization
    y_final = librosa.util.normalize(y_smooth)

    return y_final, sr


def create_voice_preset(preset_name):
    """Return the settings dict for a named preset, or ``None`` if unknown."""
    presets = {
        'Young Female': {
            'pitch_shift': 8.0,
            'formant_shift': 1.3,
            'vtln_factor': 1.1,
            'breathiness': 0.3
        },
        'Mature Female': {
            'pitch_shift': 6.0,
            'formant_shift': 1.2,
            'vtln_factor': 1.05,
            'breathiness': 0.2
        },
        'Soft Female': {
            'pitch_shift': 7.0,
            'formant_shift': 1.25,
            'vtln_factor': 1.15,
            'breathiness': 0.4
        }
    }
    return presets.get(preset_name)


def add_breathiness(y, sr, amount=0.3):
    """Mix low-pass-filtered random noise into ``y`` and peak-normalise.

    Args:
        y: 1-D audio signal.
        sr: Sample rate. Unused; kept for interface compatibility.
        amount: Mixing weight of the noise; the signal gets ``1 - amount``.
    """
    # Gaussian noise shaped by a one-pole low-pass filter.
    noise = np.random.normal(0, 0.01, len(y))
    noise_filtered = signal.lfilter([1], [1, -0.98], noise)

    # Mix with original signal
    y_breathy = y * (1 - amount) + noise_filtered * amount
    return librosa.util.normalize(y_breathy)


st.title("Advanced Female Voice Converter")

# File uploader
uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    # Voice preset selector
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )

    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
            'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
            'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            # Streamlit re-runs this script on every widget interaction, so
            # the temporary copy is created only when a conversion is actually
            # requested and is always removed afterwards.
            tmp_path = None
            try:
                # Keep the upload's own extension so decoders that look at the
                # file name are not misled (uploads may be mp3).
                suffix = os.path.splitext(uploaded_file.name)[1] or '.wav'
                # delete=False + explicit removal: the file must be closed
                # before it can be reopened by name on every platform.
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_path = tmp_file.name

                # Process audio
                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                # Add breathiness
                processed_audio = add_breathiness(
                    processed_audio,
                    sr,
                    settings['breathiness']
                )

                # Encode to WAV in memory and hand the raw bytes to the
                # widgets, so nothing depends on the buffer's read position.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                # Display audio player
                st.audio(wav_bytes, format='audio/wav')

                # Download button
                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )
            except Exception as e:
                # Top-level UI boundary: report the failure instead of crashing.
                st.error(f"Error processing audio: {str(e)}")
            finally:
                if tmp_path is not None:
                    try:
                        os.remove(tmp_path)
                    except OSError:
                        # Best-effort cleanup; a leftover temp file is harmless.
                        pass

st.markdown("""
### Voice Conversion Features:
- Pitch shifting with formant preservation
- Harmonic enhancement
- Vocal tract length modification
- Natural breathiness addition
- Multiple voice presets
- Custom parameter controls

### Tips for Best Results:
1. Start with a clear audio recording
2. Try different presets to find the best match
3. For custom settings:
   - Pitch shift: 6-8 for natural female voice
   - Formant shift: 1.1-1.3 for feminine resonance
   - Vocal tract length: 1.05-1.15 for realistic results
   - Breathiness: 0.2-0.4 for natural sound
""")