# Spaces: Artificial-superintelligence / Algorithmvoice (status: Running; file size: 5,452 bytes)

import os
import tempfile
from io import BytesIO

import librosa
import numpy as np
import scipy.signal as signal
import soundfile as sf
import streamlit as st

def pitch_shift_with_formant_preservation(y, sr, n_steps):
    """Shift the pitch of an audio signal.

    Thin wrapper around ``librosa.effects.pitch_shift`` using 12 bins per
    octave and the ``'kaiser_fast'`` resampler.

    NOTE(review): despite the function name, no separate formant-preservation
    step is performed here; the signal is only pitch-shifted.

    Args:
        y: Audio time series to shift.
        sr: Sample rate of ``y``.
        n_steps: Number of steps to shift by; with ``bins_per_octave=12``
            one step is one twelfth of an octave.

    Returns:
        The pitch-shifted audio time series.
    """
    # The analysis frame/hop sizes are left at librosa's defaults; the
    # previously declared frame_length/hop_length locals were never passed
    # to the call and have been removed.
    return librosa.effects.pitch_shift(
        y=y,
        sr=sr,
        n_steps=n_steps,
        bins_per_octave=12,
        res_type='kaiser_fast',
    )

def enhance_female_characteristics(y, sr, settings):
    """Blend the harmonic component back into ``y`` and apply the voice EQ.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y``.
        settings: Mapping that provides ``'harmonic_boost'``, the weight given
            to the harmonic component (the input keeps ``1 - harmonic_boost``).

    Returns:
        The result of ``apply_female_eq`` on the blended signal.
    """
    # Harmonic/percussive separation; only the harmonic part is used.
    harmonic_part, _percussive_part = librosa.effects.hpss(
        y, margin=3.0, kernel_size=31
    )

    # Cross-fade between the harmonic component and the unprocessed input.
    boost = settings['harmonic_boost']
    blended = harmonic_part * boost + y * (1 - boost)

    return apply_female_eq(blended, sr)

def apply_female_eq(y, sr, mid_gain=1.0, high_gain=0.3):
    """Boost the 1-2 kHz and 3-5 kHz bands of ``y`` and peak-normalize.

    Each band is isolated with a 2nd-order Butterworth band-pass applied
    forward and backward (``filtfilt``, so no phase shift) and then *added*
    to the unmodified signal. Keeping the dry signal is what makes this a
    boost: using the band-pass outputs on their own would discard everything
    outside the two bands, including the fundamental of the voice.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y``. Must be above 10 kHz so that the 5 kHz
            band edge lies below the Nyquist frequency; otherwise
            ``scipy.signal.butter`` raises ``ValueError``.
        mid_gain: Weight of the 1-2 kHz band added to the signal.
        high_gain: Weight of the 3-5 kHz band added to the signal.

    Returns:
        The equalized signal, normalized with ``librosa.util.normalize``.
    """
    nyquist = sr / 2

    # 1 kHz - 2 kHz band.
    b_mid, a_mid = signal.butter(
        2, [1000 / nyquist, 2000 / nyquist], btype='band'
    )
    mid_band = signal.filtfilt(b_mid, a_mid, y)

    # 3 kHz - 5 kHz band.
    b_high, a_high = signal.butter(
        2, [3000 / nyquist, 5000 / nyquist], btype='band'
    )
    high_band = signal.filtfilt(b_high, a_high, y)

    # Dry signal plus weighted band components.
    y_eq = y + mid_gain * mid_band + high_gain * high_band

    return librosa.util.normalize(y_eq)

def add_breathiness(y, sr, amount):
    """Mix low-pass-filtered Gaussian noise into ``y`` and normalize.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y``.
        amount: Mix weight of the noise; the input is weighted ``1 - amount``.

    Returns:
        The mixed signal, normalized with ``librosa.util.normalize``. The
        noise is drawn from NumPy's global random state, so the output is
        not deterministic.
    """
    # Gaussian noise (mean 0, std 0.005), one sample per input sample.
    raw_noise = np.random.normal(0, 0.005, len(y))

    # 2nd-order Butterworth low-pass at 2 kHz, applied forward and backward.
    cutoff = 2000 / (sr / 2)
    lp_b, lp_a = signal.butter(2, cutoff, btype='lowpass')
    shaped_noise = signal.filtfilt(lp_b, lp_a, raw_noise)

    # Weighted mix of the input and the shaped noise.
    mixed = y * (1 - amount) + shaped_noise * amount
    return librosa.util.normalize(mixed)

def process_audio_advanced(audio_file, settings):
    """Run the full voice-conversion chain on an audio file.

    Steps: load at 24 kHz, remove the DC offset, pitch-shift, enhance the
    harmonic content and EQ, optionally add breath noise, lightly smooth,
    and peak-normalize.

    Args:
        audio_file: Path (or other source accepted by ``librosa.load``) of
            the audio to convert.
        settings: Mapping with the keys ``'pitch_shift'``,
            ``'harmonic_boost'`` and ``'breathiness'``.

    Returns:
        A ``(samples, sample_rate)`` tuple with the processed audio.
    """
    # Savitzky-Golay smoothing parameters. The window is deliberately short:
    # a long window (hundreds of samples) acts as a low-pass filter whose
    # cutoff falls far below the speech band and would remove the voice.
    smooth_window = 5
    smooth_polyorder = 2

    # Load (and resample) the audio at 24 kHz.
    y, sr = librosa.load(audio_file, sr=24000)

    # Remove DC offset, then normalize.
    y = librosa.util.normalize(y - np.mean(y))

    # Apply pitch shifting.
    y_shifted = pitch_shift_with_formant_preservation(
        y,
        sr,
        settings['pitch_shift']
    )

    # Enhance female characteristics.
    y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)

    # Add breathiness only when requested.
    if settings['breathiness'] > 0:
        y_enhanced = add_breathiness(y_enhanced, sr, settings['breathiness'])

    # Light smoothing; skipped for clips shorter than the window, for which
    # savgol_filter would raise.
    if len(y_enhanced) >= smooth_window:
        y_enhanced = signal.savgol_filter(
            y_enhanced, smooth_window, smooth_polyorder
        )

    # Normalize last so the returned signal is peak-normalized.
    y_final = librosa.util.normalize(y_enhanced)

    return y_final, sr

def create_voice_preset(preset_name):
    """Return the settings dict for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with the keys ``'pitch_shift'``, ``'harmonic_boost'`` and
        ``'breathiness'``, or ``None`` when the name is not a known preset.
    """
    setting_keys = ('pitch_shift', 'harmonic_boost', 'breathiness')

    # One row per preset, values in the same order as setting_keys.
    preset_table = {
        'Young Female': (4.0, 0.3, 0.15),
        'Mature Female': (3.0, 0.2, 0.1),
        'Soft Female': (3.5, 0.25, 0.2),
    }

    row = preset_table.get(preset_name)
    if row is None:
        return None
    return dict(zip(setting_keys, row))

# --- Streamlit UI: upload, choose settings, convert, play back / download ---
st.title("Improved Female Voice Converter")

uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )

    # 'Custom' exposes the three settings as sliders; every other choice
    # takes its values from the preset table.
    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
            'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            # Streamlit re-runs this script on every widget interaction, so
            # the upload is written to disk only when a conversion is
            # actually requested, and the file is removed afterwards.
            # Keep the upload's own extension (wav/mp3) on the temp file.
            suffix = os.path.splitext(uploaded_file.name)[1] or '.wav'
            tmp_path = None
            try:
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_path = tmp_file.name
                    tmp_file.write(uploaded_file.getvalue())

                processed_audio, sr = process_audio_advanced(tmp_path, settings)

                # Encode as WAV in memory and hand plain bytes to the widgets.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                # Display audio player
                st.audio(wav_bytes, format='audio/wav')

                # Download button
                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )

            except Exception as e:
                # Top-level UI boundary: report any failure to the user.
                st.error(f"Error processing audio: {str(e)}")
            finally:
                # Always clean up the spooled upload.
                if tmp_path is not None and os.path.exists(tmp_path):
                    os.unlink(tmp_path)

# Static usage guidance; this statement is at module level, so it is rendered
# on every run regardless of whether a file has been uploaded.
st.markdown("""
### Tips for Best Results:
1. Use high-quality input audio with clear speech
2. Start with presets and adjust if needed
3. Keep pitch shift between 3-5 for most natural results
4. Use minimal breathiness (0.1-0.2) for realistic sound
5. Record in a quiet environment with minimal background noise

### Recommended Settings:
- For younger female voice: pitch shift 4.0, harmonic boost 0.3
- For mature female voice: pitch shift 3.0, harmonic boost 0.2
- For soft female voice: pitch shift 3.5, harmonic boost 0.25
""")