import streamlit as st
import librosa
import soundfile as sf
import numpy as np
import scipy.signal as signal
from scipy.io import wavfile
from io import BytesIO
import tempfile

def _shift_frame_formants(frame, order, factor):
    """Return one frame re-synthesised with its LPC pole angles scaled by *factor*.

    The frame is inverse-filtered with its own LPC polynomial to obtain the
    residual (excitation), and the residual is then passed through an
    all-pole filter whose pole angles have been multiplied by *factor*.
    A frame whose LPC analysis fails is returned unchanged.
    """
    try:
        a = librosa.lpc(frame, order=order)
    except FloatingPointError:
        # librosa documents this error for ill-conditioned input.
        return frame
    if not np.all(np.isfinite(a)):
        return frame

    poles = np.roots(a)
    # A pole's angle sets the resonance frequency; its radius sets bandwidth.
    # Scale only the angle, keeping it at or below Nyquist (pi rad/sample).
    angles = np.clip(np.angle(poles) * factor, -np.pi, np.pi)
    # Clamp radii strictly inside the unit circle so the filter stays stable.
    radii = np.minimum(np.abs(poles), 0.999)
    # Conjugate pairs stay conjugate, so any imaginary part left in the
    # rebuilt polynomial is rounding noise.
    new_a = np.real(np.poly(radii * np.exp(1j * angles)))

    residual = signal.lfilter(a, [1.0], frame)
    return signal.lfilter([1.0], new_a, residual)


def modify_formants(y, sr, formant_shift_factor=1.2):
    """Shift the formant (spectral-envelope resonance) frequencies of a signal.

    The signal is cut into overlapping frames of 2048 samples with a hop of
    512. Each frame is LPC-analysed (order 12), its pole angles are scaled by
    ``formant_shift_factor``, and the frame is re-synthesised from its LPC
    residual. Frames are recombined by Hann-windowed overlap-add, so frame
    boundaries do not produce discontinuities.

    Args:
        y: 1-D audio signal.
        sr: Sample rate. Accepted for interface compatibility; not used,
            because pole angles are expressed in radians per sample.
        formant_shift_factor: Multiplier applied to every formant frequency.
            Values above 1 raise the formants; 1.0 leaves the envelope
            essentially unchanged.

    Returns:
        The peak-normalised processed signal. Its length is
        ``(n_frames - 1) * 512 + 2048``, so trailing samples that do not fill
        a whole hop are dropped. A signal shorter than one frame is returned
        peak-normalised without formant processing, and an empty signal is
        returned as-is.
    """
    frame_length = 2048
    hop_length = 512
    lpc_order = 12

    y = np.asarray(y)
    if y.size == 0:
        return y
    if len(y) < frame_length:
        # Too short to frame; librosa.util.frame would raise here.
        return librosa.util.normalize(y)

    frames = librosa.util.frame(y, frame_length=frame_length, hop_length=hop_length)
    n_frames = frames.shape[-1]
    out_length = (n_frames - 1) * hop_length + frame_length

    window = signal.get_window('hann', frame_length)
    y_formant = np.zeros(out_length)
    window_sum = np.zeros(out_length)

    for index, frame in enumerate(frames.T):
        start = index * hop_length
        stop = start + frame_length
        shifted = _shift_frame_formants(frame, lpc_order, formant_shift_factor)
        y_formant[start:stop] += window * shifted
        window_sum[start:stop] += window

    # Divide out the accumulated window gain wherever at least one frame
    # contributes; samples with no window weight are left at zero.
    covered = window_sum > 1e-8
    y_formant[covered] /= window_sum[covered]

    return librosa.util.normalize(y_formant)

def enhance_harmonics(y, sr):
    """Emphasise the harmonic content of a signal.

    The signal is split with librosa's harmonic-percussive source separation;
    the harmonic part is mixed back with the unprocessed input using fixed
    gains and the result is peak-normalised.

    Args:
        y: Audio signal.
        sr: Sample rate (unused; kept for a uniform call signature).

    Returns:
        The normalised mix ``1.2 * harmonic + 0.3 * y``.
    """
    harmonic_gain = 1.2
    dry_gain = 0.3

    # hpss returns (harmonic, percussive); only the harmonic part is needed.
    harmonic_part, _percussive_part = librosa.effects.hpss(y)

    mixed = harmonic_part * harmonic_gain + y * dry_gain
    return librosa.util.normalize(mixed)

def process_audio_advanced(audio_file, settings):
    """Run the voice-conversion chain on an audio file.

    Stages, in order: load, pitch shift, formant modification, harmonic
    emphasis, time stretch, Savitzky-Golay smoothing, peak normalisation.

    Args:
        audio_file: Anything ``librosa.load`` accepts (for example a path).
        settings: Mapping with the keys ``'pitch_shift'`` (passed as
            ``n_steps``; semitones at librosa's default of 12 bins per
            octave), ``'formant_shift'`` (factor passed to
            ``modify_formants``) and ``'vtln_factor'`` (time-stretch rate).
            An optional ``'smooth_window'`` key overrides the smoothing
            window length in samples (default 1001).

    Returns:
        Tuple ``(y_final, sr)``: the processed, peak-normalised signal and
        the sample rate reported by ``librosa.load``.
    """
    smooth_polyorder = 2
    # NOTE(review): a 1001-sample quadratic Savitzky-Golay window is a very
    # strong low-pass at typical speech sample rates. The default is kept for
    # backward compatibility -- confirm that this much smoothing is intended.
    smooth_window = int(settings.get('smooth_window', 1001))

    # Load audio with librosa's defaults.
    y, sr = librosa.load(audio_file)

    # Pitch shift. NOTE(review): librosa.effects.pitch_shift is not documented
    # as formant-preserving, so the envelope presumably moves with the pitch.
    y_shifted = librosa.effects.pitch_shift(
        y,
        sr=sr,
        n_steps=settings['pitch_shift']
    )

    # Modify formants
    y_formant = modify_formants(
        y_shifted,
        sr,
        settings['formant_shift']
    )

    # Enhance harmonics
    y_harmonic = enhance_harmonics(y_formant, sr)

    # "Vocal tract length" control. This is a time stretch (it changes the
    # duration by `rate`), not a resampling step.
    y_vtln = librosa.effects.time_stretch(
        y_harmonic,
        rate=settings['vtln_factor']
    )

    # Smooth the output. savgol_filter rejects a window longer than the
    # signal, so clamp it to the largest odd length that fits and skip
    # smoothing when the signal is too short for the polynomial order.
    window = min(smooth_window, len(y_vtln))
    if window % 2 == 0:
        window -= 1
    if window > smooth_polyorder:
        y_smooth = signal.savgol_filter(y_vtln, window, smooth_polyorder)
    else:
        y_smooth = y_vtln

    # Final normalization
    y_final = librosa.util.normalize(y_smooth)

    return y_final, sr

def create_voice_preset(preset_name):
    """Look up the conversion settings for a named voice preset.

    Args:
        preset_name: One of ``'Young Female'``, ``'Mature Female'`` or
            ``'Soft Female'``.

    Returns:
        A new dict with the keys ``'pitch_shift'``, ``'formant_shift'``,
        ``'vtln_factor'`` and ``'breathiness'``, or ``None`` when the name is
        not a known preset.
    """
    setting_names = ('pitch_shift', 'formant_shift', 'vtln_factor', 'breathiness')

    # One row per preset; values are in the same order as setting_names.
    preset_rows = {
        'Young Female': (8.0, 1.3, 1.1, 0.3),
        'Mature Female': (6.0, 1.2, 1.05, 0.2),
        'Soft Female': (7.0, 1.25, 1.15, 0.4),
    }

    row = preset_rows.get(preset_name)
    if row is None:
        return None
    return dict(zip(setting_names, row))

def add_breathiness(y, sr, amount=0.3):
    """Blend filtered random noise into a signal.

    Gaussian noise (standard deviation 0.01) with one value per input sample
    is passed through a one-pole recursive filter with feedback 0.98, then
    cross-faded with the input and peak-normalised.

    Args:
        y: Audio signal.
        sr: Sample rate (unused; kept for a uniform call signature).
        amount: Mix weight of the noise; the input is weighted by
            ``1 - amount``.

    Returns:
        The normalised mix of signal and noise.
    """
    n_samples = len(y)
    white_noise = np.random.normal(0, 0.01, n_samples)

    # Recursive one-pole filter: out[n] = in[n] + 0.98 * out[n - 1].
    breath = signal.lfilter([1], [1, -0.98], white_noise)

    signal_weight = 1 - amount
    blended = y * signal_weight + breath * amount
    return librosa.util.normalize(blended)

st.title("Advanced Female Voice Converter")

# File uploader
uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])

if uploaded_file is not None:
    # Voice preset selector
    preset_name = st.selectbox(
        "Select Voice Preset",
        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
    )

    if preset_name == 'Custom':
        settings = {
            'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
            'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
            'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
            'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1)
        }
    else:
        settings = create_voice_preset(preset_name)

    if st.button("Convert Voice"):
        with st.spinner("Processing audio..."):
            try:
                # Give the temp file a suffix that matches the upload; the
                # uploader only admits .wav and .mp3.
                is_mp3 = uploaded_file.name.lower().endswith('.mp3')
                suffix = '.mp3' if is_mp3 else '.wav'

                # Write the upload to disk only when a conversion is actually
                # requested, inside a temporary directory that is removed
                # (together with the file) when the block exits. Writing it
                # on every script run with delete=False would leave one
                # orphaned file behind per run.
                with tempfile.TemporaryDirectory() as tmp_dir:
                    with tempfile.NamedTemporaryFile(
                        dir=tmp_dir, suffix=suffix, delete=False
                    ) as tmp_file:
                        tmp_file.write(uploaded_file.getvalue())
                        tmp_path = tmp_file.name

                    # The file is closed here, so it can be reopened by path.
                    processed_audio, sr = process_audio_advanced(tmp_path, settings)

                # Add breathiness
                processed_audio = add_breathiness(
                    processed_audio,
                    sr,
                    settings['breathiness']
                )

                # Encode to WAV in memory and hand immutable bytes to both
                # widgets, so neither depends on the buffer's read position.
                buffer = BytesIO()
                sf.write(buffer, processed_audio, sr, format='WAV')
                wav_bytes = buffer.getvalue()

                # Display audio player
                st.audio(wav_bytes, format='audio/wav')

                # Download button
                st.download_button(
                    label="Download Converted Audio",
                    data=wav_bytes,
                    file_name="female_voice_converted.wav",
                    mime="audio/wav"
                )

            except Exception as e:
                # UI boundary: report any processing failure to the user.
                st.error(f"Error processing audio: {str(e)}")

# Static help panel rendered below the converter: feature list and usage tips.
_HELP_MARKDOWN = """
### Voice Conversion Features:
- Pitch shifting with formant preservation
- Harmonic enhancement
- Vocal tract length modification
- Natural breathiness addition
- Multiple voice presets
- Custom parameter controls

### Tips for Best Results:
1. Start with a clear audio recording
2. Try different presets to find the best match
3. For custom settings:
   - Pitch shift: 6-8 for natural female voice
   - Formant shift: 1.1-1.3 for feminine resonance
   - Vocal tract length: 1.05-1.15 for realistic results
   - Breathiness: 0.2-0.4 for natural sound
"""
st.markdown(_HELP_MARKDOWN)