Spaces:

Artificial-superintelligence
/

Algorithmvoice

Running

App Files Files Community

Artificial-superintelligence committed on Oct 17, 2024

Commit

8d24163

verified ·

1 Parent(s): 5a29c84

Update app.py

Browse files

Files changed (1) hide show

app.py +158 -51

app.py CHANGED Viewed

@@ -2,32 +2,113 @@ import streamlit as st
 import librosa
 import soundfile as sf
 import numpy as np
 from io import BytesIO
 import tempfile
-def process_audio(audio_file, pitch_factor=8):
-    # Load the audio file
     y, sr = librosa.load(audio_file)
-    # Pitch shift using librosa (female voice typically higher pitch)
-    y_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=pitch_factor)
-    # Apply some feminine characteristics
-    # Smooth the audio slightly
-    y_smooth = librosa.effects.preemphasis(y_shifted)
-    # Normalize audio
-    y_normalized = librosa.util.normalize(y_smooth)
-    return y_normalized, sr
-def save_audio(audio_data, sr):
-    # Save processed audio to BytesIO object
-    buffer = BytesIO()
-    sf.write(buffer, audio_data, sr, format='WAV')
-    return buffer
-st.title("Voice Changer - Female Voice Conversion")
 # File uploader
 uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])
@@ -38,42 +119,68 @@ if uploaded_file is not None:
         tmp_file.write(uploaded_file.getvalue())
         tmp_path = tmp_file.name
-    # Pitch adjustment slider
-    pitch_factor = st.slider("Pitch Adjustment", 0.0, 12.0, 8.0, 0.5)
-    if st.button("Convert to Female Voice"):
-        # Process the audio
-        try:
-            processed_audio, sr = process_audio(tmp_path, pitch_factor)
-            # Save processed audio
-            audio_buffer = save_audio(processed_audio, sr)
-            # Create download button
-            st.audio(audio_buffer, format='audio/wav')
-            # Add download button
-            st.download_button(
-                label="Download Converted Audio",
-                data=audio_buffer,
-                file_name="female_voice.wav",
-                mime="audio/wav"
-            )
-        except Exception as e:
-            st.error(f"Error processing audio: {str(e)}")
-# Add instructions
 st.markdown("""
-### Instructions:
-1. Upload a WAV or MP3 audio file
-2. Adjust the pitch slider (higher values = more feminine voice)
-3. Click 'Convert to Female Voice'
-4. Play the converted audio
-5. Download the result if satisfied
-### Notes:
-- Best results with clear audio input
-- Recommended pitch adjustment: 6-8 for natural-sounding results
-- Larger files may take longer to process
 """)

 import librosa
 import soundfile as sf
 import numpy as np
+import scipy.signal as signal
+from scipy.io import wavfile
+import pyworld as world
+import torch
+import torchaudio
 from io import BytesIO
 import tempfile
+def enhance_harmonics(y, sr):  # Boost the harmonic part of `y`; `sr` is accepted but not used in this function.
+    # Harmonic-percussive source separation: hpss returns (harmonic, percussive); index 0 keeps the harmonic component
+    y_harmonic = librosa.effects.hpss(y)[0]
+    # Weighted mix: 1.2 x the harmonic component plus 0.3 x the unmodified input
+    y_enhanced = y_harmonic * 1.2 + y * 0.3
+    return librosa.util.normalize(y_enhanced)  # the mix is rescaled by librosa.util.normalize before being returned
+def modify_formants(y, sr, formant_shift_factor=1.2):
+    # Get the power spectrum
+    D = librosa.stft(y)
+    S = np.abs(D)
+    # Estimate formants using LPC
+    order = 12
+    a = librosa.lpc(y, order)
+    # Shift formants
+    new_a = np.zeros_like(a)
+    new_a[0] = a[0]
+    for i in range(1, len(a)):
+        new_a[i] = a[i] * (formant_shift_factor ** i)
+    # Apply modified LPC filter
+    y_formant = signal.lfilter([1], new_a, y)
+    return librosa.util.normalize(y_formant)
+def process_audio_advanced(audio_file, settings):
+    # Load audio
     y, sr = librosa.load(audio_file)
+    # Extract F0 and spectral envelope using WORLD vocoder
+    _f0, t = librosa.piptrack(y=y, sr=sr)
+    f0 = np.mean(_f0[_f0 > 0], axis=0)
+    # Pitch shifting with formant preservation
+    y_shifted = librosa.effects.pitch_shift(
+        y,
+        sr=sr,
+        n_steps=settings['pitch_shift']
+    )
+    # Modify formants
+    y_formant = modify_formants(
+        y_shifted,
+        sr,
+        settings['formant_shift']
+    )
+    # Enhance harmonics
+    y_harmonic = enhance_harmonics(y_formant, sr)
+    # Apply vocal tract length normalization
+    y_vtln = librosa.effects.time_stretch(
+        y_harmonic,
+        rate=settings['vtln_factor']
+    )
+    # Smooth the output
+    y_smooth = signal.savgol_filter(y_vtln, 1001, 2)
+    # Final normalization
+    y_final = librosa.util.normalize(y_smooth)
+    return y_final, sr
+def create_voice_preset(preset_name):  # Look up the settings dict for a named preset.
+    presets = {  # every preset carries the same four keys: pitch_shift, formant_shift, vtln_factor, breathiness
+        'Young Female': {
+            'pitch_shift': 8.0,
+            'formant_shift': 1.3,
+            'vtln_factor': 1.1,
+            'breathiness': 0.3
+        },
+        'Mature Female': {
+            'pitch_shift': 6.0,
+            'formant_shift': 1.2,
+            'vtln_factor': 1.05,
+            'breathiness': 0.2
+        },
+        'Soft Female': {
+            'pitch_shift': 7.0,
+            'formant_shift': 1.25,
+            'vtln_factor': 1.15,
+            'breathiness': 0.4
+        }
+    }
+    return presets.get(preset_name)  # dict.get: returns None for any name that is not a key above
+def add_breathiness(y, sr, amount=0.3):  # Blend filtered random noise into `y`; `sr` is accepted but not used in this function.
+    # Gaussian noise (mean 0, std 0.01), one value per sample of y; no seed is set, so the output differs between calls
+    noise = np.random.normal(0, 0.01, len(y))
+    noise_filtered = signal.lfilter([1], [1, -0.98], noise)  # one-pole recursion: out[n] = noise[n] + 0.98 * out[n-1]
+    # Linear mix: (1 - amount) of the input plus amount of the filtered noise
+    y_breathy = y * (1 - amount) + noise_filtered * amount
+    return librosa.util.normalize(y_breathy)
+st.title("Advanced Female Voice Converter")
 # File uploader
 uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])
         tmp_file.write(uploaded_file.getvalue())
         tmp_path = tmp_file.name
+    # Voice preset selector
+    preset_name = st.selectbox(
+        "Select Voice Preset",
+        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
+    )
+    if preset_name == 'Custom':
+        settings = {
+            'pitch_shift': st.slider("Pitch Shift", 0.0, 12.0, 8.0, 0.5),
+            'formant_shift': st.slider("Formant Shift", 1.0, 1.5, 1.2, 0.05),
+            'vtln_factor': st.slider("Vocal Tract Length", 0.9, 1.2, 1.1, 0.05),
+            'breathiness': st.slider("Breathiness", 0.0, 1.0, 0.3, 0.1)
+        }
+    else:
+        settings = create_voice_preset(preset_name)
+    if st.button("Convert Voice"):
+        with st.spinner("Processing audio..."):
+            try:
+                # Process audio
+                processed_audio, sr = process_audio_advanced(tmp_path, settings)
+                # Add breathiness
+                processed_audio = add_breathiness(
+                    processed_audio,
+                    sr,
+                    settings['breathiness']
+                )
+                # Save to buffer
+                buffer = BytesIO()
+                sf.write(buffer, processed_audio, sr, format='WAV')
+                # Display audio player
+                st.audio(buffer, format='audio/wav')
+                # Download button
+                st.download_button(
+                    label="Download Converted Audio",
+                    data=buffer,
+                    file_name="female_voice_converted.wav",
+                    mime="audio/wav"
+                )
+            except Exception as e:
+                st.error(f"Error processing audio: {str(e)}")
 st.markdown("""
+### Advanced Features:
+- Formant preservation and shifting
+- Harmonic enhancement
+- Vocal tract length normalization
+- Natural breathiness addition
+- Multiple voice presets
+- Custom parameter adjustment
+### Tips for Best Results:
+1. Use high-quality input audio
+2. Start with presets and adjust if needed
+3. For custom settings:
+   - Pitch shift: 6-8 for natural female voice
+   - Formant shift: 1.1-1.3 for feminine resonance
+   - Vocal tract length: 1.05-1.15 for realistic results
+   - Breathiness: 0.2-0.4 for natural sound
 """)