Spaces:

Artificial-superintelligence
/

Algorithmvoice

Running

App Files Files Community

Artificial-superintelligence committed on Oct 17, 2024

Commit

6a86f73

verified ·

1 Parent(s): 982ba4e

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -142

app.py CHANGED Viewed

@@ -2,171 +2,112 @@ import streamlit as st
 import librosa
 import soundfile as sf
 import numpy as np
-import scipy.signal as signal
-from io import BytesIO
 import tempfile
-def pitch_shift_with_formant_preservation(y, sr, n_steps):
-    # Use a smaller frame size for better quality
-    frame_length = 1024
-    hop_length = 256
-    # Apply pitch shifting with smaller frame size
-    y_shifted = librosa.effects.pitch_shift(
         y=y,
         sr=sr,
-        n_steps=n_steps,
-        bins_per_octave=12,
-        res_type='kaiser_fast'
     )
-    return y_shifted
-def enhance_female_characteristics(y, sr, settings):
-    # Extract harmonics more gently
-    y_harmonic, y_percussive = librosa.effects.hpss(
-        y,
-        margin=3.0,
-        kernel_size=31
-    )
-    # Enhance harmonics subtly
-    y_enhanced = y_harmonic * settings['harmonic_boost'] + y * (1 - settings['harmonic_boost'])
-    # Apply subtle EQ to enhance female characteristics
-    y_filtered = apply_female_eq(y_enhanced, sr)
-    return y_filtered
-def apply_female_eq(y, sr):
-    # Design filters for female voice enhancement
-    # Boost frequencies around 1kHz-2kHz for feminine resonance
-    b1, a1 = signal.butter(2, [1000/(sr/2), 2000/(sr/2)], btype='band')
-    y_filtered = signal.filtfilt(b1, a1, y)
-    # Slight boost in high frequencies (3kHz-5kHz)
-    b2, a2 = signal.butter(2, [3000/(sr/2), 5000/(sr/2)], btype='band')
-    y_filtered += 0.3 * signal.filtfilt(b2, a2, y)
-    return librosa.util.normalize(y_filtered)
-def add_breathiness(y, sr, amount):
-    # Generate more natural breath noise
-    noise = np.random.normal(0, 0.005, len(y))
-    # Filter the noise to sound more like breath
-    b, a = signal.butter(2, 2000/(sr/2), btype='lowpass')
-    breath_noise = signal.filtfilt(b, a, noise)
-    # Add filtered noise
-    y_breathy = y * (1 - amount) + breath_noise * amount
-    return librosa.util.normalize(y_breathy)
-def process_audio_advanced(audio_file, settings):
-    # Load audio with a higher sample rate
-    y, sr = librosa.load(audio_file, sr=24000)
-    # Remove DC offset
-    y = librosa.util.normalize(y - np.mean(y))
-    # Apply pitch shifting
-    y_shifted = pitch_shift_with_formant_preservation(
-        y,
-        sr,
-        settings['pitch_shift']
     )
-    # Enhance female characteristics
-    y_enhanced = enhance_female_characteristics(y_shifted, sr, settings)
-    # Add breathiness
-    if settings['breathiness'] > 0:
-        y_enhanced = add_breathiness(y_enhanced, sr, settings['breathiness'])
-    # Final normalization and cleaning
-    y_final = librosa.util.normalize(y_enhanced)
-    # Apply final smoothing
-    y_final = signal.savgol_filter(y_final, 1001, 2)
-    return y_final, sr
-def create_voice_preset(preset_name):
-    presets = {
-        'Young Female': {
-            'pitch_shift': 4.0,
-            'harmonic_boost': 0.3,
-            'breathiness': 0.15
-        },
-        'Mature Female': {
-            'pitch_shift': 3.0,
-            'harmonic_boost': 0.2,
             'breathiness': 0.1
-        },
-        'Soft Female': {
-            'pitch_shift': 3.5,
-            'harmonic_boost': 0.25,
-            'breathiness': 0.2
         }
-    }
-    return presets.get(preset_name)
-st.title("Improved Female Voice Converter")
-uploaded_file = st.file_uploader("Upload an audio file", type=['wav', 'mp3'])
-if uploaded_file is not None:
-    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
-        tmp_file.write(uploaded_file.getvalue())
-        tmp_path = tmp_file.name
-    preset_name = st.selectbox(
-        "Select Voice Preset",
-        ['Young Female', 'Mature Female', 'Soft Female', 'Custom']
-    )
-    if preset_name == 'Custom':
         settings = {
-            'pitch_shift': st.slider("Pitch Shift", 0.0, 6.0, 4.0, 0.5),
-            'harmonic_boost': st.slider("Harmonic Enhancement", 0.0, 0.5, 0.3, 0.05),
-            'breathiness': st.slider("Breathiness", 0.0, 0.3, 0.15, 0.05)
         }
-    else:
-        settings = create_voice_preset(preset_name)
-    if st.button("Convert Voice"):
-        with st.spinner("Processing audio..."):
-            try:
-                processed_audio, sr = process_audio_advanced(tmp_path, settings)
-                # Save to buffer
-                buffer = BytesIO()
-                sf.write(buffer, processed_audio, sr, format='WAV')
-                # Display audio player
-                st.audio(buffer, format='audio/wav')
-                # Download button
                 st.download_button(
-                    label="Download Converted Audio",
-                    data=buffer,
-                    file_name="female_voice_converted.wav",
                     mime="audio/wav"
                 )
-            except Exception as e:
-                st.error(f"Error processing audio: {str(e)}")
 st.markdown("""
-### Tips for Best Results:
-1. Use high-quality input audio with clear speech
-2. Start with presets and adjust if needed
-3. Keep pitch shift between 3-5 for most natural results
-4. Use minimal breathiness (0.1-0.2) for realistic sound
-5. Record in a quiet environment with minimal background noise
-### Recommended Settings:
-- For younger female voice: pitch shift 4.0, harmonic boost 0.3
-- For mature female voice: pitch shift 3.0, harmonic boost 0.2
-- For soft female voice: pitch shift 3.5, harmonic boost 0.25
 """)

 import librosa
 import soundfile as sf
 import numpy as np
+from scipy import signal
 import tempfile
+import os
+def convert_to_female_voice(audio_path, settings):
+    # Load the audio file
+    y, sr = librosa.load(audio_path, sr=None)
+    # Step 1: Pitch shifting (female voice is typically higher)
+    y_pitched = librosa.effects.pitch_shift(
         y=y,
         sr=sr,
+        n_steps=settings['pitch_steps']
     )
+    # Step 2: Simple formant shifting using resampling
+    alpha = 1.2  # Formant scaling factor
+    y_formant = librosa.effects.time_stretch(y_pitched, rate=alpha)
+    # Step 3: Add slight breathiness
+    noise = np.random.normal(0, 0.005, len(y_formant))
+    noise_filtered = signal.filtfilt([1], [1, -0.99], noise)
+    y_breathy = y_formant + settings['breathiness'] * noise_filtered
+    # Final normalization
+    y_normalized = librosa.util.normalize(y_breathy)
+    return y_normalized, sr
+# Streamlit interface
+st.title("Simple Female Voice Converter")
+# File upload
+audio_file = st.file_uploader("Upload your audio file (WAV or MP3)", type=['wav', 'mp3'])
+if audio_file is not None:
+    # Save uploaded file temporarily
+    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp_file:
+        tmp_file.write(audio_file.getvalue())
+        temp_path = tmp_file.name
+    # Voice type selection
+    voice_type = st.selectbox(
+        "Select voice type",
+        ["Young Female", "Mature Female", "Custom"]
     )
+    # Settings based on voice type
+    if voice_type == "Young Female":
+        settings = {
+            'pitch_steps': 4.0,
             'breathiness': 0.1
         }
+    elif voice_type == "Mature Female":
         settings = {
+            'pitch_steps': 3.0,
+            'breathiness': 0.08
         }
+    else:  # Custom
+        settings = {
+            'pitch_steps': st.slider("Pitch (Higher = More Female)", 0.0, 6.0, 4.0, 0.5),
+            'breathiness': st.slider("Breathiness", 0.0, 0.2, 0.1, 0.01)
+        }
+    # Convert button
+    if st.button("Convert to Female Voice"):
+        try:
+            # Process the audio
+            st.write("Converting... Please wait...")
+            converted_audio, sr = convert_to_female_voice(temp_path, settings)
+            # Save the processed audio
+            output_path = "temp_output.wav"
+            sf.write(output_path, converted_audio, sr)
+            # Play the audio
+            st.audio(output_path)
+            # Provide download button
+            with open(output_path, 'rb') as audio_file:
                 st.download_button(
+                    label="Download converted audio",
+                    data=audio_file,
+                    file_name="female_voice.wav",
                     mime="audio/wav"
                 )
+            # Clean up
+            os.remove(temp_path)
+            os.remove(output_path)
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
 st.markdown("""
+### How to use:
+1. Upload an audio file (WAV or MP3)
+2. Choose a preset or custom settings
+3. Click 'Convert to Female Voice'
+4. Listen to the result
+5. Download if you like it
+### Tips for best results:
+- Use clear audio with minimal background noise
+- Speak in a neutral tone
+- Try different settings to find the best match
+- Young female voice works best with pitch 4-5
+- Mature female voice works best with pitch 3-4
 """)