"""Gradio web app that converts an uploaded voice recording towards a target voice.

Pipeline for one request:

1. Load the uploaded recording and optionally pitch-shift it.
2. Run Coqui TTS voice conversion against a reference ("target") recording.
3. Optionally apply a second pitch shift (exposed in the UI as "Formant Shift").
4. Write the result to a WAV file that Gradio serves back to the browser.
"""

import contextlib
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from TTS.api import TTS

# Voice-conversion checkpoint. `TTS.voice_conversion_to_file` needs a model from
# the "voice_conversion_models" family; a plain "tts_models/..." checkpoint does
# not load a voice converter.
VC_MODEL_NAME = "voice_conversion_models/multilingual/vctk/freevc24"

# Reference recording whose timbre the output should take on. The default is a
# placeholder: point VOICE_CHANGER_TARGET_WAV at a real female voice sample.
TARGET_VOICE_PATH = os.environ.get(
    "VOICE_CHANGER_TARGET_WAV", "path/to/female_target_voice.wav"
)

# Initialize the conversion model once at import time; it is reused per request.
tts = TTS(model_name=VC_MODEL_NAME, progress_bar=False).to(
    "cuda" if torch.cuda.is_available() else "cpu"
)


def load_audio(audio_path):
    """Load an audio file at its native sample rate.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        A ``(samples, sample_rate)`` tuple as produced by ``librosa.load``.
    """
    # sr=None keeps the file's own sample rate instead of librosa's default.
    audio, sr = librosa.load(audio_path, sr=None)
    return audio, sr


def save_audio(audio, sr, path):
    """Write ``audio`` to ``path`` with sample rate ``sr``."""
    sf.write(path, audio, sr)


def pitch_shift(audio, sr, n_steps):
    """Shift the pitch of ``audio`` by ``n_steps`` semitones.

    Args:
        audio: Audio samples.
        sr: Sample rate of ``audio``.
        n_steps: Number of semitones to shift; may be fractional or negative.

    Returns:
        The shifted samples. When ``n_steps`` is zero the input is returned
        unchanged, so the (default) neutral slider position neither costs time
        nor degrades the signal through a needless resynthesis pass.
    """
    if n_steps == 0:
        return audio
    return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)


def _reserve_temp_wav():
    """Create an empty temporary ``.wav`` file and return its path.

    The handle is closed before returning so other libraries can reopen the
    path for writing on every platform. The caller owns the file and must
    delete it.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        return handle.name


def _remove_quietly(path):
    """Delete ``path``, ignoring the case where it no longer exists."""
    with contextlib.suppress(FileNotFoundError):
        os.unlink(path)


def change_voice(audio_path, pitch_shift_amount, formant_shift_amount):
    """Convert the voice in ``audio_path`` towards the target voice.

    Args:
        audio_path: Path of the source recording.
        pitch_shift_amount: Semitones of pitch shift applied before conversion.
        formant_shift_amount: Semitones of pitch shift applied after conversion.
            This is a simplified stand-in for formant shifting, not a true
            formant manipulation.

    Returns:
        A ``(sample_rate, samples)`` tuple for the converted audio, where
        ``sample_rate`` is the rate of the converted signal.

    Raises:
        FileNotFoundError: If the target voice recording does not exist.
    """
    if not os.path.isfile(TARGET_VOICE_PATH):
        raise FileNotFoundError(
            f"Target voice file not found: {TARGET_VOICE_PATH!r}. "
            "Set VOICE_CHANGER_TARGET_WAV to a valid WAV file."
        )

    # Load the audio and apply the pre-conversion pitch shift.
    audio, sr = load_audio(audio_path)
    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)

    temp_paths = []
    try:
        source_path = _reserve_temp_wav()
        temp_paths.append(source_path)
        converted_path = _reserve_temp_wav()
        temp_paths.append(converted_path)

        # The conversion API works on files, so round-trip through disk.
        save_audio(pitched_audio, sr, source_path)
        tts.voice_conversion_to_file(
            source_wav=source_path,
            target_wav=TARGET_VOICE_PATH,
            file_path=converted_path,
        )

        # The model may emit a different sample rate than the input had, so
        # keep the rate read back from the converted file from here on.
        converted_audio, converted_sr = load_audio(converted_path)
    finally:
        # Clean up temporary files even when conversion fails part-way.
        for path in temp_paths:
            _remove_quietly(path)

    # Apply formant shifting (simplified approach).
    formant_shifted_audio = pitch_shift(
        converted_audio, converted_sr, formant_shift_amount
    )
    return (converted_sr, formant_shifted_audio)


def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
    """Gradio callback: transform the uploaded recording and return a WAV path.

    Args:
        audio_file: Value of the audio input. With ``type="filepath"`` this is
            a path string; objects exposing a ``.name`` path are also accepted.
            ``None`` means nothing was uploaded.
        pitch_shift_amount: Value of the "Pitch Shift" slider (semitones).
        formant_shift_amount: Value of the "Formant Shift" slider (semitones).

    Returns:
        Path of a newly written WAV file containing the transformed voice.

    Raises:
        gr.Error: If no audio was provided or the target voice file is missing.
    """
    if audio_file is None:
        raise gr.Error("Please upload an audio file first.")

    if isinstance(audio_file, (str, os.PathLike)):
        audio_path = audio_file
    else:
        audio_path = audio_file.name

    try:
        sr, audio = change_voice(
            audio_path, pitch_shift_amount, formant_shift_amount
        )
    except FileNotFoundError as err:
        # Surface configuration problems in the UI instead of a bare traceback.
        raise gr.Error(str(err)) from err

    # A unique file per request keeps concurrent users from overwriting each
    # other's result. The file is left in the temp directory for Gradio to
    # serve after this function returns.
    output_path = _reserve_temp_wav()
    save_audio(audio, sr, output_path)
    return output_path


# Custom CSS for improved design
custom_css = """
.gradio-container {
    background-color: #f0f4f8;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    background-color: white;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #2c3e50;
    text-align: center;
    font-size: 2.5em;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    color: #34495e;
    margin-bottom: 30px;
}
.input-section, .output-section {
    background-color: #ecf0f1;
    padding: 20px;
    border-radius: 8px;
    margin-bottom: 20px;
}
.input-section h3, .output-section h3 {
    color: #2980b9;
    margin-bottom: 15px;
}
"""

# Page header. The tags give the `h1` and `.description` CSS rules something
# to apply to.
_HEADER_HTML = """
<h1>AI Voice Changer</h1>
<p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
"""

_USAGE_MARKDOWN = """
### How to use:
1. Upload an audio file containing the voice you want to transform.
2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
3. Click the "Transform Voice" button to process the audio.
4. Listen to the transformed voice in the output section.
5. Download the transformed audio file if desired.

Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
"""

# Gradio Interface with improved design
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(_HEADER_HTML)

    with gr.Row():
        with gr.Column(elem_classes="input-section"):
            gr.Markdown("### Input")
            audio_input = gr.Audio(type="filepath", label="Upload Voice")
            # Named *_slider so they do not rebind the module-level
            # `pitch_shift` function that `change_voice` calls.
            pitch_slider = gr.Slider(
                -12, 12, step=0.5, label="Pitch Shift", value=0
            )
            formant_slider = gr.Slider(
                -5, 5, step=0.1, label="Formant Shift", value=0
            )
            submit_btn = gr.Button("Transform Voice", variant="primary")

        with gr.Column(elem_classes="output-section"):
            gr.Markdown("### Output")
            audio_output = gr.Audio(label="Transformed Voice")

    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, pitch_slider, formant_slider],
        outputs=audio_output,
    )

    gr.Markdown(_USAGE_MARKDOWN)


if __name__ == "__main__":
    demo.launch()