"""Gradio front end for an AI voice changer.

The uploaded recording is pitch-shifted, converted towards a target voice
with a Coqui TTS voice-conversion model, pitch-shifted a second time (the
"formant shift" slider) and written to a WAV file that Gradio plays back.
"""

import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
import torch
from TTS.api import TTS

# Voice-conversion checkpoint. `voice_conversion_to_file` needs a model from
# the "voice_conversion_models" family; a plain "tts_models/..." checkpoint
# does not set up a voice converter.
VC_MODEL_NAME = "voice_conversion_models/multilingual/vctk/freevc24"

# You need to provide a female target voice file. The location can be
# overridden with the TARGET_VOICE_PATH environment variable.
TARGET_VOICE_PATH = os.environ.get(
    "TARGET_VOICE_PATH", "path/to/female_target_voice.wav"
)

# File handed back to the Gradio audio player. NOTE: this single path is
# shared by every request served by the app.
OUTPUT_PATH = "output_voice.wav"

# Initialize TTS model
try:
    tts = TTS(VC_MODEL_NAME, progress_bar=False)
except Exception as e:
    # Keep the UI importable even when the model cannot be loaded;
    # change_voice() checks for None and reports failure.
    print(f"Error initializing TTS model: {e}")
    tts = None


def load_audio(audio_path):
    """Load an audio file at its native sample rate.

    Args:
        audio_path: Path of the file to read.

    Returns:
        ``(samples, sample_rate)`` on success, ``(None, None)`` if the file
        could not be read (the error is printed).
    """
    try:
        # sr=None keeps the file's own sample rate instead of resampling.
        audio, sr = librosa.load(audio_path, sr=None)
    except Exception as e:
        print(f"Error loading audio: {e}")
        return None, None
    return audio, sr


def save_audio(audio, sr, path):
    """Write ``audio`` to ``path`` at sample rate ``sr``.

    Returns:
        ``True`` when the file was written, ``False`` when writing failed
        (the error is printed). Callers should not use ``path`` on ``False``.
    """
    try:
        sf.write(path, audio, sr)
    except Exception as e:
        print(f"Error saving audio: {e}")
        return False
    return True


def pitch_shift(audio, sr, n_steps):
    """Shift the pitch of ``audio`` by ``n_steps`` semitones.

    A shift of zero returns the input untouched so the default slider
    position does not push the signal through a needless resynthesis pass.
    If shifting fails the error is printed and the input is returned
    unchanged.
    """
    if n_steps == 0:
        return audio
    try:
        return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
    except Exception as e:
        print(f"Error in pitch shifting: {e}")
        return audio


def change_voice(audio_path, pitch_shift_amount, formant_shift_amount):
    """Convert the voice in ``audio_path`` towards the target voice.

    Steps: pitch-shift the input, run voice conversion against
    ``TARGET_VOICE_PATH``, then apply a second pitch shift to the converted
    audio (this is what the "Formant Shift" slider controls).

    Args:
        audio_path: Path of the source recording.
        pitch_shift_amount: Semitones applied before conversion.
        formant_shift_amount: Semitones applied after conversion.

    Returns:
        ``(sample_rate, samples)`` of the converted audio, or
        ``(None, None)`` when the model is unavailable, the target voice
        file is missing, or any step fails (the error is printed).
    """
    if tts is None:
        return None, None

    if not os.path.isfile(TARGET_VOICE_PATH):
        print(
            "Error in voice conversion: target voice file not found: "
            f"{TARGET_VOICE_PATH}"
        )
        return None, None

    audio, sr = load_audio(audio_path)
    if audio is None or sr is None:
        return None, None

    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)

    try:
        # The temporary directory removes both intermediate files on every
        # exit path, including failures inside the model call.
        with tempfile.TemporaryDirectory() as work_dir:
            source_path = os.path.join(work_dir, "source.wav")
            converted_path = os.path.join(work_dir, "converted.wav")

            if not save_audio(pitched_audio, sr, source_path):
                return None, None

            tts.voice_conversion_to_file(
                source_wav=source_path,
                target_wav=TARGET_VOICE_PATH,
                file_path=converted_path,
            )

            # Re-read the sample rate from the converted file: the model
            # may write at a different rate than the input recording.
            converted_audio, converted_sr = load_audio(converted_path)
            if converted_audio is None or converted_sr is None:
                return None, None

            shifted_audio = pitch_shift(
                converted_audio, converted_sr, formant_shift_amount
            )
            return converted_sr, shifted_audio
    except Exception as e:
        print(f"Error in voice conversion: {e}")
        return None, None


def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
    """Gradio callback: transform ``audio_file`` and return the result path.

    Returns:
        Path of the written WAV file, or ``None`` when no file was uploaded,
        conversion failed, or the result could not be saved.
    """
    if audio_file is None:
        return None

    # Use the audio_file path directly
    sr, audio = change_voice(audio_file, pitch_shift_amount, formant_shift_amount)
    if sr is None or audio is None:
        return None

    # Do not hand a stale or missing file to the player if writing failed.
    if not save_audio(audio, sr, OUTPUT_PATH):
        return None
    return OUTPUT_PATH


# Custom CSS for improved design
custom_css = """
.gradio-container {
    background-color: #f0f4f8;
}
.container {
    max-width: 900px;
    margin: auto;
    padding: 20px;
    border-radius: 10px;
    background-color: white;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
h1 {
    color: #2c3e50;
    text-align: center;
    font-size: 2.5em;
    margin-bottom: 20px;
}
.description {
    text-align: center;
    color: #34495e;
    margin-bottom: 30px;
}
.input-section, .output-section {
    background-color: #ecf0f1;
    padding: 20px;
    border-radius: 8px;
    margin-bottom: 20px;
}
.input-section h3, .output-section h3 {
    color: #2980b9;
    margin-bottom: 15px;
}
"""

# Page header. The tags match the `h1` and `.description` rules in custom_css.
HEADER_HTML = """
<h1>AI Voice Changer</h1>
<p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
"""

# Usage notes shown under the controls. Kept at column 0 so the Markdown
# list is not interpreted as an indented code block.
HOW_TO_USE_MD = """
### How to use:
1. Upload an audio file containing the voice you want to transform.
2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
3. Click the "Transform Voice" button to process the audio.
4. Listen to the transformed voice in the output section.
5. Download the transformed audio file if desired.

Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
"""

# Gradio Interface with improved design
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(HEADER_HTML)

    with gr.Row():
        with gr.Column(elem_classes="input-section"):
            gr.Markdown("### Input")
            audio_input = gr.Audio(type="filepath", label="Upload Voice")
            # Named *_slider so these widgets do not rebind the module-level
            # pitch_shift() function that change_voice() calls.
            pitch_slider = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
            formant_slider = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
            submit_btn = gr.Button("Transform Voice", variant="primary")

        with gr.Column(elem_classes="output-section"):
            gr.Markdown("### Output")
            audio_output = gr.Audio(label="Transformed Voice")

    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, pitch_slider, formant_slider],
        outputs=audio_output,
    )

    gr.Markdown(HOW_TO_USE_MD)

if __name__ == "__main__":
    demo.launch()