import gradio as gr import numpy as np import librosa import soundfile as sf from TTS.api import TTS import torch import os import tempfile # Initialize TTS model try: tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False) except Exception as e: print(f"Error initializing TTS model: {e}") tts = None def load_audio(audio_path): try: audio, sr = librosa.load(audio_path, sr=None) return audio, sr except Exception as e: print(f"Error loading audio: {e}") return None, None def save_audio(audio, sr, path): try: sf.write(path, audio, sr) except Exception as e: print(f"Error saving audio: {e}") def pitch_shift(audio, sr, n_steps): try: return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps) except Exception as e: print(f"Error in pitch shifting: {e}") return audio def change_voice(audio_path, pitch_shift_amount, formant_shift_amount): if tts is None: return None, None audio, sr = load_audio(audio_path) if audio is None or sr is None: return None, None pitched_audio = pitch_shift(audio, sr, pitch_shift_amount) try: with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file: save_audio(pitched_audio, sr, temp_file.name) converted_audio_path = tts.voice_conversion( source_wav=temp_file.name, target_wav="path/to/female_target_voice.wav", # You need to provide a female target voice file output_wav=None ) converted_audio, _ = load_audio(converted_audio_path) formant_shifted_audio = librosa.effects.pitch_shift(converted_audio, sr=sr, n_steps=formant_shift_amount) os.unlink(temp_file.name) os.unlink(converted_audio_path) return sr, formant_shifted_audio except Exception as e: print(f"Error in voice conversion: {e}") return None, None def process_audio(audio_file, pitch_shift_amount, formant_shift_amount): if audio_file is None: return None # Use the audio_file path directly sr, audio = change_voice(audio_file, pitch_shift_amount, formant_shift_amount) if sr is None or audio is None: return None output_path = "output_voice.wav" save_audio(audio, sr, output_path) return output_path # Custom CSS for improved design custom_css = """ .gradio-container { background-color: #f0f4f8; } .container { max-width: 900px; margin: auto; padding: 20px; border-radius: 10px; background-color: white; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); } h1 { color: #2c3e50; text-align: center; font-size: 2.5em; margin-bottom: 20px; } .description { text-align: center; color: #34495e; margin-bottom: 30px; } .input-section, .output-section { background-color: #ecf0f1; padding: 20px; border-radius: 8px; margin-bottom: 20px; } .input-section h3, .output-section h3 { color: #2980b9; margin-bottom: 15px; } """ # Gradio Interface with improved design with gr.Blocks(css=custom_css) as demo: gr.HTML( """
Transform any voice into a realistic female voice using advanced AI technology