import os
import torch
import gradio as gr
from openvoice import se_extractor
from openvoice.api import ToneColorConverter
from transformers import pipeline
import scipy.io.wavfile
import scipy.signal
from pathlib import Path

# Output directory setup
output_dir = './openvoice_outputs'
os.makedirs(output_dir, exist_ok=True)

# Function to get model names from a directory
def get_model_names(model_dir):
    model_paths = Path(model_dir).glob('*')
    return [model_path.name for model_path in model_paths if model_path.is_dir()]

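# Note: get_model_names() above is only needed for the local-model workflow that is
# commented out in the UI below (model_dir / gr.Dropdown). The default flow loads a
# model directly from the Hugging Face Hub by its ID.
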
def generate_speech(text, model_path):
    # The "text-to-speech" pipeline returns a dict with the waveform under "audio"
    # and its sample rate under "sampling_rate".
    synthesiser = pipeline("text-to-speech", model_path, device=0 if torch.cuda.is_available() else -1)
    speech = synthesiser(text)
    # Resample to 48 kHz if needed
    if speech["sampling_rate"] != 48000:
        resampled_audio = scipy.signal.resample(
            speech["audio"][0],
            int(len(speech["audio"][0]) * 48000 / speech["sampling_rate"])
        )
        sampling_rate = 48000
    else:
        resampled_audio = speech["audio"][0]
        sampling_rate = speech["sampling_rate"]
    return sampling_rate, resampled_audio

def save_audio(sampling_rate, audio_data, filename="output.wav"):
    scipy.io.wavfile.write(filename, rate=sampling_rate, data=audio_data)
    return filename

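# Note: scipy.io.wavfile.write accepts the float arrays produced by the TTS pipeline
# and scipy.signal.resample and stores them as IEEE-float WAV data (values expected
# to lie in the [-1.0, 1.0] range), so no integer conversion is done here.
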
def voice_cloning(base_speaker, reference_speaker, model_version, device_choice, vad_select):
    try:
        # Determine paths and device
        ckpt_converter = f'./OPENVOICE_MODELS/{model_version}'
        device = "cuda:0" if device_choice == "GPU" and torch.cuda.is_available() else "cpu"
        print(f"Device: {device}")
        # Load the ToneColorConverter
        tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
        tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')
        # Extract speaker embeddings
        source_se, _ = se_extractor.get_se(base_speaker, tone_color_converter, vad=vad_select)
        target_se, _ = se_extractor.get_se(reference_speaker, tone_color_converter, vad=vad_select)
        # Define output file path
        save_path = f'{output_dir}/output_cloned.wav'
        # Perform tone color conversion
        tone_color_converter.convert(
            audio_src_path=base_speaker,
            src_se=source_se,
            tgt_se=target_se,
            output_path=save_path,
        )
        return save_path, "Voice cloning successful!"
    except Exception as e:
        return None, f"Error: {str(e)}"

def ui_fn(text, model_path, clone, reference_speaker, model_version, device_choice, vad_select):
    # The parameters must match the Gradio inputs below one-to-one:
    # text, model path/ID, clone flag, reference audio, model version, device, VAD flag.
    sampling_rate, audio_data = generate_speech(text, model_path)
    audio_file = save_audio(sampling_rate, audio_data)
    if clone:
        cloned_audio_file, status = voice_cloning(audio_file, reference_speaker, model_version, device_choice, vad_select)
        return cloned_audio_file, status
    else:
        return audio_file, "Speech generation successful!"

if __name__ == "__main__":
    #model_dir = "./models_mms"
    #model_names = get_model_names(model_dir)
    iface = gr.Interface(
        fn=ui_fn,
        inputs=[
            gr.Textbox(label="Text to Synthesize"),
            gr.Textbox(label="Model Path or Id", value="VIZINTZOR/MMS-TTS-THAI-MALE-NARRATOR"),
            #gr.Dropdown(model_names, label="Model"),
            gr.Checkbox(label="Clone Voice", value=False),
            gr.Audio(label="Reference Speaker (Target Voice)", type="filepath"),
            gr.Dropdown(["v1", "v2"], value="v2", label="Model Version"),
            gr.Dropdown(["CPU", "GPU"], value="GPU" if torch.cuda.is_available() else "CPU", label="Device"),
            gr.Checkbox(value=False, label="VAD", interactive=True)
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.Textbox(label="Status", interactive=False)
        ],
        title="Text-to-Speech Synthesizer with Voice Cloning",
        description="Enter text and model path to generate speech. Optionally, clone the voice using a reference speaker."
    )
    iface.launch()
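
# When running locally rather than on a Hugging Face Space, a temporary public URL
# can be requested with iface.launch(share=True); the plain launch() call above is
# all that is needed on Spaces.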