File size: 3,247 Bytes
068b7da
 
 
 
6d72e83
cd428c2
 
068b7da
 
 
 
 
 
 
6d72e83
 
 
 
 
 
609ffca
6d72e83
cd428c2
068b7da
6d72e83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
068b7da
6d72e83
068b7da
 
6d72e83
 
 
 
068b7da
 
 
 
 
c92f138
e185191
 
 
 
 
 
 
 
 
 
068b7da
6d72e83
 
 
 
 
068b7da
 
6d72e83
 
 
 
 
 
 
 
e185191
 
 
 
 
068b7da
 
95540a0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import tempfile
import wave
from datetime import datetime

import gradio as gr
import librosa
import numpy as np
import torch
from TTS.api import TTS


# Pick the compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the FreeVC24 voice-conversion model once at import time (progress bar
# disabled) and move it to the selected device.
tts = TTS(
    model_name="voice_conversion_models/multilingual/vctk/freevc24",
    progress_bar=False,
).to(device)

def convert_audio_to_wav(file_path):
    """Decode an audio file with librosa and re-save it as a 16-bit PCM WAV.

    Args:
        file_path: Path to the source audio file (any format that
            ``librosa.load`` can decode, e.g. mp3).

    Returns:
        Path to a newly created temporary ``.wav`` file containing the decoded
        audio at its native sampling rate. Each call returns a distinct path,
        so converting two files in a row never overwrites the first result.
        The caller owns the file and may delete it when done.
    """
    # sr=None keeps the file's native sampling rate. With its default
    # arguments librosa.load returns mono floating-point samples.
    audio, sr = librosa.load(file_path, sr=None)

    # librosa.output.write_wav no longer exists in current librosa releases,
    # so the WAV is written with the standard-library ``wave`` module instead.
    # Clip before scaling so out-of-range samples saturate rather than wrap.
    pcm = (np.clip(audio, -1.0, 1.0) * 32767.0).astype("<i2")

    # A unique file per call: a shared fixed name would let a second
    # conversion clobber the first one while it is still needed.
    fd, output_path = tempfile.mkstemp(prefix="converted_", suffix=".wav")
    os.close(fd)

    with wave.open(output_path, "wb") as wav_file:
        wav_file.setnchannels(1)  # mono, matching librosa.load's default
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setframerate(int(sr))
        wav_file.writeframes(pcm.tobytes())
    return output_path

def voice_conversion(input_audio, target_voice, uploaded_target_voice):
    """Convert the speaker in ``input_audio`` to sound like the target voice.

    Args:
        input_audio: File path of the source recording, or None when the user
            has not recorded/uploaded anything.
        target_voice: File name of an example voice inside the ``Examples``
            folder; used only when no target voice was uploaded.
        uploaded_target_voice: File path of a user-supplied target voice, or
            None to fall back to ``target_voice``.

    Returns:
        Path of the WAV file holding the converted audio ("output.wav").

    Raises:
        gr.Error: If no input audio was provided or the selected example
            target voice cannot be found. Gradio shows the message to the
            user instead of trying to load it as an audio file.
    """
    print(datetime.now())
    output_path = "output.wav"

    # Gradio passes None when the audio component is empty; fail with a
    # readable message instead of an AttributeError on ``.endswith``.
    if input_audio is None:
        raise gr.Error("Error: No input audio provided.")

    # Intermediate WAV files created below; removed once conversion is done.
    temp_files = []
    try:
        # Check if the user uploaded a target voice, otherwise use the one
        # selected from the examples.
        if uploaded_target_voice is not None:
            target_voice_path = uploaded_target_voice
            # Convert uploaded target to wav if necessary
            if not uploaded_target_voice.lower().endswith(".wav"):
                target_voice_path = convert_audio_to_wav(uploaded_target_voice)
                temp_files.append(target_voice_path)
        else:
            # An empty dropdown selection arrives as None/"" and would
            # otherwise crash os.path.join.
            if not target_voice:
                raise gr.Error("Error: Target voice file not found.")
            target_voice_path = os.path.join("Examples", target_voice)
            if not os.path.exists(target_voice_path):
                raise gr.Error("Error: Target voice file not found.")

        # Convert input audio to wav if necessary
        if not input_audio.lower().endswith(".wav"):
            input_audio = convert_audio_to_wav(input_audio)
            temp_files.append(input_audio)

        # Perform voice conversion
        tts.voice_conversion_to_file(
            source_wav=input_audio,
            target_wav=target_voice_path,
            file_path=output_path,
        )
    finally:
        # Best-effort cleanup of the intermediate files; a failure to delete
        # must not mask the conversion result or the original error.
        for temp_path in temp_files:
            try:
                os.remove(temp_path)
            except OSError:
                pass
    return output_path

# Collect the example target voices (WAV files) from the Examples folder.
examples_folder = "Examples/"
# os.listdir returns entries in arbitrary order, so sort to keep the dropdown
# (and its default choice) stable between runs. The extension match is
# case-insensitive, and a missing folder simply yields no examples so the app
# can still be used with an uploaded target voice.
example_files = (
    sorted(
        name
        for name in os.listdir(examples_folder)
        if name.lower().endswith(".wav")
    )
    if os.path.isdir(examples_folder)
    else []
)

# Define Gradio Interface: source audio + target voice selection on the first
# row, a preview player for the selected example on the second, then the
# convert button and the converted result.
with gr.Blocks() as demo:
    gr.Markdown("## Voice Conversion using Coqui TTS")

    with gr.Row():
        input_audio = gr.Audio(label="Record or Upload Your Voice", type="filepath")
        target_voice = gr.Dropdown(
            choices=example_files,
            label="Select Target Voice from Examples",
            # Indexing [0] on an empty list would crash at start-up when no
            # example voices are available; start with no selection instead.
            value=example_files[0] if example_files else None,
            info="Located in Examples/ folder"
        )
        uploaded_target_voice = gr.Audio(
            label="Or Upload Your Own Target Voice",
            type="filepath"
        )

    with gr.Row():
        play_button = gr.Button("Preview Selected Target Voice")
        preview_audio = gr.Audio(label="Preview Target Voice", type="filepath")

    # Add convert button and output audio
    convert_button = gr.Button("Convert Voice")
    output_audio = gr.Audio(label="Converted Voice", type="filepath")

    # Preview button for listening to the selected target voice from examples
    def preview_target_voice(selected_target_voice):
        """Return the path of the selected example voice, or None if nothing is selected."""
        # With no selection the dropdown passes None, which os.path.join
        # rejects; returning None leaves the preview player empty.
        if not selected_target_voice:
            return None
        return os.path.join(examples_folder, selected_target_voice)

    play_button.click(preview_target_voice, inputs=[target_voice], outputs=preview_audio)

    # Conversion process
    convert_button.click(
        voice_conversion,
        inputs=[input_audio, target_voice, uploaded_target_voice],
        outputs=output_audio
    )

# Start the Gradio app; share=True asks Gradio to also create a public share
# link in addition to the local URL.
demo.launch(share=True)