Spaces:
Runtime error
Runtime error
import gradio as gr | |
import torch | |
from outetts.v0_1.interface import InterfaceHF | |
import soundfile as sf | |
import tempfile | |
import os | |
# Process-wide holder for the loaded interface. Filled on first use so that
# importing this module stays cheap and later requests reuse the same object.
_INTERFACE_CACHE = {}


def initialize_model():
    """Return the shared OuteTTS interface, creating it on first use.

    The interface is constructed once and then reused. Building a new
    ``InterfaceHF`` for every request repeats the whole model set-up each
    time the caller asks for it, which is what happened before the cache
    was added.

    Returns:
        The ``InterfaceHF`` instance for the "OuteAI/OuteTTS-0.1-350M" model.
    """
    if "interface" not in _INTERFACE_CACHE:
        _INTERFACE_CACHE["interface"] = InterfaceHF("OuteAI/OuteTTS-0.1-350M")
    return _INTERFACE_CACHE["interface"]
def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.1, repetition_penalty=1.1):
    """Generate speech for ``text_to_speak`` using the voice in a reference recording.

    Args:
        audio_path: Path to the reference audio file, or ``None``/empty when
            nothing was uploaded.
        reference_text: Transcript of what is said in the reference audio.
        text_to_speak: Text to synthesize with the cloned voice.
        temperature: Sampling temperature forwarded to ``interface.generate``.
        repetition_penalty: Repetition penalty forwarded to ``interface.generate``.

    Returns:
        A ``(path, message)`` tuple. On success ``path`` is the generated
        ``.wav`` file and ``message`` is a success notice. On any failure
        ``path`` is ``None`` and ``message`` starts with ``"Error: "``.
        This function never raises; every exception is reported through
        ``message`` so the UI can display it.
    """
    # Reject incomplete input up front: it is cheaper than loading the model
    # and gives the user an actionable message instead of a library traceback.
    if not audio_path:
        return None, "Error: Please upload a reference audio file."
    if not reference_text or not reference_text.strip():
        return None, "Error: Please provide the reference text spoken in the audio."
    if not text_to_speak or not text_to_speak.strip():
        return None, "Error: Please enter the text to speak."

    output_path = None
    try:
        interface = initialize_model()

        # Create speaker from reference audio
        speaker = interface.create_speaker(audio_path, reference_text)

        # Generate speech with cloned voice
        output = interface.generate(
            text=text_to_speak,
            speaker=speaker,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            # NOTE(review): "max_lenght" is spelled as in the original call;
            # presumably it matches the v0_1 interface's own keyword.
            # Confirm against the installed outetts version before renaming.
            max_lenght=4096,
        )

        # Reserve a unique path, then close the handle before writing to it.
        # Keeping the handle open leaks a descriptor per request and can
        # block writers on platforms that lock open files.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            output_path = temp_file.name
        output.save(output_path)
        return output_path, "Voice cloning successful!"
    except Exception as e:
        # Do not leave an orphaned temp file behind when saving failed.
        if output_path is not None:
            try:
                os.remove(output_path)
            except OSError:
                pass
        return None, f"Error: {str(e)}"
# Build the Gradio UI: inputs on the left, generated audio and status on the right.
# Markdown literals are indented with the code for readability; this relies on
# Gradio dedenting Markdown values before rendering.
with gr.Blocks(title="Voice Cloning with OuteTTS") as demo:
    # The heading emoji had been corrupted by an encoding round-trip; restored.
    gr.Markdown("# 🎙️ Voice Cloning with OuteTTS")
    gr.Markdown("""
    This app uses OuteTTS to clone voices. Upload a reference audio file, provide the text being spoken in that audio,
    and enter the new text you want to be spoken in the cloned voice.
    Note: For best results, use clear audio with minimal background noise.
    """)

    with gr.Row():
        with gr.Column():
            # Input components
            # type="filepath" hands the callback a path on disk rather than raw samples.
            audio_input = gr.Audio(label="Upload Reference Audio", type="filepath")
            reference_text = gr.Textbox(label="Reference Text (what is being said in the audio)")
            text_to_speak = gr.Textbox(label="Text to Speak (what you want the cloned voice to say)")

            with gr.Row():
                # Slider defaults mirror the defaults of process_audio_file.
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.1, step=0.1,
                    label="Temperature (higher = more variation)",
                )
                repetition_penalty = gr.Slider(
                    minimum=1.0, maximum=2.0, value=1.1, step=0.1,
                    label="Repetition Penalty",
                )

            # Submit button
            submit_btn = gr.Button("Generate Voice", variant="primary")

        with gr.Column():
            # Output components
            output_audio = gr.Audio(label="Generated Speech")
            output_message = gr.Textbox(label="Status")

    # Handle submission: the order of `inputs` must match the positional
    # parameters of process_audio_file, and `outputs` its (path, message) return.
    submit_btn.click(
        fn=process_audio_file,
        inputs=[audio_input, reference_text, text_to_speak, temperature, repetition_penalty],
        outputs=[output_audio, output_message],
    )

    gr.Markdown("""
    ### Tips for best results:
    1. Use high-quality reference audio (clear speech, minimal background noise)
    2. Ensure reference text matches the audio exactly
    3. Keep generated text relatively short for better quality
    4. Adjust temperature and repetition penalty if needed:
       - Lower temperature (0.1-0.3) for more consistent output
       - Higher repetition penalty (1.1-1.3) to avoid repetition
    """)

# Only start the server when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()