File size: 1,545 Bytes
ce3fc4b
 
 
 
c81140c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
import gradio as gr
from bark import SAMPLE_RATE, generate_audio, preload_models

# Memo used by load_model: maps each requested name to the value that
# preload_models returned for it, so a given name is only loaded once.
model_cache = {}

def load_model(model_name):
    """Make sure Bark's models are loaded and remember that per ``model_name``.

    Args:
        model_name: Key under which the load is memoised in ``model_cache``
            (the app passes the selected voice).

    Returns:
        Whatever ``preload_models`` returned the first time this name was
        requested.
    """
    if model_name not in model_cache:
        # preload_models takes device/size flags (its first positional
        # parameter is text_use_gpu), not a voice name. Forwarding the name
        # would, for a falsy value such as None, push the text model to CPU.
        model_cache[model_name] = preload_models()
    return model_cache[model_name]

def validate_input(text):
    """Check that ``text`` is usable as a generation prompt.

    Args:
        text: The user-supplied string; ``None`` is treated as empty.

    Raises:
        ValueError: If the text is missing, blank (whitespace only), or
            longer than 500 characters.
    """
    # None and whitespace-only input carry nothing to speak, so report them
    # with the same message as an empty string instead of a TypeError.
    if text is None or not text.strip():
        raise ValueError("Input text cannot be empty.")
    if len(text) > 500:
        raise ValueError("Input text is too long (500 characters max).")

# Dropdown labels -> Bark voice preset names.
# NOTE(review): the English "v2" presets are an assumption about which
# voices the labels are meant to select; adjust if other voices are intended.
_VOICE_PRESETS = {
    "Speaker 1": "v2/en_speaker_1",
    "Speaker 2": "v2/en_speaker_2",
}


def generate_custom_audio(text, prompt, pitch, tempo):
    """Synthesise ``text`` with the chosen voice and apply the adjustments.

    Args:
        text: Text to speak; checked by ``validate_input``.
        prompt: Voice selection. Known dropdown labels are translated to
            Bark preset names; any other value is passed through unchanged.
        pitch: Pitch factor forwarded to ``adjust_audio``.
        tempo: Tempo factor forwarded to ``adjust_audio``.

    Returns:
        A ``(SAMPLE_RATE, samples)`` tuple, the form a numpy-typed
        ``gr.Audio`` output expects.

    Raises:
        gr.Error: If validation, model loading, generation, or adjustment
            fails; the message is the underlying error text so the UI can
            show it to the user.
    """
    try:
        validate_input(text)
        load_model(prompt)  # called for its side effect: models get loaded
        voice = _VOICE_PRESETS.get(prompt, prompt)
        audio = generate_audio(text, history_prompt=voice)
        audio = adjust_audio(audio, pitch, tempo)
    except Exception as e:
        # UI boundary: surface every failure as a visible Gradio error
        # rather than returning a value the audio component cannot render.
        raise gr.Error(str(e)) from e
    return SAMPLE_RATE, audio

def _stretch_preserving_pitch(signal, rate, n_fft=1024, hop=256):
    """Time-stretch a mono float signal by ``rate`` using a phase vocoder.

    ``rate`` > 1 shortens the signal and ``rate`` < 1 lengthens it, while
    frequencies stay where they are. Returns ``round(len(signal) / rate)``
    samples (at least one).
    """
    half = n_fft // 2
    # Periodic Hann window: overlap-adds evenly at hop = n_fft / 4.
    window = np.hanning(n_fft + 1)[:-1]
    # Centre the first frame on sample 0 and append a full frame of silence
    # so the synthesised output always covers the requested length.
    padded = np.pad(signal, (half, half + n_fft))
    n_frames = 1 + (len(padded) - n_fft) // hop
    offsets = hop * np.arange(n_frames)[:, None] + np.arange(n_fft)[None, :]
    spectrum = np.fft.rfft(padded[offsets] * window, axis=1)
    n_bins = spectrum.shape[1]
    # One extra silent frame lets the final step interpolate safely.
    spectrum = np.vstack([spectrum, np.zeros((1, n_bins), dtype=spectrum.dtype)])
    magnitude = np.abs(spectrum)
    phase = np.angle(spectrum)

    # Fractional analysis-frame positions visited by the output frames.
    steps = np.arange(0, n_frames, rate, dtype=np.float64)
    base = np.minimum(np.floor(steps).astype(int), n_frames - 1)
    frac = (steps - base)[:, None]
    blended = (1.0 - frac) * magnitude[base] + frac * magnitude[base + 1]

    # Per-bin phase advance over one hop: the bin-centre advance plus the
    # measured deviation, wrapped into [-pi, pi].
    expected = 2.0 * np.pi * hop * np.arange(n_bins) / n_fft
    deviation = phase[base + 1] - phase[base] - expected
    deviation -= 2.0 * np.pi * np.round(deviation / (2.0 * np.pi))
    advance = expected + deviation
    # Exclusive running sum: frame t uses the phase accumulated before it.
    accumulated = phase[0] + np.cumsum(advance, axis=0) - advance

    frames = np.fft.irfft(blended * np.exp(1j * accumulated), n=n_fft, axis=1)
    frames = frames * window

    total = n_fft + hop * (len(steps) - 1)
    output = np.zeros(total)
    weight = np.zeros(total)
    window_sq = window ** 2
    for index, frame in enumerate(frames):
        start = index * hop
        output[start:start + n_fft] += frame
        weight[start:start + n_fft] += window_sq
    # Normalise by the summed window energy; skip near-zero weights.
    output /= np.where(weight > 1e-8, weight, 1.0)

    length = max(1, int(round(len(signal) / rate)))
    return output[half:half + length]


def adjust_audio(audio, pitch, tempo):
    """Change the pitch and tempo of a mono waveform independently.

    The previous implementation multiplied the samples by ``pitch * tempo``,
    which only changes amplitude. This version time-stretches with a phase
    vocoder and then resamples, so ``pitch`` scales frequencies and ``tempo``
    scales playback speed.

    Args:
        audio: 1-D array-like of samples (mono).
        pitch: Frequency multiplier; 1 leaves pitch unchanged, 2 raises it
            by an octave. Must be positive.
        tempo: Speed multiplier; 1 leaves duration unchanged, 2 halves it.
            Must be positive.

    Returns:
        A new array of ``round(len(audio) / tempo)`` samples (at least one),
        in the input's dtype when that dtype is floating point.

    Raises:
        ValueError: If ``pitch`` or ``tempo`` is not positive, or ``audio``
            is not one-dimensional.
    """
    if pitch <= 0 or tempo <= 0:
        raise ValueError("Pitch and tempo must be positive.")
    samples = np.asarray(audio)
    if samples.size == 0 or (pitch == 1 and tempo == 1):
        return samples.copy()
    if samples.ndim != 1:
        raise ValueError("Expected mono (1-D) audio.")

    work = samples.astype(np.float64)
    # Stretch to len * pitch / tempo without touching pitch; the resampling
    # step below then compresses by `pitch`, which multiplies every
    # frequency by `pitch` and lands on the tempo-adjusted length.
    rate = tempo / pitch
    stretched = work if rate == 1 else _stretch_preserving_pitch(work, rate)

    target = max(1, int(round(len(work) / tempo)))
    positions = np.linspace(0.0, len(stretched) - 1, num=target)
    # Linear interpolation has no anti-alias filter; acceptable for the
    # moderate factors this is meant for.
    result = np.interp(positions, np.arange(len(stretched)), stretched)
    if np.issubdtype(samples.dtype, np.floating):
        result = result.astype(samples.dtype)
    return result

# Interface design: one text box, a voice picker, two adjustment sliders and
# a button that sends all four values to generate_custom_audio.
with gr.Blocks() as interface:
    gr.Markdown("# 🎤 Advanced Voice Generator")
    text_input = gr.Textbox(label="Enter Text")
    # Preselect a voice so the callback never receives an empty selection.
    prompt_option = gr.Dropdown(
        choices=["Speaker 1", "Speaker 2"],
        value="Speaker 1",
        label="Voice",
    )
    # gr.Slider names its bounds `minimum`/`maximum`; start both sliders at
    # the neutral factor 1.0 rather than at the lower bound.
    pitch_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Pitch")
    tempo_slider = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, label="Tempo")
    generate_button = gr.Button("Generate Audio")
    audio_output = gr.Audio(label="Generated Audio")

    generate_button.click(
        generate_custom_audio,
        inputs=[text_input, prompt_option, pitch_slider, tempo_slider],
        outputs=audio_output,
    )

interface.launch()