eaysu committed · Commit 7cbdddc · 1 Parent(s): 204c3ad

model and language versions enhanced
app.py
CHANGED
@@ -1,52 +1,81 @@
  import gradio as gr
  from transformers import AutoProcessor, BarkModel
  import torch
- import

- # Limit CPU usage
  torch.set_num_threads(1)

  # Function to generate speech
- def generate_speech(text, voice_preset):
      audio_array = model.generate(**inputs)
-     audio_array = audio_array.cpu().numpy().astype('float32').squeeze()
-     # Return the audio with sample rate for Gradio's audio component
      return (model.generation_config.sample_rate, audio_array)

  # Gradio app setup
  with gr.Blocks() as app:
-     gr.Markdown("#
-     gr.Markdown("Enter text, select a Turkish voice preset, and click 'Generate Voice' to play the generated audio.")

-     voice_preset_input = gr.Dropdown(
-         ["v2/tr_speaker_0", "v2/tr_speaker_1", "v2/tr_speaker_2", "v2/tr_speaker_3",
-          "v2/tr_speaker_4", "v2/tr_speaker_5", "v2/tr_speaker_6",
-          "v2/tr_speaker_7", "v2/tr_speaker_8", "v2/tr_speaker_9"],
-         label="Select Turkish Voice Preset"
      )

      generate_button = gr.Button("Generate Voice")

- app.launch(share=True)
  import gradio as gr
  from transformers import AutoProcessor, BarkModel
  import torch
+ import numpy as np

  torch.set_num_threads(1)

+ # Preload available models to optimize switching
+ models = {
+     "suno/bark": BarkModel.from_pretrained("suno/bark"),
+     "suno/bark-small": BarkModel.from_pretrained("suno/bark-small")
+ }
+
+ # Voice presets for each language
+ voice_presets = {
+     "English": ["v2/en_speaker_0", "v2/en_speaker_1", "v2/en_speaker_2", "v2/en_speaker_3",
+                 "v2/en_speaker_4", "v2/en_speaker_5", "v2/en_speaker_6",
+                 "v2/en_speaker_7", "v2/en_speaker_8", "v2/en_speaker_9"],
+     "French": ["v2/fr_speaker_0", "v2/fr_speaker_1", "v2/fr_speaker_2", "v2/fr_speaker_3",
+                "v2/fr_speaker_4", "v2/fr_speaker_5", "v2/fr_speaker_6",
+                "v2/fr_speaker_7", "v2/fr_speaker_8", "v2/fr_speaker_9"],
+     "German": ["v2/de_speaker_0", "v2/de_speaker_1", "v2/de_speaker_2", "v2/de_speaker_3",
+                "v2/de_speaker_4", "v2/de_speaker_5", "v2/de_speaker_6",
+                "v2/de_speaker_7", "v2/de_speaker_8", "v2/de_speaker_9"],
+     "Turkish": ["v2/tr_speaker_0", "v2/tr_speaker_1", "v2/tr_speaker_2", "v2/tr_speaker_3",
+                 "v2/tr_speaker_4", "v2/tr_speaker_5", "v2/tr_speaker_6",
+                 "v2/tr_speaker_7", "v2/tr_speaker_8", "v2/tr_speaker_9"]
+ }
+
+ # Function to update voice presets based on selected language
+ def update_voice_presets(language):
+     return gr.Dropdown.update(choices=voice_presets[language])

  # Function to generate speech
+ def generate_speech(text, model_name, voice_preset):
+     model = models[model_name]
+     processor = AutoProcessor.from_pretrained(model_name)  # Load processor for the selected model
+     inputs = processor(text, voice_preset=voice_preset, return_tensors="pt")
+     inputs['attention_mask'] = torch.ones_like(inputs['input_ids'])
+
+     with torch.no_grad():
          audio_array = model.generate(**inputs)
+     audio_array = audio_array.cpu().numpy().astype('float32').squeeze()
      return (model.generation_config.sample_rate, audio_array)

  # Gradio app setup
  with gr.Blocks() as app:
+     gr.Markdown("# Multilingual Text-to-Speech with Bark")

+     # Language selection
+     language_input = gr.Dropdown(
+         ["English", "French", "German", "Turkish"],
+         label="Select Language"
      )

+     # Textbox for user input
+     text_input = gr.Textbox(label="Enter Text", placeholder="Type something to synthesize...")
+
+     # Model selection
+     model_preset_input = gr.Dropdown(["suno/bark", "suno/bark-small"], label="Select Model")
+
+     # Voice preset dropdown (will be updated based on language)
+     voice_preset_input = gr.Dropdown(choices=[], label="Select Voice Preset")
+
+     # Button to generate voice
      generate_button = gr.Button("Generate Voice")

+     # Output audio
+     audio_output = gr.Audio(label="Generated Voice", type="numpy")
+
+     # Set dynamic update on language selection
+     language_input.change(update_voice_presets, inputs=language_input, outputs=voice_preset_input)
+
+     # Generate voice on button click
+     generate_button.click(
+         generate_speech,
+         inputs=[text_input, model_preset_input, voice_preset_input],
+         outputs=audio_output
+     )

+ app.launch()
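Note on the dropdown update: gr.Dropdown.update(choices=...) is the Gradio 3.x API and was removed in Gradio 4.x. A minimal sketch of a callback that works on both lines, assuming the same voice_presets dict as in app.py above (an abbreviated copy is inlined here only to keep the sketch self-contained):

  import gradio as gr

  # Abbreviated copy of the presets from app.py above, for a self-contained sketch.
  voice_presets = {
      "English": ["v2/en_speaker_0", "v2/en_speaker_1"],
      "Turkish": ["v2/tr_speaker_0", "v2/tr_speaker_1"],
  }

  def update_voice_presets(language):
      choices = voice_presets[language]
      # gr.update() is available in Gradio 3.x and 4.x; resetting value avoids
      # keeping a preset that belongs to the previously selected language.
      return gr.update(choices=choices, value=choices[0])

On the design choice: preloading both suno/bark and suno/bark-small at import time keeps model switching fast but roughly doubles resident memory on a single-threaded CPU Space; loading only the selected checkpoint lazily inside generate_speech would trade a slower first request for a smaller footprint.

For a quick check outside the UI, the (sample_rate, samples) pair returned by generate_speech can be written straight to a WAV file. This is a hypothetical snippet, not part of the commit, and assumes scipy is installed in the Space's environment (Bark's sample rate is typically 24 kHz):

  from scipy.io import wavfile

  # Hypothetical smoke test: synthesize one sentence with the smaller checkpoint
  # and write it to disk. generate_speech comes from app.py above.
  rate, samples = generate_speech("Merhaba, hoş geldiniz.", "suno/bark-small", "v2/tr_speaker_1")
  wavfile.write("sample.wav", rate, samples)  # float32 samples in [-1, 1] -> 32-bit float WAV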