Spaces: Running on Zero
Commit
·
c90b394
1
Parent(s):
f89f703
update gradio cached examples
Browse files
- .gitignore +2 -1
- tts/gradio_api.py +16 -1
.gitignore
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
checkpoints
|
|
|
|
1 |
+
checkpoints
|
2 |
+
official_test_case
|
tts/gradio_api.py
CHANGED
@@ -26,7 +26,7 @@ os.system('huggingface-cli download ByteDance/MegaTTS3 --local-dir ./checkpoints
|
|
26 |
CUDA_AVAILABLE = torch.cuda.is_available()
|
27 |
infer_pipe = MegaTTS3DiTInfer(device='cuda' if CUDA_AVAILABLE else 'cpu')
|
28 |
|
29 |
-
@spaces.GPU(duration=
|
30 |
def forward_gpu(file_content, latent_file, inp_text, time_step, p_w, t_w):
|
31 |
resource_context = infer_pipe.preprocess(file_content, latent_file)
|
32 |
wav_bytes = infer_pipe.forward(resource_context, inp_text, time_step=time_step, p_w=p_w, t_w=t_w)
|
@@ -36,6 +36,14 @@ def model_worker(input_queue, output_queue, device_id):
|
|
36 |
while True:
|
37 |
task = input_queue.get()
|
38 |
inp_audio_path, inp_npy_path, inp_text, infer_timestep, p_w, t_w = task
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
try:
|
40 |
convert_to_wav(inp_audio_path)
|
41 |
wav_path = os.path.splitext(inp_audio_path)[0] + '.wav'
|
@@ -48,6 +56,7 @@ def model_worker(input_queue, output_queue, device_id):
|
|
48 |
traceback.print_exc()
|
49 |
print(task, str(e))
|
50 |
output_queue.put(None)
|
|
|
51 |
|
52 |
|
53 |
def main(inp_audio, inp_npy, inp_text, infer_timestep, p_w, t_w, processes, input_queue, output_queue):
|
@@ -85,6 +94,12 @@ if __name__ == '__main__':
|
|
85 |
gr.Number(label="Intelligibility Weight", value=1.4),
|
86 |
gr.Number(label="Similarity Weight", value=3.0)], outputs=[gr.Audio(label="Synthesized Audio")],
|
87 |
title="MegaTTS3",
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
description="Upload a speech clip as a reference for timbre, " +
|
89 |
"upload the pre-extracted latent file, "+
|
90 |
"input the target text, and receive the cloned voice. "+
|
|
|
26 |
CUDA_AVAILABLE = torch.cuda.is_available()
|
27 |
infer_pipe = MegaTTS3DiTInfer(device='cuda' if CUDA_AVAILABLE else 'cpu')
|
28 |
|
29 |
+
@spaces.GPU(duration=60)
|
30 |
def forward_gpu(file_content, latent_file, inp_text, time_step, p_w, t_w):
|
31 |
resource_context = infer_pipe.preprocess(file_content, latent_file)
|
32 |
wav_bytes = infer_pipe.forward(resource_context, inp_text, time_step=time_step, p_w=p_w, t_w=t_w)
|
|
|
36 |
while True:
|
37 |
task = input_queue.get()
|
38 |
inp_audio_path, inp_npy_path, inp_text, infer_timestep, p_w, t_w = task
|
39 |
+
|
40 |
+
if inp_npy_path is None:
|
41 |
+
raise gr.Error("Please provide .npy file")
|
42 |
+
if (inp_audio_path[:-4] != inp_npy_path[:-4]):
|
43 |
+
raise gr.Error(".npy and .wav mismatch")
|
44 |
+
if len(inp_text) > 200:
|
45 |
+
raise gr.Error("input text is too long")
|
46 |
+
|
47 |
try:
|
48 |
convert_to_wav(inp_audio_path)
|
49 |
wav_path = os.path.splitext(inp_audio_path)[0] + '.wav'
|
|
|
56 |
traceback.print_exc()
|
57 |
print(task, str(e))
|
58 |
output_queue.put(None)
|
59 |
+
raise gr.Error("Generation failed")
|
60 |
|
61 |
|
62 |
def main(inp_audio, inp_npy, inp_text, infer_timestep, p_w, t_w, processes, input_queue, output_queue):
|
|
|
94 |
gr.Number(label="Intelligibility Weight", value=1.4),
|
95 |
gr.Number(label="Similarity Weight", value=3.0)], outputs=[gr.Audio(label="Synthesized Audio")],
|
96 |
title="MegaTTS3",
|
97 |
+
examples=[
|
98 |
+
['./official_test_case/范闲.wav', './official_test_case/范闲.npy', "你好呀,我是范闲。我给你读一段清泉石上流。"]
|
99 |
+
['./official_test_case/周杰伦1.wav', './official_test_case/周杰伦1.npy', "有的时候嘛,我去台湾开演唱会的时候,会很喜欢来一碗卤肉饭的。"]
|
100 |
+
['./official_test_case/keep_app.wav', './official_test_case/keep_app.npy', "Let do some exercise and practice more."]
|
101 |
+
],
|
102 |
+
cache_examples=True,
|
103 |
description="Upload a speech clip as a reference for timbre, " +
|
104 |
"upload the pre-extracted latent file, "+
|
105 |
"input the target text, and receive the cloned voice. "+
|