ZiyueJiang committed on
Commit
c90b394
·
1 Parent(s): f89f703

update gradio cached examples

Browse files
Files changed (2) hide show
  1. .gitignore +2 -1
  2. tts/gradio_api.py +16 -1
.gitignore CHANGED
@@ -1 +1,2 @@
1
- checkpoints
 
 
1
+ checkpoints
2
+ official_test_case
tts/gradio_api.py CHANGED
@@ -26,7 +26,7 @@ os.system('huggingface-cli download ByteDance/MegaTTS3 --local-dir ./checkpoints
26
  CUDA_AVAILABLE = torch.cuda.is_available()
27
  infer_pipe = MegaTTS3DiTInfer(device='cuda' if CUDA_AVAILABLE else 'cpu')
28
 
29
- @spaces.GPU(duration=120)
30
  def forward_gpu(file_content, latent_file, inp_text, time_step, p_w, t_w):
31
  resource_context = infer_pipe.preprocess(file_content, latent_file)
32
  wav_bytes = infer_pipe.forward(resource_context, inp_text, time_step=time_step, p_w=p_w, t_w=t_w)
@@ -36,6 +36,14 @@ def model_worker(input_queue, output_queue, device_id):
36
  while True:
37
  task = input_queue.get()
38
  inp_audio_path, inp_npy_path, inp_text, infer_timestep, p_w, t_w = task
 
 
 
 
 
 
 
 
39
  try:
40
  convert_to_wav(inp_audio_path)
41
  wav_path = os.path.splitext(inp_audio_path)[0] + '.wav'
@@ -48,6 +56,7 @@ def model_worker(input_queue, output_queue, device_id):
48
  traceback.print_exc()
49
  print(task, str(e))
50
  output_queue.put(None)
 
51
 
52
 
53
  def main(inp_audio, inp_npy, inp_text, infer_timestep, p_w, t_w, processes, input_queue, output_queue):
@@ -85,6 +94,12 @@ if __name__ == '__main__':
85
  gr.Number(label="Intelligibility Weight", value=1.4),
86
  gr.Number(label="Similarity Weight", value=3.0)], outputs=[gr.Audio(label="Synthesized Audio")],
87
  title="MegaTTS3",
 
 
 
 
 
 
88
  description="Upload a speech clip as a reference for timbre, " +
89
  "upload the pre-extracted latent file, "+
90
  "input the target text, and receive the cloned voice. "+
 
26
  CUDA_AVAILABLE = torch.cuda.is_available()
27
  infer_pipe = MegaTTS3DiTInfer(device='cuda' if CUDA_AVAILABLE else 'cpu')
28
 
29
+ @spaces.GPU(duration=60)
30
  def forward_gpu(file_content, latent_file, inp_text, time_step, p_w, t_w):
31
  resource_context = infer_pipe.preprocess(file_content, latent_file)
32
  wav_bytes = infer_pipe.forward(resource_context, inp_text, time_step=time_step, p_w=p_w, t_w=t_w)
 
36
  while True:
37
  task = input_queue.get()
38
  inp_audio_path, inp_npy_path, inp_text, infer_timestep, p_w, t_w = task
39
+
40
+ if inp_npy_path is None:
41
+ raise gr.Error("Please provide .npy file")
42
+ if (inp_audio_path[:-4] != inp_npy_path[:-4]):
43
+ raise gr.Error(".npy and .wav mismatch")
44
+ if len(inp_text) > 200:
45
+ raise gr.Error("input text is too long")
46
+
47
  try:
48
  convert_to_wav(inp_audio_path)
49
  wav_path = os.path.splitext(inp_audio_path)[0] + '.wav'
 
56
  traceback.print_exc()
57
  print(task, str(e))
58
  output_queue.put(None)
59
+ raise gr.Error("Generation failed")
60
 
61
 
62
  def main(inp_audio, inp_npy, inp_text, infer_timestep, p_w, t_w, processes, input_queue, output_queue):
 
94
  gr.Number(label="Intelligibility Weight", value=1.4),
95
  gr.Number(label="Similarity Weight", value=3.0)], outputs=[gr.Audio(label="Synthesized Audio")],
96
  title="MegaTTS3",
97
+ examples=[
98
+ ['./official_test_case/范闲.wav', './official_test_case/范闲.npy', "你好呀,我是范闲。我给你读一段清泉石上流。"]
99
+ ['./official_test_case/周杰伦1.wav', './official_test_case/周杰伦1.npy', "有的时候嘛,我去台湾开演唱会的时候,会很喜欢来一碗卤肉饭的。"]
100
+ ['./official_test_case/keep_app.wav', './official_test_case/keep_app.npy', "Let do some exercise and practice more."]
101
+ ],
102
+ cache_examples=True,
103
  description="Upload a speech clip as a reference for timbre, " +
104
  "upload the pre-extracted latent file, "+
105
  "input the target text, and receive the cloned voice. "+