OpenSound committed on
Commit
bc21a92
·
verified ·
1 Parent(s): 951dfe7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -333,7 +333,7 @@ def run_edit_en(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
333
  audio_tensors = []
334
  # save segments for comparison
335
  new_audio = new_audio[0].cpu()
336
- torchaudio.save(audio_path, new_audio, codec_audio_sr)
337
 
338
  audio_tensors.append(new_audio)
339
  output_audio = get_output_audio(audio_tensors, codec_audio_sr)
@@ -423,6 +423,7 @@ def run_tts_en(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
423
  audio_tensors = []
424
  # save segments for comparison
425
  new_audio = new_audio[0].cpu()
 
426
  torchaudio.save(audio_path, new_audio, codec_audio_sr)
427
 
428
  [new_transcript, new_segments, _, _] = transcribe_en(audio_path)
@@ -532,7 +533,7 @@ def run_edit_zh(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
532
  audio_tensors = []
533
  # save segments for comparison
534
  new_audio = new_audio[0].cpu()
535
- torchaudio.save(audio_path, new_audio, codec_audio_sr)
536
  audio_tensors.append(new_audio)
537
  output_audio = get_output_audio(audio_tensors, codec_audio_sr)
538
 
@@ -626,6 +627,7 @@ def run_tts_zh(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
626
  audio_tensors = []
627
  # save segments for comparison
628
  new_audio = new_audio[0].cpu()
 
629
  torchaudio.save(audio_path, new_audio, codec_audio_sr)
630
 
631
  [new_transcript, new_segments, _,_] = transcribe_zh(audio_path)
@@ -837,8 +839,8 @@ if __name__ == "__main__":
837
  info="set to 1 to use classifer-free guidance, change if you don't like the results")
838
  cfg_coef3 = gr.Number(label="cfg_coef", value=1.5,
839
  info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
840
- cfg_stride3 = gr.Number(label="cfg_stride", value=1,
841
- info="cfg stride, 1 is a good value for Mandarin, change if you don't like the results")
842
  prompt_length3 = gr.Number(label="prompt_length", value=3,
843
  info="used for tts prompt, will automatically cut the prompt audio to this length")
844
  sub_amount3 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
@@ -891,8 +893,8 @@ if __name__ == "__main__":
891
  info="set to 1 to use classifer-free guidance, change if you don't like the results")
892
  cfg_coef4 = gr.Number(label="cfg_coef", value=1.5,
893
  info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
894
- cfg_stride4 = gr.Number(label="cfg_stride", value=1,
895
- info="cfg stride, 1 is a good value for Mandarin, change if you don't like the results")
896
  prompt_length4 = gr.Number(label="prompt_length", value=3,
897
  info="used for tts prompt, will automatically cut the prompt audio to this length")
898
  sub_amount4 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
 
333
  audio_tensors = []
334
  # save segments for comparison
335
  new_audio = new_audio[0].cpu()
336
+ # torchaudio.save(audio_path, new_audio, codec_audio_sr)
337
 
338
  audio_tensors.append(new_audio)
339
  output_audio = get_output_audio(audio_tensors, codec_audio_sr)
 
423
  audio_tensors = []
424
  # save segments for comparison
425
  new_audio = new_audio[0].cpu()
426
+ audio_path = audio_path.replace('.','_tmp.')
427
  torchaudio.save(audio_path, new_audio, codec_audio_sr)
428
 
429
  [new_transcript, new_segments, _, _] = transcribe_en(audio_path)
 
533
  audio_tensors = []
534
  # save segments for comparison
535
  new_audio = new_audio[0].cpu()
536
+ # torchaudio.save(audio_path, new_audio, codec_audio_sr)
537
  audio_tensors.append(new_audio)
538
  output_audio = get_output_audio(audio_tensors, codec_audio_sr)
539
 
 
627
  audio_tensors = []
628
  # save segments for comparison
629
  new_audio = new_audio[0].cpu()
630
+ audio_path = audio_path.replace('.','_tmp.')
631
  torchaudio.save(audio_path, new_audio, codec_audio_sr)
632
 
633
  [new_transcript, new_segments, _,_] = transcribe_zh(audio_path)
 
839
  info="set to 1 to use classifer-free guidance, change if you don't like the results")
840
  cfg_coef3 = gr.Number(label="cfg_coef", value=1.5,
841
  info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
842
+ cfg_stride3 = gr.Number(label="cfg_stride", value=3,
843
+ info="cfg stride, 3 is a good value for Mandarin, change if you don't like the results")
844
  prompt_length3 = gr.Number(label="prompt_length", value=3,
845
  info="used for tts prompt, will automatically cut the prompt audio to this length")
846
  sub_amount3 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
 
893
  info="set to 1 to use classifer-free guidance, change if you don't like the results")
894
  cfg_coef4 = gr.Number(label="cfg_coef", value=1.5,
895
  info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
896
+ cfg_stride4 = gr.Number(label="cfg_stride", value=3,
897
+ info="cfg stride, 3 is a good value for Mandarin, change if you don't like the results")
898
  prompt_length4 = gr.Number(label="prompt_length", value=3,
899
  info="used for tts prompt, will automatically cut the prompt audio to this length")
900
  sub_amount4 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")