Spaces: Running on Zero
Update app.py
Browse files
app.py
CHANGED
@@ -333,7 +333,7 @@ def run_edit_en(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
|
|
333 |
audio_tensors = []
|
334 |
# save segments for comparison
|
335 |
new_audio = new_audio[0].cpu()
|
336 |
-
torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
337 |
|
338 |
audio_tensors.append(new_audio)
|
339 |
output_audio = get_output_audio(audio_tensors, codec_audio_sr)
|
@@ -423,6 +423,7 @@ def run_tts_en(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
|
|
423 |
audio_tensors = []
|
424 |
# save segments for comparison
|
425 |
new_audio = new_audio[0].cpu()
|
|
|
426 |
torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
427 |
|
428 |
[new_transcript, new_segments, _, _] = transcribe_en(audio_path)
|
@@ -532,7 +533,7 @@ def run_edit_zh(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
|
|
532 |
audio_tensors = []
|
533 |
# save segments for comparison
|
534 |
new_audio = new_audio[0].cpu()
|
535 |
-
torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
536 |
audio_tensors.append(new_audio)
|
537 |
output_audio = get_output_audio(audio_tensors, codec_audio_sr)
|
538 |
|
@@ -626,6 +627,7 @@ def run_tts_zh(seed, sub_amount, aug_text, cfg_coef, cfg_stride, prompt_length,
|
|
626 |
audio_tensors = []
|
627 |
# save segments for comparison
|
628 |
new_audio = new_audio[0].cpu()
|
|
|
629 |
torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
630 |
|
631 |
[new_transcript, new_segments, _,_] = transcribe_zh(audio_path)
|
@@ -837,8 +839,8 @@ if __name__ == "__main__":
|
|
837 |
info="set to 1 to use classifer-free guidance, change if you don't like the results")
|
838 |
cfg_coef3 = gr.Number(label="cfg_coef", value=1.5,
|
839 |
info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
|
840 |
-
cfg_stride3 = gr.Number(label="cfg_stride", value=
|
841 |
-
info="cfg stride,
|
842 |
prompt_length3 = gr.Number(label="prompt_length", value=3,
|
843 |
info="used for tts prompt, will automatically cut the prompt audio to this length")
|
844 |
sub_amount3 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
|
@@ -891,8 +893,8 @@ if __name__ == "__main__":
|
|
891 |
info="set to 1 to use classifer-free guidance, change if you don't like the results")
|
892 |
cfg_coef4 = gr.Number(label="cfg_coef", value=1.5,
|
893 |
info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
|
894 |
-
cfg_stride4 = gr.Number(label="cfg_stride", value=
|
895 |
-
info="cfg stride,
|
896 |
prompt_length4 = gr.Number(label="prompt_length", value=3,
|
897 |
info="used for tts prompt, will automatically cut the prompt audio to this length")
|
898 |
sub_amount4 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
|
|
|
333 |
audio_tensors = []
|
334 |
# save segments for comparison
|
335 |
new_audio = new_audio[0].cpu()
|
336 |
+
# torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
337 |
|
338 |
audio_tensors.append(new_audio)
|
339 |
output_audio = get_output_audio(audio_tensors, codec_audio_sr)
|
|
|
423 |
audio_tensors = []
|
424 |
# save segments for comparison
|
425 |
new_audio = new_audio[0].cpu()
|
426 |
+
audio_path = audio_path.replace('.','_tmp.')
|
427 |
torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
428 |
|
429 |
[new_transcript, new_segments, _, _] = transcribe_en(audio_path)
|
|
|
533 |
audio_tensors = []
|
534 |
# save segments for comparison
|
535 |
new_audio = new_audio[0].cpu()
|
536 |
+
# torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
537 |
audio_tensors.append(new_audio)
|
538 |
output_audio = get_output_audio(audio_tensors, codec_audio_sr)
|
539 |
|
|
|
627 |
audio_tensors = []
|
628 |
# save segments for comparison
|
629 |
new_audio = new_audio[0].cpu()
|
630 |
+
audio_path = audio_path.replace('.','_tmp.')
|
631 |
torchaudio.save(audio_path, new_audio, codec_audio_sr)
|
632 |
|
633 |
[new_transcript, new_segments, _,_] = transcribe_zh(audio_path)
|
|
|
839 |
info="set to 1 to use classifer-free guidance, change if you don't like the results")
|
840 |
cfg_coef3 = gr.Number(label="cfg_coef", value=1.5,
|
841 |
info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
|
842 |
+
cfg_stride3 = gr.Number(label="cfg_stride", value=3,
|
843 |
+
info="cfg stride, 3 is a good value for Mandarin, change if you don't like the results")
|
844 |
prompt_length3 = gr.Number(label="prompt_length", value=3,
|
845 |
info="used for tts prompt, will automatically cut the prompt audio to this length")
|
846 |
sub_amount3 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
|
|
|
893 |
info="set to 1 to use classifer-free guidance, change if you don't like the results")
|
894 |
cfg_coef4 = gr.Number(label="cfg_coef", value=1.5,
|
895 |
info="cfg guidance scale, 1.5 is a good value, change if you don't like the results")
|
896 |
+
cfg_stride4 = gr.Number(label="cfg_stride", value=3,
|
897 |
+
info="cfg stride, 3 is a good value for Mandarin, change if you don't like the results")
|
898 |
prompt_length4 = gr.Number(label="prompt_length", value=3,
|
899 |
info="used for tts prompt, will automatically cut the prompt audio to this length")
|
900 |
sub_amount4 = gr.Number(label="sub_amount", value=0.12, info="margin to the left and right of the editing segment, change if you don't like the results")
|