Update app.py

app.py CHANGED
@@ -3,7 +3,6 @@ import os
 from pathlib import Path
 
 import logging
-import uuid
 import re_matching
 
 logging.getLogger("numba").setLevel(logging.WARNING)
@@ -59,9 +58,16 @@ from bs4 import BeautifulSoup
 import jieba
 import romajitable
 
-
-
-
+webBase = {
+    'pyopenjtalk-V2.3-Katakana': 'https://mahiruoshi-mygo-vits-bert.hf.space/',
+    'fugashi-V2.3-Katakana': 'https://mahiruoshi-mygo-vits-bert.hf.space/',
+}
+
+languages = [ "Auto", "ZH", "JP"]
+modelPaths = []
+modes = ['pyopenjtalk-V2.3','fugashi-V2.3']
+sentence_modes = ['sentence','paragraph']
+
 net_g = None
 
 device = (
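Reviewer note: `webBase` is keyed by the legacy `-Katakana` mode names, while the new `modes` list exposes only the short names `'pyopenjtalk-V2.3'` and `'fugashi-V2.3'`; the mode dropdown further down still tells users to switch to `pyopenjtalk-V2.3-Katakana` even though `choices=modes` no longer offers it. If both spellings are meant to reach the same Space, a tolerant lookup is one way to bridge them. A minimal sketch under that assumption (`resolve_web_base` is a hypothetical helper, not part of this commit):

```python
webBase = {
    'pyopenjtalk-V2.3-Katakana': 'https://mahiruoshi-mygo-vits-bert.hf.space/',
    'fugashi-V2.3-Katakana': 'https://mahiruoshi-mygo-vits-bert.hf.space/',
}

def resolve_web_base(mode: str) -> str:
    # Accept both the short mode names and the legacy '-Katakana' keys.
    if mode in webBase:
        return webBase[mode]
    legacy = f"{mode}-Katakana"  # assumption: legacy key maps to the same Space
    if legacy in webBase:
        return webBase[legacy]
    raise KeyError(f"no remote endpoint registered for mode {mode!r}")
```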
@@ -91,18 +97,7 @@ BandList = {
     "西克菲尔特音乐学院":["晶","未知留","八千代","栞","美帆"]
 }
 
-
-
-port = 7860
-
-languages = [ "Auto", "ZH", "JP"]
-modelPaths = []
-modes = ['pyopenjtalk-V2.3-Katakana','fugashi-V2.3-Katakana','pyopenjtalk-V2.3-Katakana-Katakana','fugashi-V2.3-Katakana-Katakana','onnx-V2.3']
-sentence_modes = ['sentence','paragraph']
-for dirpath, dirnames, filenames in os.walk('Data/BangDream/models/'):
-    for filename in filenames:
-        modelPaths.append(os.path.join(dirpath, filename))
-hps = utils.get_hparams_from_file('Data/BangDream/config.json')
+#翻译
 
 def translate(Sentence: str, to_Language: str = "jp", from_Language: str = ""):
     """
@@ -508,14 +503,14 @@ def infer(
     style_text=None,
     style_weight=0.7,
     language = "Auto",
-    mode = 'pyopenjtalk-V2.3-Katakana',
+    mode = 'pyopenjtalk-V2.3',
     skip_start=False,
     skip_end=False,
 ):
     if style_text == None:
         style_text = ""
         style_weight=0,
-    if mode == 'fugashi-V2.3-Katakana':
+    if mode == 'fugashi-V2.3':
         text = kanji_to_hiragana(text) if is_japanese(text) else text
     if language == "JP":
         text = translate(text,"jp")
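Reviewer note on the unchanged context lines above: `if style_text == None` is more idiomatically written `is None`, and `style_weight=0,` carries a trailing comma, which in Python rebinds the name to a one-element tuple rather than the number:

```python
style_weight = 0,   # binds the tuple (0,) -- almost certainly unintended
style_weight = 0    # binds the intended scalar
```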
@@ -635,7 +630,7 @@ def generate_audio_and_srt_for_group(
     """
 
     for sentence in group:
-
+        try:
             if len(sentence) > 1:
                 FakeSpeaker = sentence.split("|")[0]
                 print(FakeSpeaker)
@@ -647,7 +642,7 @@ def generate_audio_and_srt_for_group(
             speaker = i.split("|")[0]
         if sentence != '\n':
             text = (remove_annotations(sentence.split("|")[-1]).replace(" ","")+"。").replace(",。","。")
-            if mode == 'pyopenjtalk-V2.3-Katakana' or mode == 'fugashi-V2.3-Katakana':
+            if mode == 'pyopenjtalk-V2.3' or mode == 'fugashi-V2.3':
                 #print(f'{text}:{sdp_ratio}:{noise_scale}:{noise_scale_w}:{length_scale}:{length_scale}:{speaker}:{language}:{mode}:{skip_start}:{skip_end}')
                 audio = infer(
                     text,
@@ -672,7 +667,8 @@ def generate_audio_and_srt_for_group(
             end_time = start_time + duration + silenceTime
             ass_entries.append("Dialogue: 0,{},{},".format(seconds_to_ass_time(start_time), seconds_to_ass_time(end_time)) + "Default,,0,0,0,,{}".format(sentence.replace("|",":")))
             start_time = end_time
-
+        except:
+            pass
     wav_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.wav')
     ass_filename = os.path.join(outputPath, f'audiobook_part_{group_index}.ass')
     write(wav_filename, sampling_rate, gr.processing_utils.convert_to_16_bit_wav(np.concatenate(audio_fin)))
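Reviewer note: the `try`/`except: pass` added across the two hunks above silently swallows every failure in the per-sentence loop, including genuine `infer()` errors. A narrower pattern, assuming the intent is only to skip malformed `speaker|text` lines (`synth_line` below is a hypothetical stand-in for the loop body):

```python
import logging

def synth_line(sentence: str) -> None:
    # Stand-in for the real per-sentence synthesis body.
    speaker, text = sentence.split("|", 1)  # raises ValueError if "|" is missing
    print(speaker, text)

for sentence in ["ましろ|こんにちは", "no separator here"]:
    try:
        synth_line(sentence)
    except ValueError as e:
        logging.warning("skipping malformed line %r: %s", sentence, e)
```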
@@ -683,7 +679,7 @@ def generate_audio_and_srt_for_group(
 
 def generate_audio(
     inputFile,
-
+    groupSize,
     filepath,
     silenceTime,
     speakerList,
@@ -696,12 +692,15 @@ def generate_audio(
     style_text=None,
     style_weight=0.7,
     language = "Auto",
-    mode = 'pyopenjtalk-V2.3-Katakana',
+    mode = 'pyopenjtalk-V2.3',
     sentence_mode = 'sentence',
     skip_start=False,
     skip_end=False,
 ):
-    if …
+    if inputFile:
+        text = extract_text_from_file(inputFile.name)
+        sentence_mode = 'paragraph'
+    if mode == 'pyopenjtalk-V2.3' or mode == 'fugashi-V2.3':
         if sentence_mode == 'sentence':
             audio = infer(
                 text,
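Behavioral note on this hunk: uploading a txt file now overrides the user's dropdown choice, since `sentence_mode = 'paragraph'` is forced whenever `inputFile` is present; the old version instead read the file inside the paragraph branch (see the removal in the next hunk).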
@@ -719,13 +718,11 @@ def generate_audio(
             )
             return (hps.data.sampling_rate,gr.processing_utils.convert_to_16_bit_wav(audio))
         if sentence_mode == 'paragraph':
-            GROUP_SIZE = …
+            GROUP_SIZE = groupSize
             directory_path = filepath if torch.cuda.is_available() else "books"
             if os.path.exists(directory_path):
                 shutil.rmtree(directory_path)
             os.makedirs(directory_path)
-            if inputFile:
-                text = extract_text_from_file(inputFile.name)
             if language == 'Auto':
                 sentences = extrac(extract_and_convert(text))
             else:
@@ -756,129 +753,167 @@ def generate_audio(
             if not torch.cuda.is_available():
                 return result
             return result
-… (old lines 759-786: 28 removed lines opening the old HTTP endpoint; their content was not preserved in this capture)
-            groupSize,
-            filepath,
-            silenceTime,
-            speakerList,
-            text,
-            sdp_ratio,
-            noise_scale,
-            noise_scale_w,
-            length_scale,
-            sid,
-            style_text,
-            style_weight,
-            language,
-            mode,
-            sentence_mode,
-            skip_start,
-            skip_end,
-        )
-        unique_filename = f"temp{uuid.uuid4()}.wav"
-        write(unique_filename, samplerate, audio)
-        with open(unique_filename ,'rb') as bit:
-            wav_bytes = bit.read()
-        os.remove(unique_filename)
-        headers = {
-            'Content-Type': 'audio/wav',
-            'Text': unique_filename .encode('utf-8')}
-        return wav_bytes, 200, headers
-    groupSize = request.args.get('groupSize', default = 50, type = int)
-    text = request.args.get('text', default = '', type = str)
-    sdp_ratio = request.args.get('sdp_ratio', default = 0.5, type = float)
-    noise_scale = request.args.get('noise_scale', default = 0.6, type = float)
-    noise_scale_w = request.args.get('noise_scale_w', default = 0.667, type = float)
-    length_scale = request.args.get('length_scale', default = 1, type = float)
-    sid = request.args.get('speaker', default = '八千代', type = str)
-    style_text = request.args.get('style_text', default = '', type = str)
-    style_weight = request.args.get('style_weight', default = 0.7, type = float)
-    language = request.args.get('language', default = 'Auto', type = str)
-    mode = request.args.get('mode', default = 'pyopenjtalk-V2.3-Katakana', type = str)
-    sentence_mode = request.args.get('sentence_mode', default = 'sentence', type = str)
-    skip_start = request.args.get('skip_start', default = False, type = bool)
-    skip_end = request.args.get('skip_end', default = False, type = bool)
-    speakerList = request.args.get('speakerList', default = '', type = str)
-    silenceTime = request.args.get('silenceTime', default = 0.1, type = float)
-    inputFile = None
-    if not sid or not text:
-        return render_template_string(f"""
-        <!DOCTYPE html>
-        <html>
-        <head>
-            <title>TTS API Documentation</title>
-        </head>
-        <body>
-            <iframe src={webBase} style="width:100%; height:100vh; border:none;"></iframe>
-        </body>
-        </html>
-        """)
-    samplerate, audio = generate_audio(
-        inputFile,
-        groupSize,
-        None,
-        silenceTime,
-        speakerList,
-        text,
-        sdp_ratio,
-        noise_scale,
-        noise_scale_w,
-        length_scale,
-        sid,
-        style_text,
-        style_weight,
-        language,
-        mode,
-        sentence_mode,
-        skip_start,
-        skip_end,
-    )
-    unique_filename = f"temp{uuid.uuid4()}.wav"
-    write(unique_filename, samplerate, audio)
-    with open(unique_filename ,'rb') as bit:
-        wav_bytes = bit.read()
-    os.remove(unique_filename)
-    headers = {
-        'Content-Type': 'audio/wav',
-        'Text': unique_filename .encode('utf-8')}
-    return wav_bytes, 200, headers
-
+    #url = f'{webBase[mode]}?text={text}&speaker={sid}&sdp_ratio={sdp_ratio}&noise_scale={noise_scale}&noise_scale_w={noise_scale_w}&length_scale={length_scale}&language={language}&skip_start={skip_start}&skip_end={skip_end}'
+    #print(url)
+    #res = requests.get(url)
+    #改用post
+    res = requests.post(webBase[mode], json = {
+        "groupSize": groupSize,
+        "filepath": filepath,
+        "silenceTime": silenceTime,
+        "speakerList": speakerList,
+        "text": text,
+        "speaker": sid,
+        "sdp_ratio": sdp_ratio,
+        "noise_scale": noise_scale,
+        "noise_scale_w": noise_scale_w,
+        "length_scale": length_scale,
+        "language": language,
+        "skip_start": skip_start,
+        "skip_end": skip_end,
+        "mode": mode,
+        "sentence_mode": sentence_mode,
+        "style_text": style_text,
+        "style_weight": style_weight
+    })
+    audio = res.content
+    with open('output.wav', 'wb') as code:
+        code.write(audio)
+    file_path = "output.wav"
+    return file_path
 
 if __name__ == "__main__":
     download_unidic()
     tagger = Tagger()
+    for dirpath, dirnames, filenames in os.walk('Data/BangDream/models/'):
+        for filename in filenames:
+            modelPaths.append(os.path.join(dirpath, filename))
+    hps = utils.get_hparams_from_file('Data/BangDream/config.json')
     net_g = get_net_g(
         model_path=modelPaths[-1], device=device, hps=hps
     )
     speaker_ids = hps.data.spk2id
     speakers = list(speaker_ids.keys())
-
+    with gr.Blocks() as app:
+        gr.Markdown(value="""
+([Bert-Vits2](https://github.com/Stardust-minus/Bert-VITS2) V2.3)少歌邦邦全员在线语音合成\n
+[好玩的](http://love.soyorin.top/)\n
+该界面的真实链接(国内可用): https://mahiruoshi-bangdream-bert-vits2.hf.space/\n
+API: https://mahiruoshi-bert-vits2-api.hf.space/ \n
+调用方式: https://mahiruoshi-bert-vits2-api.hf.space/?text={{speakText}}&speaker=chosen_speaker\n
+推荐搭配[Legado开源阅读](https://github.com/gedoor/legado)或[聊天bot](https://github.com/Paraworks/BangDreamAi)使用\n
+二创请标注作者:B站@Mahiroshi: https://space.bilibili.com/19874615\n
+训练数据集归属:BangDream及少歌手游,提取自BestDori,[数据集获取流程](https://nijigaku.top/2023/09/29/Bestbushiroad%E8%AE%A1%E5%88%92-vits-%E9%9F%B3%E9%A2%91%E6%8A%93%E5%8F%96%E5%8F%8A%E6%95%B0%E6%8D%AE%E9%9B%86%E5%AF%B9%E9%BD%90/)\n
+BangDream数据集下载[链接](https://huggingface.co/spaces/Mahiruoshi/BangDream-Bert-VITS2/blob/main/%E7%88%AC%E8%99%AB/SortPathUrl.txt)\n
+!!!注意:huggingface容器仅用作展示,建议在右上角更多选项中克隆本项目或Docker运行app.py/server.py,环境参考requirements.txt\n""")
+        for band in BandList:
+            with gr.TabItem(band):
+                for name in BandList[band]:
+                    with gr.TabItem(name):
+                        with gr.Row():
+                            with gr.Column():
+                                with gr.Row():
+                                    gr.Markdown(
+                                        '<div align="center">'
+                                        f'<img style="width:auto;height:400px;" src="https://mahiruoshi-bangdream-bert-vits2.hf.space/file/image/{name}.png">'
+                                        '</div>'
+                                    )
+                                with gr.Accordion(label="参数设定", open=False):
+                                    sdp_ratio = gr.Slider(
+                                        minimum=0, maximum=1, value=0.5, step=0.01, label="SDP/DP混合比"
+                                    )
+                                    noise_scale = gr.Slider(
+                                        minimum=0.1, maximum=2, value=0.6, step=0.01, label="Noise:感情调节"
+                                    )
+                                    noise_scale_w = gr.Slider(
+                                        minimum=0.1, maximum=2, value=0.667, step=0.01, label="Noise_W:音素长度"
+                                    )
+                                    skip_start = gr.Checkbox(label="skip_start")
+                                    skip_end = gr.Checkbox(label="skip_end")
+                                    speaker = gr.Dropdown(
+                                        choices=speakers, value=name, label="说话人"
+                                    )
+                                    length_scale = gr.Slider(
+                                        minimum=0.1, maximum=2, value=1, step=0.01, label="语速调节"
+                                    )
+                                    language = gr.Dropdown(
+                                        choices=languages, value="Auto", label="语言选择,若不选自动则会将输入语言翻译为日语或中文"
+                                    )
+                                    mode = gr.Dropdown(
+                                        choices=modes, value="pyopenjtalk-V2.3", label="TTS模式,合成少歌角色需要切换成 pyopenjtalk-V2.3-Katakana "
+                                    )
+                                    sentence_mode = gr.Dropdown(
+                                        choices=sentence_modes, value="sentence", label="文本合成模式"
+                                    )
+                                with gr.Accordion(label="扩展选项", open=False):
+                                    inputFile = gr.UploadButton(label="txt文件输入")
+                                    speakerList = gr.TextArea(
+                                        label="角色对应表,如果你记不住角色名可以这样,左边是你想要在每一句话合成中用到的speaker(见角色清单)右边是你上传文本时分隔符左边设置的说话人:{ChoseSpeakerFromConfigList}|{SeakerInUploadText}",
+                                        value = "ましろ|真白\n七深|七深\n透子|透子\nつくし|筑紫\n瑠唯|瑠唯\nそよ|素世\n祥子|祥子",
+                                    )
+                                    groupSize = gr.Slider(
+                                        minimum=10, maximum=1000 if torch.cuda.is_available() else 50,value = 50, step=1, label="单个音频文件包含的最大句子数"
+                                    )
+                                    filepath = gr.TextArea(
+                                        label="本地合成时的音频存储文件夹(会清空文件夹,别把C盘删了)",
+                                        value = "D:/audiobook/book1",
+                                    )
+                                    silenceTime = gr.Slider(
+                                        minimum=0, maximum=1, value=0.5, step=0.01, label="句子的间隔"
+                                    )
+                                    modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
+                                    btnMod = gr.Button("载入模型")
+                                    statusa = gr.TextArea(label = "模型加载状态")
+                                    btnMod.click(loadmodel, inputs=[modelstrs], outputs = [statusa])
+                            with gr.Column():
+                                text = gr.TextArea(
+                                    label="文本输入,可用'|'分割说话人和文本,注意换行",
+                                    info="输入纯日语或者中文",
+                                    #placeholder=f"{name}|你觉得你是职业歌手吗\n真白|我觉得我是",
+                                    value=f"{name}|你觉得你是职业歌手吗\n真白|我觉得我是"
+                                )
+                                style_text = gr.Textbox(
+                                    label="情感辅助文本",
+                                    info="语言保持跟主文本一致,文本可以参考训练集:https://huggingface.co/spaces/Mahiruoshi/BangDream-Bert-VITS2/blob/main/filelists/Mygo.list)",
+                                    placeholder="使用辅助文本的语意来辅助生成对话(语言保持与主文本相同)\n\n"
+                                    "**注意**:不要使用**指令式文本**(如:开心),要使用**带有强烈情感的文本**(如:我好快乐!!!)"
+                                )
+                                style_weight = gr.Slider(
+                                    minimum=0,
+                                    maximum=1,
+                                    value=0.7,
+                                    step=0.1,
+                                    label="Weight",
+                                    info="主文本和辅助文本的bert混合比率,0表示仅主文本,1表示仅辅助文本",
+                                )
+                                btn = gr.Button("点击生成", variant="primary")
+                                audio_output = gr.Audio(label="Output Audio")
+                                btntran = gr.Button("快速中翻日")
+                                translateResult = gr.TextArea(label="使用百度翻译",placeholder="从这里复制翻译后的文本")
+                                btntran.click(translate, inputs=[text], outputs = [translateResult])
+                                btn.click(
+                                    generate_audio,
+                                    inputs=[
+                                        inputFile,
+                                        groupSize,
+                                        filepath,
+                                        silenceTime,
+                                        speakerList,
+                                        text,
+                                        sdp_ratio,
+                                        noise_scale,
+                                        noise_scale_w,
+                                        length_scale,
+                                        speaker,
+                                        style_text,
+                                        style_weight,
+                                        language,
+                                        mode,
+                                        sentence_mode,
+                                        skip_start,
+                                        skip_end
+                                    ],
+                                    outputs=[audio_output],
+                                )
     print("推理页面已开启!")
+    app.launch()
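Usage note (illustrative, not part of the commit): the new fallback path in `generate_audio` forwards the whole parameter set to the remote Space as a JSON POST body and writes the returned bytes to `output.wav`. A minimal standalone client against that contract might look like the sketch below; the endpoint URL and field names are taken from the diff, while the concrete values and the assumption that the Space answers with raw WAV bytes are mine:

```python
import requests

# Endpoint from the webBase table in this commit; payload fields mirror the
# json dict passed to requests.post in generate_audio. Values are examples.
ENDPOINT = "https://mahiruoshi-mygo-vits-bert.hf.space/"

payload = {
    "groupSize": 50,
    "filepath": "",
    "silenceTime": 0.5,
    "speakerList": "",
    "text": "ましろ|こんにちは",
    "speaker": "八千代",
    "sdp_ratio": 0.5,
    "noise_scale": 0.6,
    "noise_scale_w": 0.667,
    "length_scale": 1,
    "language": "Auto",
    "skip_start": False,
    "skip_end": False,
    "mode": "pyopenjtalk-V2.3-Katakana",
    "sentence_mode": "sentence",
    "style_text": "",
    "style_weight": 0.7,
}

res = requests.post(ENDPOINT, json=payload)
res.raise_for_status()
with open("output.wav", "wb") as f:
    f.write(res.content)  # assumed: the Space returns raw WAV bytes
```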