Spaces:
Runtime error
Runtime error
File size: 5,263 Bytes
ddb4de0 8f472c2 ddb4de0 fa8b4be ddb4de0 8f472c2 ddb4de0 8f472c2 ddb4de0 8f472c2 a3bf837 8f472c2 ddb4de0 8f472c2 ddb4de0 8f472c2 ddb4de0 fa8b4be 8f472c2 fa8b4be 8f472c2 ddb4de0 8f472c2 ddb4de0 fa8b4be 469dc1d fa8b4be 8f472c2 469dc1d ddb4de0 fa8b4be 8f472c2 ddb4de0 fa8b4be ddb4de0 8f472c2 ddb4de0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import gradio as gr
import numpy as np
import torch
import os
import re_matching
from tools.sentence import split_by_language, sentence_split
import utils
from infer import infer, latest_version, get_net_g
import gradio as gr
import webbrowser
from config import config
from tools.translate import translate
from tools.webui import reload_javascript
device = config.webui_config.device
if device == "mps":
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
def speak_fn(
text: str,
exceed_flag,
speaker="TalkFlower_CNzh",
sdp_ratio=0.2, # SDP/DP混合比
noise_scale=0.6, # 感情
noise_scale_w=0.6, # 音素长度
length_scale=0.9, # 语速
language="ZH",
interval_between_para=0.2, # 段间间隔
interval_between_sent=1, # 句间间隔
):
while text.find("\n\n") != -1:
text = text.replace("\n\n", "\n")
if len(text) > 100:
print(f"Too Long Text: {text}")
gr.Warning("Too long! No more than 100 characters. 一口气不要超过 100 个字,憋坏我了。")
if exceed_flag:
return gr.update(value="./assets/audios/nomorethan100.wav", autoplay=True), False
else:
return gr.update(value="./assets/audios/overlength.wav", autoplay=True), True
audio_list = []
if len(text) > 42:
print(f"Long Text: {text}")
para_list = re_matching.cut_para(text)
for p in para_list:
audio_list_sent = []
sent_list = re_matching.cut_sent(p)
for s in sent_list:
audio = infer(
s,
sdp_ratio=sdp_ratio,
noise_scale=noise_scale,
noise_scale_w=noise_scale_w,
length_scale=length_scale,
sid=speaker,
language=language,
hps=hps,
net_g=net_g,
device=device,
)
audio_list_sent.append(audio)
silence = np.zeros((int)(44100 * interval_between_sent))
audio_list_sent.append(silence)
if (interval_between_para - interval_between_sent) > 0:
silence = np.zeros(
(int)(44100 * (interval_between_para - interval_between_sent))
)
audio_list_sent.append(silence)
audio16bit = gr.processing_utils.convert_to_16_bit_wav(
np.concatenate(audio_list_sent)
) # 对完整句子做音量归一
audio_list.append(audio16bit)
else:
print(f"Short Text: {text}")
silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
with torch.no_grad():
for piece in text.split("|"):
audio = infer(
piece,
sdp_ratio=sdp_ratio,
noise_scale=noise_scale,
noise_scale_w=noise_scale_w,
length_scale=length_scale,
sid=speaker,
language=language,
hps=hps,
net_g=net_g,
device=device,
)
audio16bit = gr.processing_utils.convert_to_16_bit_wav(audio)
audio_list.append(audio16bit)
audio_list.append(silence) # 将静音添加到列表中
audio_concat = np.concatenate(audio_list)
return (hps.data.sampling_rate, audio_concat), exceed_flag
def init_fn():
gr.Info("2023-11-24: 优化长句生成效果;更新了一些小彩蛋。")
gr.Info("2023-11-23: Only support Chinese now. Trying to train a mutilingual model.")
with open("./css/style.css", "r", encoding="utf-8") as f:
customCSS = f.read()
with gr.Blocks(css=customCSS) as demo:
exceed_flag = gr.State(value=False)
talkingFlowerPic = gr.HTML("""<img src="file=assets/flower-2x.webp" alt="TalkingFlowerPic">""", elem_id="talking_flower_pic")
input_text = gr.Textbox(lines=1, label="Talking Flower will say:", elem_classes="wonder-card", elem_id="input_text")
speak_button = gr.Button("Speak!", elem_id="comfirm_button", elem_classes="button wonder-card")
audio_output = gr.Audio(label="输出音频", show_label=False, autoplay=True, elem_id="audio_output", elem_classes="wonder-card")
demo.load(
init_fn,
inputs=[],
outputs=[]
)
input_text.submit(
speak_fn,
inputs=[input_text, exceed_flag],
outputs=[audio_output, exceed_flag],
)
speak_button.click(
speak_fn,
inputs=[input_text, exceed_flag],
outputs=[audio_output, exceed_flag],
)
if __name__ == "__main__":
hps = utils.get_hparams_from_file(config.webui_config.config_path)
version = hps.version if hasattr(hps, "version") else latest_version
net_g = get_net_g(model_path=config.webui_config.model, version=version, device=device, hps=hps)
reload_javascript()
demo.queue().launch(
allowed_paths=["./assets"],
show_api=False,
# server_name=server_name,
# server_port=server_port,
inbrowser=True,
)
|