Spaces:
Runtime error
Runtime error
File size: 5,260 Bytes
425b9a7 c86f2b2 425b9a7 c86f2b2 425b9a7 c86f2b2 425b9a7 63bb040 425b9a7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import os, logging, datetime, json, random
import gradio as gr
import numpy as np
import torch
import re_matching
import utils
from infer import infer, latest_version, get_net_g
import gradio as gr
from config import config
from tools.webui import reload_javascript, get_character_html
logging.basicConfig(
level=logging.INFO,
format='[%(levelname)s|%(asctime)s]%(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
device = config.webui_config.device
if device == "mps":
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
hps = utils.get_hparams_from_file(config.webui_config.config_path)
version = hps.version if hasattr(hps, "version") else latest_version
net_g = get_net_g(model_path=config.webui_config.model, version=version, device=device, hps=hps)
with open("./css/style.css", "r", encoding="utf-8") as f:
customCSS = f.read()
with open("./assets/lines.json", "r", encoding="utf-8") as f:
full_lines = json.load(f)
def speak_fn(
text: str,
exceed_flag,
speaker="TalkFlower_CNzh",
sdp_ratio=0.2, # SDP/DP混合比
noise_scale=0.6, # 感情
noise_scale_w=0.6, # 音素长度
length_scale=0.9, # 语速
language="ZH",
reference_audio=None,
emotion=4,
interval_between_para=0.2, # 段间间隔
interval_between_sent=1, # 句间间隔
):
while text.find("\n\n") != -1:
text = text.replace("\n\n", "\n")
if len(text) > 100:
logging.info(f"Too Long Text: {text}")
if exceed_flag:
text = "不要超过100字!"
audio_value = "./assets/audios/nomorethan100.wav"
else:
text = "这句太长了,憋坏我啦!"
audio_value = "./assets/audios/overlength.wav"
exceed_flag = not exceed_flag
else:
audio_list = []
if len(text) > 42:
logging.info(f"Long Text: {text}")
para_list = re_matching.cut_para(text)
for p in para_list:
audio_list_sent = []
sent_list = re_matching.cut_sent(p)
for s in sent_list:
audio = infer(
s,
sdp_ratio=sdp_ratio,
noise_scale=noise_scale,
noise_scale_w=noise_scale_w,
length_scale=length_scale,
sid=speaker,
language=language,
hps=hps,
net_g=net_g,
device=device,
reference_audio=reference_audio,
emotion=emotion,
)
audio_list_sent.append(audio)
silence = np.zeros((int)(44100 * interval_between_sent))
audio_list_sent.append(silence)
if (interval_between_para - interval_between_sent) > 0:
silence = np.zeros((int)(44100 * (interval_between_para - interval_between_sent)))
audio_list_sent.append(silence)
audio16bit = gr.processing_utils.convert_to_16_bit_wav(np.concatenate(audio_list_sent)) # 对完整句子做音量归一
audio_list.append(audio16bit)
else:
logging.info(f"Short Text: {text}")
silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
with torch.no_grad():
for piece in text.split("|"):
audio = infer(
piece,
sdp_ratio=sdp_ratio,
noise_scale=noise_scale,
noise_scale_w=noise_scale_w,
length_scale=length_scale,
sid=speaker,
language=language,
hps=hps,
net_g=net_g,
device=device,
reference_audio=reference_audio,
emotion=emotion,
)
audio16bit = gr.processing_utils.convert_to_16_bit_wav(audio)
audio_list.append(audio16bit)
audio_list.append(silence) # 将静音添加到列表中
audio_concat = np.concatenate(audio_list)
audio_value = (hps.data.sampling_rate, audio_concat)
return gr.update(value=audio_value, autoplay=True), get_character_html(text), exceed_flag, gr.update(interactive=True)
def submit_lock_fn():
return gr.update(interactive=False)
def init_fn():
gr.Info("2023-11-24: 优化长句生成效果;增加示例;更新了一些小彩蛋;画了一些大饼)")
gr.Info("Only support Chinese now. Trying to train a mutilingual model. 欢迎在 Community 中提建议~")
index = random.randint(1,7)
welcome_text = get_sentence("Welcome", index)
return get_character_html(welcome_text) #gr.update(value=f"./assets/audios/Welcome{index}.wav", autoplay=False),
def get_sentence(category, index=-1):
if index == -1:
index = random.randint(1, len(full_lines[category]))
return full_lines[category][f"{index}"]
|