# NOTE(review): the six lines below were Hugging Face Spaces page artifacts
# ("Spaces:", build status, file size, commit hashes, a column of line
# numbers) captured by the scrape — they are not code and broke parsing.
import gradio as gr
# import matplotlib.pyplot as plt
import logging
# logger = logging.getLogger(__name__)
import os
import json
import math
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import commons
import utils
from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
from models import SynthesizerTrn
from text.symbols import symbols
from text import text_to_sequence
import time
def get_text(text, hps):
    """Convert raw text into a tensor of symbol ids for the synthesizer.

    Cleans ``text`` with the cleaner names listed in ``hps.data.text_cleaners``,
    optionally interleaves blank (0) tokens between symbols when
    ``hps.data.add_blank`` is set, and returns the result as a
    ``torch.LongTensor`` ready for ``SynthesizerTrn.infer``.
    """
    sequence = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Blank tokens between symbols; matches how the model was trained.
        sequence = commons.intersperse(sequence, 0)
    return torch.LongTensor(sequence)
def load_model(config_path, pth_path):
    """Load hyper-parameters and synthesizer weights into module globals.

    Side effects: sets the globals ``dev`` (torch device, CUDA if available),
    ``hps_ms`` (hyper-parameters parsed from ``config_path``) and ``net_g``
    (a ``SynthesizerTrn`` in eval mode with weights from ``pth_path``).
    """
    global dev, hps_ms, net_g

    dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    hps_ms = utils.get_hparams_from_file(config_path)

    model = SynthesizerTrn(
        len(symbols),
        hps_ms.data.filter_length // 2 + 1,
        hps_ms.train.segment_size // hps_ms.data.hop_length,
        **hps_ms.model,
    )
    net_g = model.to(dev)
    net_g.eval()
    utils.load_checkpoint(pth_path, net_g)
    print(f"{pth_path}加载成功!")
def infer(c_id, text):
    """Synthesize ``text`` as speaker ``c_id``.

    Uses the module globals ``net_g``/``hps_ms``/``dev`` populated by
    ``load_model``. Returns the waveform as a 1-D float numpy array.
    """
    phoneme_ids = get_text(text, hps_ms)
    with torch.no_grad():
        x = phoneme_ids.to(dev).unsqueeze(0)  # add batch dimension
        x_lengths = torch.LongTensor([phoneme_ids.size(0)]).to(dev)
        speaker = torch.LongTensor([c_id]).to(dev)
        out = net_g.infer(
            x,
            x_lengths,
            sid=speaker,
            noise_scale=0.667,
            noise_scale_w=0.8,
            length_scale=1,
        )
        audio = out[0][0, 0].data.cpu().float().numpy()
    return audio
# Checkpoint and hyper-parameter locations for the pretrained model.
pth_path = "model/G_70000.pth"
config_path = "configs/config.json"
# Maps a character display name to the model's speaker id.
# NOTE(review): ids start at 1 — presumably speaker id 0 is unused in this
# checkpoint; confirm against the training config.
character_dict = {
"十香": 1,
"折纸": 2,
"狂三": 3,
"四糸乃": 4,
"琴里": 5,
"夕弦": 6,
"耶俱矢": 7,
"美九": 8,
"凛祢": 9,
"凛绪": 10,
"鞠亚": 11,
"鞠奈": 12,
"真那": 13,
}
# Populate the module-level globals (dev, hps_ms, net_g) at import time.
load_model(config_path, pth_path)
# NOTE(review): a previous gr.Blocks-based UI lived here, disabled by being
# wrapped in a triple-quoted string. It was dead code — never executed — and
# contained its own bugs (e.g. `gr.Dropdown([character_dict.keys], ...)`
# passed a bound method instead of the name list, and
# `character_dict[tts_input2]` indexed the dict with a component object), so
# it has been removed rather than kept as commented-out code.
# NOTE(review): this gr.HTML component is created at module top level,
# outside any gr.Blocks/Interface context — it is presumably never rendered.
# Confirm, and if the banner should appear, pass it to the Interface (e.g.
# via its `description=` argument) instead.
gr.HTML("""
<div
style="width: 100%;padding-top:116px;background-image: url('https://huggingface.co/spaces/tumuyan/vits-miki/resolve/main/bg.webp');;background-size:cover">
<div>
<div>
<h4 class="h-sign" style="font-size: 12px;">
这是一个使用<a href="https://github.com/thesupersonic16/DALTools" target="_blank">thesupersonic16/DALTools</a>提供的解包音频作为数据集,
使用<a href="https://github.com/jaywalnut310/vits" target="_blank">VITS</a>技术训练的语音合成demo。
</h4>
</div>
</div>
</div>
""")
def _infer_by_name(character, text):
    """Adapter for the UI: map the selected character name to its integer
    speaker id before calling ``infer`` (which feeds the id into a
    ``torch.LongTensor``, so it cannot accept the name string directly)."""
    return infer(character_dict[character], text)


# Fixes two bugs in the original wiring:
#  * `gr.Dropdown([character_dict.keys], ...)` offered a single bound-method
#    object as the only choice instead of the character names.
#  * With `type="value"` the dropdown delivers the *name* string, but `infer`
#    expects the integer speaker id — hence the `_infer_by_name` adapter.
# NOTE(review): `infer` returns a bare numpy array; gr.Audio typically
# expects a `(sample_rate, data)` tuple for numpy output — confirm playback
# works, otherwise return `(hps_ms.data.sampling_rate, audio)`.
demo = gr.Interface(
    fn=_infer_by_name,
    inputs=[
        gr.Dropdown(list(character_dict.keys()), type="value", label="选择角色"),
        gr.TextArea(label="请输入文本(仅支持日语)", value="你好,世界!"),
    ],
    outputs=[gr.Audio(label="Output")],
)
demo.launch()
# NOTE(review): `demo.launch()` above blocks until the server stops, so this
# component is only created after shutdown — and outside any Blocks context —
# meaning the disclaimer is never shown. Consider folding it into the
# Interface (e.g. its `article=` argument). A stray " |" scrape artifact at
# the end of the original line has been removed.
gr.HTML("""
<div style="text-align:center">
仅供学习交流,不可用于商业或非法用途
<br/>
使用本项目模型直接或间接生成的音频,必须声明由AI技术或VITS技术合成
</div>
""")