# NOTE(review): removed scraped page residue (build status, commit hashes,
# column ruler) that preceded the code — it was not valid Python.
import gradio as gr
# import matplotlib.pyplot as plt
import logging
# logger = logging.getLogger(__name__)
import os
import json
import math
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import commons
import utils
from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
from models import SynthesizerTrn
from text.symbols import symbols
from text import text_to_sequence
import time
def get_text(text, hps):
    """Convert raw text into a LongTensor of symbol ids.

    Runs the text cleaners configured in ``hps.data.text_cleaners`` and,
    when ``hps.data.add_blank`` is set, interleaves a blank token (id 0)
    between every symbol.
    """
    sequence = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        # Pad a blank (0) between consecutive symbols, as VITS expects
        # when trained with add_blank.
        sequence = commons.intersperse(sequence, 0)
    return torch.LongTensor(sequence)
def load_model(config_path, pth_path):
    """Build the VITS synthesizer and load checkpoint weights.

    Populates the module-level globals ``dev`` (torch device), ``hps``
    (hyperparameters parsed from ``config_path``) and ``net_g`` (the
    synthesizer in eval mode with weights from ``pth_path``).
    """
    global dev, hps, net_g
    use_cuda = torch.cuda.is_available()
    dev = torch.device("cuda:0" if use_cuda else "cpu")
    hps = utils.get_hparams_from_file(config_path)
    model = SynthesizerTrn(
        len(symbols),
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        n_speakers=hps.data.n_speakers,
        **hps.model)
    net_g = model.to(dev)
    net_g.eval()
    utils.load_checkpoint(pth_path, net_g)
    print(f"{pth_path}加载成功!")
def infer(text):
    """Synthesize speech for ``text``; returns ``(sampling_rate, waveform)``.

    Uses the module globals set by ``load_model``. The speaker id is
    hard-coded to 2.
    """
    speaker_id = 2
    phoneme_ids = get_text(text, hps)
    with torch.no_grad():
        batch = phoneme_ids.unsqueeze(0).to(dev)
        lengths = torch.LongTensor([phoneme_ids.size(0)]).to(dev)
        sid = torch.LongTensor([speaker_id]).to(dev)
        out = net_g.infer(batch, lengths, sid=sid,
                          noise_scale=.667, noise_scale_w=0.8,
                          length_scale=1)
        # out[0] is the audio batch; take sample 0, channel 0 as a numpy array.
        audio = out[0][0, 0].data.cpu().float().numpy()
    return (hps.data.sampling_rate, audio)
# Default checkpoint and config consumed by load_model() at import time.
pth_path = "model/G_70000.pth"
config_path = "configs/config.json"
# Character name -> speaker id mapping.
# NOTE(review): currently unused — infer() hard-codes speaker id 2;
# presumably intended for a speaker-selection dropdown. Verify intent.
character_dict = {
"十香": 1,
"折纸": 2,
"狂三": 3,
"四糸乃": 4,
"琴里": 5,
"夕弦": 6,
"耶俱矢": 7,
"美九": 8,
"凛祢": 9,
"凛绪": 10,
"鞠亚": 11,
"鞠奈": 12,
"真那": 13,
}
# Load the model once at import time so the UI can synthesize immediately.
load_model(config_path, pth_path)

app = gr.Blocks()
with app:
    # Header crediting the dataset tooling and the VITS model.
    # Fixed: the original HTML closed </div> twice for a single <div>.
    gr.HTML("""
        <div>
        <h4 class="h-sign" style="font-size: 12px;">
            这是一个使用<a href="https://github.com/thesupersonic16/DALTools" target="_blank">thesupersonic16/DALTools</a>提供的解包音频作为数据集,
            使用<a href="https://github.com/jaywalnut310/vits" target="_blank">VITS</a>技术训练的语音合成demo。
        </h4>
        </div>
        """)
    tmp = gr.Markdown("")
    with gr.Tabs():
        with gr.Row():
            tts_input1 = gr.TextArea(
                label="请输入文本(仅支持日语)", value="你好,世界!")
            tts_submit = gr.Button("用文本合成", variant="primary")
            tts_output2 = gr.Audio(label="Output")
            # Wire the button: text in, (sample_rate, waveform) out.
            tts_submit.click(infer, [tts_input1], [tts_output2])
    # Usage disclaimer footer.
    gr.HTML("""
        <div style="text-align:center">
        仅供学习交流,不可用于商业或非法用途
        <br/>
        使用本项目模型直接或间接生成的音频,必须声明由AI技术或VITS技术合成
        </div>
        """)

app.launch()