# NOTE: the web file viewer's size banner ("File size: 9,101 Bytes") and its
# per-line commit-hash column were captured here; they are not program text.
# Work in Progress by Marco Barnig
# credits
# https://piper.ttstool.com/
# https://huggingface.co/spaces/broadfield/piper-fast-tts
# https://github.com/rhasspy
# https://github.com/rhasspy/piper
# https://github.com/broadfield-dev/PyPiperTTS-win
# https://github.com/broadfield-dev/PyPiperTTS
import gradio as gr
import subprocess
import os
import json
import uuid
import requests
from pypipertts import PyPiper
# Single shared PyPiper instance; the model loader and the UI callbacks below use it.
pp=PyPiper()
# Default text shown in the "Text" box: a Luxembourgish rendering of the fable
# "The North Wind and the Sun". The literal starts with a newline.
my_examples = """
An der Zäit hunn sech den Nordwand an d’Sonn gestridden, wie vun hinnen zwee wuel méi staark wier,
wéi e Wanderer, deen an ee waarme Mantel agepak war, iwwert de Wee koum.
Si goufen sech eens, datt deejéinege fir dee Stäerkste gëlle sollt, deen de Wanderer forcéiere géif, säi Mantel auszedoen.
Den Nordwand huet mat aller Force geblosen, awer wat e méi geblosen huet, wat de Wanderer sech méi a säi Mantel agewéckelt huet.
Um Enn huet den Nordwand säi Kampf opginn. Dunn huet d’Sonn d’Loft mat hire frëndleche Strale gewiermt,
a schonn no kuerzer Zäit huet de Wanderer säi Mantel ausgedoen. Do huet den Nordwand missen zouginn,
datt d’Sonn vun hinnen zwee dee Stäerkste wier."""
# Labels offered by the "Liestempo" (reading tempo) radio group, slowest first.
speeds = ["ganz lues", "lues", "normal", "schnell", "ganz schnell"]

# Labels offered by the "Sazpaus" (sentence pause) radio group, shortest first.
stops = ["ganz kuerz", "kuerz", "mëttel", "laang", "ganz laang"]
def change_speed(choice):
    """Translate a reading-tempo label into its numeric length value.

    Args:
        choice: A label from ``speeds`` ("ganz lues" ... "ganz schnell").

    Returns:
        1.8, 1.4, 1 or 0.6 for the first four labels; every other value,
        including "ganz schnell", yields 0.2.
    """
    length_by_label = {
        "ganz lues": 1.8,
        "lues": 1.4,
        "normal": 1,
        "schnell": 0.6,
    }
    # "ganz schnell" is the catch-all, exactly like a trailing else branch.
    return length_by_label.get(choice, 0.2)
def change_stop(choice):
    """Translate a sentence-pause label into its numeric pause value.

    Args:
        choice: A label from ``stops`` ("ganz kuerz" ... "ganz laang").

    Returns:
        0.2, 0.6, 1 or 2.5 for the first four labels; every other value,
        including "ganz laang", yields 4.
    """
    pause_by_label = {
        # The radio offers "ganz kuerz"; the previous comparison used the
        # misspelling "ganz kuez", so the shortest pause fell through to the
        # longest one (4).
        "ganz kuerz": 0.2,
        "ganz kuez": 0.2,  # legacy misspelling, kept for backward compatibility
        "kuerz": 0.6,
        "mëttel": 1,
        "laang": 2.5,
    }
    # "ganz laang" is the catch-all, as in the original else branch.
    return pause_by_label.get(choice, 4)
def init():
    """Return the update that fills the voice dropdown on page load.

    The dropdown is labelled "Voice", lists the three Luxembourgish voices,
    preselects "lb_LU-femaleLOD-medium" and is made interactive.
    """
    voice_keys = [
        'lb_LU-marylux-medium',
        'lb_LU-femaleLOD-medium',
        'lb_LU-androgynous-medium',
    ]
    return gr.update(
        label="Voice",
        choices=voice_keys,
        value="lb_LU-femaleLOD-medium",
        interactive=True,
    )
def new_load_mod(instr="en_US-joe-medium"):
    """Make sure a voice model and its JSON config exist locally.

    The voice key has the form ``<lang>_<REGION>-<name>-<style>``, e.g.
    ``lb_LU-marylux-medium``. If ``voices/<key>.onnx`` or its ``.json``
    companion is missing under the current working directory, both are
    downloaded from the ``mbarnig/lb_rhasspy_piper_tts`` Hugging Face
    repository. Finally ``pp.json_ob`` is set to the path of the JSON config.

    Args:
        instr: Voice key to load.

    Raises:
        IndexError: If ``instr`` has fewer than three ``-`` separated parts.
        requests.HTTPError: If either download returns an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    model = instr
    print(f"model: {model}")
    lang = instr.split("_")[0]
    print(f"lang: {lang}")
    dia = instr.split("-")[0]
    print(f"dia: {dia}")
    name = instr.split("-")[1]
    print(f"name: {name}")
    style = instr.split("-")[2]
    print(f"style: {style}")
    file = f'{instr}.onnx'
    print(f"file: {file}")
    print(f"Loading model: {file}")
    print(f"os.path: {os.path}")

    voices_dir = f'{os.getcwd()}/voices'
    model_path = f'{voices_dir}/{file}'
    config_path = f'{model_path}.json'

    # Download when either file is missing, so a half-finished earlier
    # download is repaired instead of leaving the config absent forever.
    if not (os.path.isfile(model_path) and os.path.isfile(config_path)):
        print("Model not found locally")
        # Upstream layout, for reference:
        # https://huggingface.co/rhasspy/piper-voices/resolve/main/{lang}/{dia}/{name}/{style}/{file}
        m_path = f"https://huggingface.co/mbarnig/lb_rhasspy_piper_tts/resolve/main/{lang}/{dia}/{name}/{style}/{file}"
        print(f"m_path: {m_path}")
        timeout = (10, 120)  # (connect, read) seconds; avoids hanging forever

        print("Downloading json...")
        json_file = requests.get(f"{m_path}.json", timeout=timeout)
        # Fail before anything is written, so an error page is never cached
        # on disk as if it were the model or its config.
        json_file.raise_for_status()
        print("Downloading model...")
        mod_file = requests.get(m_path, timeout=timeout)
        mod_file.raise_for_status()

        os.makedirs(voices_dir, exist_ok=True)
        with open(model_path, 'wb') as m:
            m.write(mod_file.content)
        with open(config_path, 'wb') as j:
            j.write(json_file.content)

    pp.json_ob = config_path
    print("Model Loaded")
def load_mod(instr="en_US-joe-medium"):
    """Load a voice and return the raw text of its JSON config.

    Shows a two-second info toast before and after loading, delegates the
    actual work to ``new_load_mod`` and then reads the file that
    ``pp.json_ob`` points to.

    Args:
        instr: Voice key selected in the dropdown.

    Returns:
        The config file content as an unparsed string.
    """
    gr.Info(f"""Loading Model...<br>{instr}""", duration=2)
    new_load_mod(instr=instr)
    with open(pp.json_ob, 'r') as config_file:
        config_text = config_file.read()
    gr.Info(f"Model Loaded<br>{instr}", duration=2)
    return config_text
def save_set(model,length,noise,width,sen_pause):
    """Write the current synthesis settings to a JSON file and return its path.

    The file is stored in ``saved/`` under the current working directory
    (created if needed). Its name is the five values joined by ``__`` with
    every ``.`` replaced by ``_``.

    Args:
        model: Voice key.
        length: Length slider value.
        noise: Noise slider value.
        width: Noise-width slider value.
        sen_pause: Sentence-pause slider value (stored under key "pause").

    Returns:
        Path of the written ``.json`` file.
    """
    target_dir = f'{os.getcwd()}/saved'
    os.makedirs(target_dir, exist_ok=True)

    settings = {
        "model": model,
        "length": length,
        "noise": noise,
        "width": width,
        "pause": sen_pause,
    }
    stem = "__".join(str(v) for v in (model, length, noise, width, sen_pause))
    target_path = f'{target_dir}/{stem.replace(".", "_")}.json'

    with open(target_path, 'w') as out:
        json.dump(settings, out, indent=4)
    return target_path
def load_set(set_file):
    """Read a saved settings file and return one component update per value.

    Args:
        set_file: Path of a JSON file with the keys "model", "length",
            "noise", "width" and "pause".

    Returns:
        A 5-tuple of ``gr.update(value=...)`` objects, in that key order.
    """
    with open(set_file, 'r') as source:
        settings = json.load(source)
    ordered_keys = ('model', 'length', 'noise', 'width', 'pause')
    return tuple(gr.update(value=settings[key]) for key in ordered_keys)
# txt="""PiperTTS is a powerful text-to-speech TTS node designed to convert written text into high-quality spoken audio. This node leverages advanced voice synthesis models to generate natural-sounding speech, making it an invaluable tool for AI developers looking to add a vocal element to their projects."""
def button_on(stream):
    """Show exactly one of two buttons depending on the stream flag.

    Args:
        stream: Value of the "Stream" checkbox.

    Returns:
        Two updates (first button, second button). The first is enabled and
        visible when ``stream`` equals True, the second when it equals False.
        Any other value yields None.
    """
    # Equality (not truthiness) is intentional: values that are neither
    # True nor False must not match either case.
    if stream == True:  # noqa: E712
        first_on, second_on = True, False
    elif stream == False:  # noqa: E712
        first_on, second_on = False, True
    else:
        return None
    return (
        gr.update(interactive=first_on, visible=first_on),
        gr.update(interactive=second_on, visible=second_on),
    )
def clear_aud():
    """Return None, the value used to reset the audio output component."""
    cleared = None
    return cleared
# NOTE(review): the nesting of the layout contexts below was reconstructed
# from the statement order; confirm it against the rendered page.
with gr.Blocks() as b:
    gr.HTML("<h1>Rhasspy Piper LU TTS Streaming</h1>")
    with gr.Row():
        # Left column: text input, voice selection, action buttons, audio.
        with gr.Column(scale=2):
            in_txt = gr.Textbox(label="Text", lines=10, value=my_examples)
            names = gr.Dropdown()
            with gr.Row():
                stream_btn = gr.Button("Stream", interactive=True, visible=True)
                sub_btn = gr.Button(interactive=False, visible=False)
                cancel_btn = gr.Button("Stop")
            out_aud = gr.Audio(streaming=True, autoplay=True)
        # Right column: information, quick presets, fine controls, config.
        with gr.Column(scale=1):
            with gr.Row():
                with gr.Accordion("Informatiounen", open=False):
                    gr.Markdown("""
D'Rhasspy **Piper TTS** Technologie gouf vum Michael Hansen, alias [Synesthesiam](https://github.com/synesthesiam), entwéckelt.
Hien huet och d'Marylux Stëmm trainéiert. Déi zwou aner Lëtzebuergesch Stëmme goufe vum Marco Barnig realiséiert.
D'Piper Technik berout op enger Ëmwandlung vun Text an **eSpeak-Phonemen**, déi zesumme mat den entspriechenden Audio-Dateien an
engem **neuronale KI-Netz** trainéiert ginn. Déi generéiert TTS-Modeller sinn optiméiert fir Streaming a kënnen a Screenliesmaschinnen,
wéi [NVDA](https://www.nvaccess.org/about-nvda/), agesat ginn. Sie kënnen awer och am Ganze synthetiséiert ginn,
woubäi d'Qualitéit da besser gëtt.
Verschidde Parameter wéi Liestempo, Sazpaus a Geräischintensitéiten kënnen mat Radioknäpp oder mat Schieberen (am Control-Tab)
agestallt ginn. D'Parameter kënnen och an enger Datei ofgespäichert a reimportéiert ginn.""")
            with gr.Row():
                vitess = gr.Radio(label="Liestempo", choices=speeds, value="normal")
                pause = gr.Radio(label="Sazpaus", choices=stops, value="mëttel")
            with gr.Accordion("Control", open=False):
                stream = gr.Checkbox(
                    label="Stream",
                    info="Streaming is fast, but lower quality",
                    value=True,
                    interactive=True,
                )
                length = gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1)
                noise = gr.Slider(label="Noise", minimum=0.01, maximum=3.0, value=0.5, visible=True)
                width = gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5, visible=True)
                sen_pause = gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1, visible=True)
                with gr.Tab("Save Settings"):
                    save_btn = gr.Button("Save")
                    save_file = gr.File()
                with gr.Tab("Load Settings"):
                    load_file = gr.File()
            with gr.Accordion("Model Config", open=False):
                json_ob = gr.JSON(label="JSON")

    # Event wiring. The handles f1..f8 are kept so the Stop button can cancel them.
    synth_inputs = [in_txt, names, length, noise, width, sen_pause]
    setting_fields = [names, length, noise, width, sen_pause]
    f1 = stream.change(button_on, stream, [stream_btn, sub_btn])
    f2 = save_btn.click(save_set, setting_fields, save_file)
    f3 = load_file.change(load_set, load_file, setting_fields)
    f4 = names.change(load_mod, names, json_ob).then(clear_aud, None, out_aud)
    f5 = stream_btn.click(clear_aud, None, out_aud)
    f6 = stream_btn.click(pp.stream_tts, synth_inputs, out_aud)
    f7 = sub_btn.click(clear_aud, None, out_aud)
    f8 = sub_btn.click(pp.tts, synth_inputs, out_aud)
    cancel_btn.click(None, None, None, cancels=[f1, f2, f3, f4, f5, f6, f7, f8])
    # The radio presets write into the Length and Sentence Pause sliders.
    vitess.change(change_speed, inputs=vitess, outputs=length)
    pause.change(change_stop, inputs=pause, outputs=sen_pause)
    # cancel_btn.click(None,None,None,cancels=[f1,f3,f5,f6,f7,f8])
    # Fill the voice dropdown when the page loads.
    b.load(init, None, names)
# Alternative start-up with an explicit queue, kept for reference:
# b.queue(default_concurrency_limit=20).launch(max_threads=40)
# Start the app with default settings. The stray trailing "|" after the call
# was not valid Python and has been removed.
b.launch()