Spaces:
Running
Running
Commit
·
e6f3c81
1
Parent(s):
9fe7d60
Allow custom steps on beta synthesis
Browse files
app.py
CHANGED
|
@@ -28,15 +28,19 @@ def synthesize(text, voice):
|
|
| 28 |
raise gr.Error("Text must be under 300 characters")
|
| 29 |
v = voice.lower()
|
| 30 |
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
| 31 |
-
def longsynthesize(text, voice, password, progress=gr.Progress()):
|
| 32 |
if password == os.environ['ACCESS_CODE']:
|
| 33 |
if text.strip() == "":
|
| 34 |
raise gr.Error("You must enter some text")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
texts = split_and_recombine_text(text)
|
| 36 |
v = voice.lower()
|
| 37 |
audios = []
|
| 38 |
for t in progress.tqdm(texts):
|
| 39 |
-
audios.append(styletts2importable.inference(t, voices[v], alpha=0.3, beta=0.7, diffusion_steps=
|
| 40 |
return (24000, np.concatenate(audios))
|
| 41 |
else:
|
| 42 |
raise gr.Error('Wrong access code')
|
|
@@ -81,11 +85,12 @@ with gr.Blocks() as longText:
|
|
| 81 |
with gr.Column(scale=1):
|
| 82 |
lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
| 83 |
lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
|
|
|
| 84 |
lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
|
| 85 |
with gr.Column(scale=1):
|
| 86 |
lngbtn = gr.Button("Synthesize", variant="primary")
|
| 87 |
lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
| 88 |
-
lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngpwd], outputs=[lngaudio], concurrency_limit=4)
|
| 89 |
with gr.Blocks() as lj:
|
| 90 |
with gr.Row():
|
| 91 |
with gr.Column(scale=1):
|
|
|
|
| 28 |
raise gr.Error("Text must be under 300 characters")
|
| 29 |
v = voice.lower()
|
| 30 |
return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
|
| 31 |
+
def longsynthesize(text, voice, lngsteps, password, progress=gr.Progress()):
    """Synthesize long text chunk by chunk, gated behind an access code.

    The text is split with ``split_and_recombine_text``; every chunk is run
    through ``styletts2importable.inference`` with the selected voice and the
    requested number of diffusion steps, and the per-chunk results are
    concatenated.

    Args:
        text: Text to read aloud; must not be blank.
        voice: Voice name; lower-cased before being looked up in ``voices``.
        lngsteps: Requested diffusion steps, between 5 and 25 inclusive.
        password: Access code typed by the user, checked against the
            ``ACCESS_CODE`` environment variable.
        progress: Gradio progress tracker used to wrap the chunk loop.

    Returns:
        ``(24000, audio)`` where ``audio`` is ``np.concatenate`` of the
        per-chunk inference outputs. The tuple is wired to a ``gr.Audio``
        output, so 24000 is presumably the sample rate in Hz.

    Raises:
        gr.Error: Wrong (or unconfigured) access code, blank text, or a step
            count that is not a number or is outside 5..25.
    """
    # Local import keeps this block self-contained; hmac is stdlib.
    import hmac

    # Fail closed when ACCESS_CODE is not configured instead of leaking a
    # KeyError, and compare in constant time because the code is untrusted
    # input coming straight from the web UI.
    expected = os.environ.get('ACCESS_CODE')
    code_ok = (
        expected is not None
        and isinstance(password, str)
        and hmac.compare_digest(password.encode('utf-8'), expected.encode('utf-8'))
    )
    if not code_ok:
        raise gr.Error('Wrong access code')

    if text.strip() == "":
        raise gr.Error("You must enter some text")

    # The value may arrive as a float (or as something non-numeric through
    # the API). Range-check the requested value, but hand the model a real
    # integer step count.
    try:
        requested = float(lngsteps)
        steps = int(requested)
    except (TypeError, ValueError, OverflowError):
        raise gr.Error("Steps must be a number") from None
    if requested > 25:
        raise gr.Error("Max 25 steps")
    if requested < 5:
        raise gr.Error("Min 5 steps")

    texts = split_and_recombine_text(text)
    voice_ref = voices[voice.lower()]  # loop-invariant lookup, done once
    audios = []
    for t in progress.tqdm(texts):
        audios.append(
            styletts2importable.inference(
                t,
                voice_ref,
                alpha=0.3,
                beta=0.7,
                diffusion_steps=steps,
                embedding_scale=1,
            )
        )
    return (24000, np.concatenate(audios))
|
|
|
|
| 85 |
with gr.Column(scale=1):
|
| 86 |
lnginp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
|
| 87 |
lngvoice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-1', interactive=True)
|
| 88 |
+
lngsteps = gr.Slider(minimum=5, maximum=25, value=10, step=1, label="Diffusion Steps", info="Higher = better quality, but longer", interactive=True)
|
| 89 |
lngpwd = gr.Textbox(label="Access code", info="This feature is in beta. You need an access code to use it as it uses more resources and we would like to prevent abuse")
|
| 90 |
with gr.Column(scale=1):
|
| 91 |
lngbtn = gr.Button("Synthesize", variant="primary")
|
| 92 |
lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
|
| 93 |
+
lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngsteps, lngpwd], outputs=[lngaudio], concurrency_limit=4)
|
| 94 |
with gr.Blocks() as lj:
|
| 95 |
with gr.Row():
|
| 96 |
with gr.Column(scale=1):
|