Spaces:
Running
on
Zero
Running
on
Zero
Add experimental vocos option
Browse files
app.py
CHANGED
@@ -185,7 +185,7 @@ def text_to_phonemes(text):
|
|
185 |
|
186 |
|
187 |
@spaces.GPU
|
188 |
-
def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100):
|
189 |
ref_s = other_tts.compute_style(audio_path)
|
190 |
print(ref_s.size())
|
191 |
s_prev = None
|
@@ -198,7 +198,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100):
|
|
198 |
synthaud, s_prev = other_tts.long_inference_segment(i, diffusion_steps=steps,
|
199 |
alpha=alpha, beta=beta, is_phonemes=True,
|
200 |
embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
|
201 |
-
speed=speed, t=0.7)
|
202 |
|
203 |
n_trim = int(len(synthaud) * 0.008) # 960 samples
|
204 |
synthaud[:n_trim] = 0
|
@@ -235,6 +235,7 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
|
|
235 |
interactive=True)
|
236 |
speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech",
|
237 |
info="Defaults to 1", interactive=True)
|
|
|
238 |
with gr.Column(scale=1):
|
239 |
clbtn = gr.Button("Synthesize", variant="primary")
|
240 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio",
|
@@ -243,7 +244,7 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
|
|
243 |
concurrency_limit=15)
|
244 |
|
245 |
gr.Examples(examples=examples,
|
246 |
-
inputs=[voice, inp, speed, alpha, beta, embscale, steps],
|
247 |
outputs=[claudio],
|
248 |
fn=generate,
|
249 |
cache_examples=True,)
|
|
|
185 |
|
186 |
|
187 |
@spaces.GPU
|
188 |
+
def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100, vocos=False):
|
189 |
ref_s = other_tts.compute_style(audio_path)
|
190 |
print(ref_s.size())
|
191 |
s_prev = None
|
|
|
198 |
synthaud, s_prev = other_tts.long_inference_segment(i, diffusion_steps=steps,
|
199 |
alpha=alpha, beta=beta, is_phonemes=True,
|
200 |
embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
|
201 |
+
speed=speed, t=0.7, vocos=vocos)
|
202 |
|
203 |
n_trim = int(len(synthaud) * 0.008) # 960 samples
|
204 |
synthaud[:n_trim] = 0
|
|
|
235 |
interactive=True)
|
236 |
speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech",
|
237 |
info="Defaults to 1", interactive=True)
|
238 |
+
vocos = gr.Checkbox(label="Enable Vocos", info="This may break results, as this is currently untested", interactive=True)
|
239 |
with gr.Column(scale=1):
|
240 |
clbtn = gr.Button("Synthesize", variant="primary")
|
241 |
claudio = gr.Audio(interactive=False, label="Synthesized Audio",
|
|
|
244 |
concurrency_limit=15)
|
245 |
|
246 |
gr.Examples(examples=examples,
|
247 |
+
inputs=[voice, inp, speed, alpha, beta, embscale, steps, vocos],
|
248 |
outputs=[claudio],
|
249 |
fn=generate,
|
250 |
cache_examples=True,)
|