Korakoe committed on
Commit
a94f786
·
verified ·
1 Parent(s): 59afb84

Add experimental vocos option

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -185,7 +185,7 @@ def text_to_phonemes(text):
185
 
186
 
187
  @spaces.GPU
188
- def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100):
189
  ref_s = other_tts.compute_style(audio_path)
190
  print(ref_s.size())
191
  s_prev = None
@@ -198,7 +198,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100):
198
  synthaud, s_prev = other_tts.long_inference_segment(i, diffusion_steps=steps,
199
  alpha=alpha, beta=beta, is_phonemes=True,
200
  embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
201
- speed=speed, t=0.7)
202
 
203
  n_trim = int(len(synthaud) * 0.008) # 960 samples
204
  synthaud[:n_trim] = 0
@@ -235,6 +235,7 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
235
  interactive=True)
236
  speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech",
237
  info="Defaults to 1", interactive=True)
 
238
  with gr.Column(scale=1):
239
  clbtn = gr.Button("Synthesize", variant="primary")
240
  claudio = gr.Audio(interactive=False, label="Synthesized Audio",
@@ -243,7 +244,7 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
243
  concurrency_limit=15)
244
 
245
  gr.Examples(examples=examples,
246
- inputs=[voice, inp, speed, alpha, beta, embscale, steps],
247
  outputs=[claudio],
248
  fn=generate,
249
  cache_examples=True,)
 
185
 
186
 
187
  @spaces.GPU
188
+ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100, vocos=False):
189
  ref_s = other_tts.compute_style(audio_path)
190
  print(ref_s.size())
191
  s_prev = None
 
198
  synthaud, s_prev = other_tts.long_inference_segment(i, diffusion_steps=steps,
199
  alpha=alpha, beta=beta, is_phonemes=True,
200
  embedding_scale=embedding, prev_s=s_prev, ref_s=ref_s,
201
+ speed=speed, t=0.7, vocos=vocos)
202
 
203
  n_trim = int(len(synthaud) * 0.008) # 960 samples
204
  synthaud[:n_trim] = 0
 
235
  interactive=True)
236
  speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech",
237
  info="Defaults to 1", interactive=True)
238
+ vocos = gr.Checkbox(label="Enable Vocos", info="This may break results, as this is currently untested", interactive=True)
239
  with gr.Column(scale=1):
240
  clbtn = gr.Button("Synthesize", variant="primary")
241
  claudio = gr.Audio(interactive=False, label="Synthesized Audio",
 
244
  concurrency_limit=15)
245
 
246
  gr.Examples(examples=examples,
247
+ inputs=[voice, inp, speed, alpha, beta, embscale, steps, vocos],
248
  outputs=[claudio],
249
  fn=generate,
250
  cache_examples=True,)