ecker committed on
Commit
5613355
·
verified ·
1 Parent(s): 6492776

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -13
app.py CHANGED
@@ -202,6 +202,7 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
202
  # I'm very sure I can procedurally generate this list
203
  parser.add_argument("--text", type=str, default=kwargs["text"])
204
  parser.add_argument("--task", type=str, default="tts")
 
205
  parser.add_argument("--references", type=str, default=kwargs["reference"])
206
  parser.add_argument("--language", type=str, default=kwargs["language"])
207
  parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
@@ -258,16 +259,7 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
258
 
259
  tts = init_tts()
260
 
261
- gr.Info("Inferencing...")
262
-
263
- # icky
264
- modality = kwargs.get("modality")
265
- if modality:
266
- for name, engine in tts.engines.items():
267
- if modality == "AR+NAR":
268
- engine.hyper_config.capabilities = ["ar", "nar"]
269
- elif modality == "NAR-len":
270
- engine.hyper_config.capabilities = ["nar", "len"]
271
 
272
  sampling_kwargs = dict(
273
  max_steps=args.max_steps,
@@ -293,12 +285,13 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
293
  input_prompt_length=args.input_prompt_length,
294
  cfg_strength=args.cfg_strength,
295
  )
296
-
297
  with timer("Inferenced in", callback=lambda msg: gr.Info( msg )) as t:
298
  wav, sr = tts.inference(
299
  text=args.text,
300
  language=args.language,
301
  task=args.task,
 
302
  references=args.references.split(";") if args.references is not None else [],
303
  **sampling_kwargs,
304
  )
@@ -438,8 +431,9 @@ with ui:
438
  layout["inference_tts"]["inputs"]["ar-temperature"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (AR)", info="Modifies the randomness from the samples in the AR. (0 to greedy* sample)")
439
  layout["inference_tts"]["inputs"]["nar-temperature"] = gr.Slider(value=0.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (NAR)", info="Modifies the randomness from the samples in the NAR. (0 to greedy sample)")
440
  with gr.Row():
441
- layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=3.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale")
442
  layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
 
443
  with gr.Tab("Sampler Settings"):
444
  with gr.Row():
445
  layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
@@ -464,7 +458,6 @@ with ui:
464
  with gr.Row():
465
  layout["inference_tts"]["inputs"]["input-prompt-prefix"] = gr.Checkbox(label="Input Prompt as Prefix", info="Treats the input prompt clip as the prefix of the generated sequence.")
466
  layout["inference_tts"]["inputs"]["prefix-silence"] = gr.Slider(value=0.0, minimum=0.0, maximum=1.0, step=0.05, label="Silence Prefix Duration", info="Amount of silence to prefix to the output response before beginning inference.")
467
- layout["inference_tts"]["inputs"]["modality"] = gr.Dropdown(value="Auto", choices=["Auto", "AR+NAR", "NAR-len"], label="Modality", info="Whether to inference with the AR+NAR or through the NAR-len.")
468
  with gr.Row():
469
  layout["inference_tts"]["inputs"]["beam-width"] = gr.Slider(value=0, minimum=0, maximum=32, step=1, label="Beam Width", info="Number of branches to search through for beam search sampling.")
470
  layout["inference_tts"]["inputs"]["dynamic-sampling"] = gr.Checkbox(label="Dynamic Temperature", info="Dynamically adjusts the temperature based on the highest confident predicted token per sampling step.")
 
202
  # I'm very sure I can procedurally generate this list
203
  parser.add_argument("--text", type=str, default=kwargs["text"])
204
  parser.add_argument("--task", type=str, default="tts")
205
+ parser.add_argument("--modality", type=str, default=kwargs["modality"])
206
  parser.add_argument("--references", type=str, default=kwargs["reference"])
207
  parser.add_argument("--language", type=str, default=kwargs["language"])
208
  parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
 
259
 
260
  tts = init_tts()
261
 
262
+ gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
 
 
 
 
 
 
 
 
 
263
 
264
  sampling_kwargs = dict(
265
  max_steps=args.max_steps,
 
285
  input_prompt_length=args.input_prompt_length,
286
  cfg_strength=args.cfg_strength,
287
  )
288
+
289
  with timer("Inferenced in", callback=lambda msg: gr.Info( msg )) as t:
290
  wav, sr = tts.inference(
291
  text=args.text,
292
  language=args.language,
293
  task=args.task,
294
+ modality=args.modality.lower(),
295
  references=args.references.split(";") if args.references is not None else [],
296
  **sampling_kwargs,
297
  )
 
431
  layout["inference_tts"]["inputs"]["ar-temperature"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (AR)", info="Modifies the randomness from the samples in the AR. (0 to greedy* sample)")
432
  layout["inference_tts"]["inputs"]["nar-temperature"] = gr.Slider(value=0.0, minimum=0.0, maximum=1.5, step=0.05, label="Temperature (NAR)", info="Modifies the randomness from the samples in the NAR. (0 to greedy sample)")
433
  with gr.Row():
434
+ layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale")
435
  layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
436
+ layout["inference_tts"]["inputs"]["modality"] = gr.Dropdown(value="Auto", choices=["Auto", "AR+NAR", "NAR-len"], label="Modality", info="Whether to inference with the AR+NAR or through the NAR-len.")
437
  with gr.Tab("Sampler Settings"):
438
  with gr.Row():
439
  layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
 
458
  with gr.Row():
459
  layout["inference_tts"]["inputs"]["input-prompt-prefix"] = gr.Checkbox(label="Input Prompt as Prefix", info="Treats the input prompt clip as the prefix of the generated sequence.")
460
  layout["inference_tts"]["inputs"]["prefix-silence"] = gr.Slider(value=0.0, minimum=0.0, maximum=1.0, step=0.05, label="Silence Prefix Duration", info="Amount of silence to prefix to the output response before beginning inference.")
 
461
  with gr.Row():
462
  layout["inference_tts"]["inputs"]["beam-width"] = gr.Slider(value=0, minimum=0, maximum=32, step=1, label="Beam Width", info="Number of branches to search through for beam search sampling.")
463
  layout["inference_tts"]["inputs"]["dynamic-sampling"] = gr.Checkbox(label="Dynamic Temperature", info="Dynamically adjusts the temperature based on the highest confident predicted token per sampling step.")