mrq committed
Commit 4715909 · 1 Parent(s): f64c96e
Files changed (2):
  1. app.py +13 -2
  2. requirements.txt +1 -2
app.py CHANGED
@@ -99,8 +99,7 @@ def get_model_paths( paths=[Path("./training/"), Path("./models/"), Path("./data
 			continue
 		configs.append( sft )
 
-	if is_windows:
-		configs = [ str(p) for p in configs ]
+	configs = [ str(p) for p in configs ]
 
 	return configs
 
@@ -205,6 +204,8 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
 	parser.add_argument("--modality", type=str, default=kwargs["modality"])
 	parser.add_argument("--references", type=str, default=kwargs["reference"])
 	parser.add_argument("--language", type=str, default=kwargs["language"])
+	parser.add_argument("--split-text-by", type=str, default=kwargs["split-text-by"])
+	parser.add_argument("--context-history", type=int, default=kwargs["context-history"])
 	parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
 	parser.add_argument("--input-prompt-prefix", action='store_true', default=kwargs["input-prompt-prefix"])
 	parser.add_argument("--max-duration", type=int, default=int(kwargs["max-duration"]*cfg.dataset.frames_per_second))
@@ -258,11 +259,18 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
 	if kwargs.pop("refine-on-stop", False):
 		args.refine_on_stop = True
 
+	if args.split_text_by == "lines":
+		args.split_text_by = "\n"
+	elif args.split_text_by == "none":
+		args.split_text_by = None
+
 	tts = init_tts()
 
 	gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
 
 	sampling_kwargs = dict(
+		split_text_by=args.split_text_by,
+		context_history=args.context_history,
 		max_steps=args.max_steps,
 		max_levels=args.max_levels,
 		max_duration=args.max_duration,
@@ -438,6 +446,9 @@ with ui:
 				layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale (AR needs 1, NAR-len needs 3).")
 				layout["inference_tts"]["inputs"]["cfg-rescale"] = gr.Slider(value=0.75, minimum=0.0, maximum=1.0, step=0.05, label="CFG Rescale (Phi)", info="Factor when rescaling for Classifier Free Guidance (0 to disable).")
 				layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
+			with gr.Row():
+				layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="Splits the text into pieces.", value="sentences")
+				layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
 		with gr.Tab("Sampler Settings"):
 			with gr.Row():
 				layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
requirements.txt CHANGED
@@ -1,6 +1,5 @@
 --extra-index-url https://download.pytorch.org/whl/cu121
 torch
 torchaudio
-sageattention==1.0.6
 
-vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@9dff68c0c57bb46da1847313b0ea23d44bd3050c
+vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@c66a53492c98222f2087de7af7e12da228d29534