mrq
committed on
Commit
·
4715909
1
Parent(s):
f64c96e
- app.py +13 -2
- requirements.txt +1 -2
app.py
CHANGED
@@ -99,8 +99,7 @@ def get_model_paths( paths=[Path("./training/"), Path("./models/"), Path("./data
|
|
99 |
continue
|
100 |
configs.append( sft )
|
101 |
|
102 |
-
|
103 |
-
configs = [ str(p) for p in configs ]
|
104 |
|
105 |
return configs
|
106 |
|
@@ -205,6 +204,8 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
205 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
206 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
207 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
|
|
|
|
208 |
parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
|
209 |
parser.add_argument("--input-prompt-prefix", action='store_true', default=kwargs["input-prompt-prefix"])
|
210 |
parser.add_argument("--max-duration", type=int, default=int(kwargs["max-duration"]*cfg.dataset.frames_per_second))
|
@@ -258,11 +259,18 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
258 |
if kwargs.pop("refine-on-stop", False):
|
259 |
args.refine_on_stop = True
|
260 |
|
|
|
|
|
|
|
|
|
|
|
261 |
tts = init_tts()
|
262 |
|
263 |
gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
|
264 |
|
265 |
sampling_kwargs = dict(
|
|
|
|
|
266 |
max_steps=args.max_steps,
|
267 |
max_levels=args.max_levels,
|
268 |
max_duration=args.max_duration,
|
@@ -438,6 +446,9 @@ with ui:
|
|
438 |
layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale (AR needs 1, NAR-len needs 3).")
|
439 |
layout["inference_tts"]["inputs"]["cfg-rescale"] = gr.Slider(value=0.75, minimum=0.0, maximum=1.0, step=0.05, label="CFG Rescale (Phi)", info="Factor when rescaling for Classifier Free Guidance (0 to disable).")
|
440 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
|
|
|
|
|
|
|
441 |
with gr.Tab("Sampler Settings"):
|
442 |
with gr.Row():
|
443 |
layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
|
|
|
99 |
continue
|
100 |
configs.append( sft )
|
101 |
|
102 |
+
configs = [ str(p) for p in configs ]
|
|
|
103 |
|
104 |
return configs
|
105 |
|
|
|
204 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
205 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
206 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
207 |
+
parser.add_argument("--split-text-by", type=str, default=kwargs["split-text-by"])
|
208 |
+
parser.add_argument("--context-history", type=int, default=kwargs["context-history"])
|
209 |
parser.add_argument("--input-prompt-length", type=float, default=kwargs["input-prompt-length"])
|
210 |
parser.add_argument("--input-prompt-prefix", action='store_true', default=kwargs["input-prompt-prefix"])
|
211 |
parser.add_argument("--max-duration", type=int, default=int(kwargs["max-duration"]*cfg.dataset.frames_per_second))
|
|
|
259 |
if kwargs.pop("refine-on-stop", False):
|
260 |
args.refine_on_stop = True
|
261 |
|
262 |
+
if args.split_text_by == "lines":
|
263 |
+
args.split_text_by = "\n"
|
264 |
+
elif args.split_text_by == "none":
|
265 |
+
args.split_text_by = None
|
266 |
+
|
267 |
tts = init_tts()
|
268 |
|
269 |
gr.Info(f"Inferencing... (Modality: {tts.modality(args.modality.lower())})")
|
270 |
|
271 |
sampling_kwargs = dict(
|
272 |
+
split_text_by=args.split_text_by,
|
273 |
+
context_history=args.context_history,
|
274 |
max_steps=args.max_steps,
|
275 |
max_levels=args.max_levels,
|
276 |
max_duration=args.max_duration,
|
|
|
446 |
layout["inference_tts"]["inputs"]["cfg-strength"] = gr.Slider(value=1.0, minimum=0.0, maximum=14.0, step=0.05, label="CFG Strength", info="Classifier Free Guidance scale (AR needs 1, NAR-len needs 3).")
|
447 |
layout["inference_tts"]["inputs"]["cfg-rescale"] = gr.Slider(value=0.75, minimum=0.0, maximum=1.0, step=0.05, label="CFG Rescale (Phi)", info="Factor when rescaling for Classifier Free Guidance (0 to disable).")
|
448 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language", value="en")
|
449 |
+
with gr.Row():
|
450 |
+
layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="Splits the text into pieces.", value="sentences")
|
451 |
+
layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
|
452 |
with gr.Tab("Sampler Settings"):
|
453 |
with gr.Row():
|
454 |
layout["inference_tts"]["inputs"]["top-p"] = gr.Slider(value=1.0, minimum=0.0, maximum=1.0, step=0.05, label="Top P", info=r"Limits the samples that are outside the top P% of probabilities.")
|
requirements.txt
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
2 |
torch
|
3 |
torchaudio
|
4 |
-
sageattention==1.0.6
|
5 |
|
6 |
-
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@
|
|
|
1 |
--extra-index-url https://download.pytorch.org/whl/cu121
|
2 |
torch
|
3 |
torchaudio
|
|
|
4 |
|
5 |
+
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@c66a53492c98222f2087de7af7e12da228d29534
|