mrq
committed on
Commit
·
1d38b2f
1
Parent(s):
ef295ab
- app.py +6 -2
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -127,6 +127,9 @@ def get_speakers():
|
|
| 127 |
def get_languages():
|
| 128 |
return list(get_lang_symmap().keys()) + ["auto"]
|
| 129 |
|
|
|
|
|
|
|
|
|
|
| 130 |
#@gradio_wrapper(inputs=layout["dataset"]["inputs"].keys())
|
| 131 |
def load_sample( speaker ):
|
| 132 |
metadata_path = cfg.metadata_dir / f'{speaker}.json'
|
|
@@ -208,7 +211,7 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
| 208 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
| 209 |
# I'm very sure I can procedurally generate this list
|
| 210 |
parser.add_argument("--text", type=str, default=kwargs["text"])
|
| 211 |
-
parser.add_argument("--task", type=str, default="
|
| 212 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
| 213 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 214 |
parser.add_argument("--voice-convert", type=str, default=kwargs["voice-convert"])
|
|
@@ -336,7 +339,7 @@ def do_inference_stt( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
| 336 |
|
| 337 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
| 338 |
# I'm very sure I can procedurally generate this list
|
| 339 |
-
parser.add_argument("--task", type=str, default="
|
| 340 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 341 |
parser.add_argument("--max-duration", type=int, default=0)
|
| 342 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
|
@@ -460,6 +463,7 @@ with ui:
|
|
| 460 |
with gr.Row():
|
| 461 |
layout["inference_tts"]["inputs"]["text-language"] = gr.Dropdown(choices=get_languages(), label="Language (Text)", value="auto", info="Language the input text is in.")
|
| 462 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language (Output)", value="auto", info="Target language/accent to output.")
|
|
|
|
| 463 |
with gr.Row():
|
| 464 |
layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="How to split the text into utterances.", value="sentences")
|
| 465 |
layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
|
|
|
|
| 127 |
def get_languages():
|
| 128 |
return list(get_lang_symmap().keys()) + ["auto"]
|
| 129 |
|
| 130 |
+
def get_tasks():
|
| 131 |
+
return ["tts", "sr", "nr", "vc"]
|
| 132 |
+
|
| 133 |
#@gradio_wrapper(inputs=layout["dataset"]["inputs"].keys())
|
| 134 |
def load_sample( speaker ):
|
| 135 |
metadata_path = cfg.metadata_dir / f'{speaker}.json'
|
|
|
|
| 211 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
| 212 |
# I'm very sure I can procedurally generate this list
|
| 213 |
parser.add_argument("--text", type=str, default=kwargs["text"])
|
| 214 |
+
parser.add_argument("--task", type=str, default=kwargs["task"])
|
| 215 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
| 216 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 217 |
parser.add_argument("--voice-convert", type=str, default=kwargs["voice-convert"])
|
|
|
|
| 339 |
|
| 340 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
| 341 |
# I'm very sure I can procedurally generate this list
|
| 342 |
+
parser.add_argument("--task", type=str, default="stt")
|
| 343 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
| 344 |
parser.add_argument("--max-duration", type=int, default=0)
|
| 345 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
|
|
|
| 463 |
with gr.Row():
|
| 464 |
layout["inference_tts"]["inputs"]["text-language"] = gr.Dropdown(choices=get_languages(), label="Language (Text)", value="auto", info="Language the input text is in.")
|
| 465 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language (Output)", value="auto", info="Target language/accent to output.")
|
| 466 |
+
layout["inference_tts"]["inputs"]["task"] = gr.Dropdown(choices=get_tasks(), label="Task", value="tts", info="")
|
| 467 |
with gr.Row():
|
| 468 |
layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="How to split the text into utterances.", value="sentences")
|
| 469 |
layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
|
requirements.txt
CHANGED
|
@@ -4,4 +4,4 @@ torchaudio
|
|
| 4 |
sageattention==1.0.6
|
| 5 |
pykakasi
|
| 6 |
|
| 7 |
-
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@
|
|
|
|
| 4 |
sageattention==1.0.6
|
| 5 |
pykakasi
|
| 6 |
|
| 7 |
+
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@59bf6b8b3338736cfa38bf888dc0730867370846
|