Spaces:
Runtime error
Runtime error
Ahsen Khaliq
committed on
Commit
·
7f7f412
1
Parent(s):
5af8374
Update demo_cli.py
Browse files
- demo_cli.py +12 -18
demo_cli.py
CHANGED
@@ -20,26 +20,21 @@ if __name__ == '__main__':
|
|
20 |
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
21 |
)
|
22 |
parser.add_argument("-e", "--enc_model_fpath", type=Path,
|
23 |
-
default="
|
24 |
help="Path to a saved encoder")
|
25 |
parser.add_argument("-s", "--syn_model_fpath", type=Path,
|
26 |
-
default="
|
27 |
help="Path to a saved synthesizer")
|
28 |
parser.add_argument("-v", "--voc_model_fpath", type=Path,
|
29 |
-
default="
|
30 |
help="Path to a saved vocoder")
|
31 |
-
parser.add_argument("--cpu", action="store_true", help
|
32 |
-
"If True,
|
33 |
-
parser.add_argument("--
|
34 |
-
"If True,
|
35 |
-
parser.add_argument("--seed", type=int, default=None, help=\
|
36 |
-
"Optional random number seed value to make toolbox deterministic.")
|
37 |
-
parser.add_argument("--no_mp3_support", action="store_true", help=\
|
38 |
-
"If True, disallows loading mp3 files to prevent audioread errors when ffmpeg is not installed.")
|
39 |
parser.add_argument("-audio", "--audio_path", type=Path, required = True,
|
40 |
help="Path to a audio file")
|
41 |
-
parser.add_argument("--text", type=str, required = True, help
|
42 |
-
"Text Input")
|
43 |
args = parser.parse_args()
|
44 |
print_args(args, parser)
|
45 |
if not args.no_sound:
|
@@ -95,7 +90,7 @@ if __name__ == '__main__':
|
|
95 |
# The sampling rate is the number of values (samples) recorded per second, it is set to
|
96 |
# 16000 for the encoder. Creating an array of length <sampling_rate> will always correspond
|
97 |
# to an audio of 1 second.
|
98 |
-
print("
|
99 |
encoder.embed_utterance(np.zeros(encoder.sampling_rate))
|
100 |
|
101 |
# Create a dummy embedding. You would normally use the embedding that encoder.embed_utterance
|
@@ -109,7 +104,7 @@ if __name__ == '__main__':
|
|
109 |
# illustrate that
|
110 |
embeds = [embed, np.zeros(speaker_embedding_size)]
|
111 |
texts = ["test 1", "test 2"]
|
112 |
-
print("
|
113 |
mels = synthesizer.synthesize_spectrograms(texts, embeds)
|
114 |
|
115 |
# The vocoder synthesizes one waveform at a time, but it's more efficient for long ones. We
|
@@ -118,7 +113,7 @@ if __name__ == '__main__':
|
|
118 |
# The vocoder can take a callback function to display the generation. More on that later. For
|
119 |
# now we'll simply hide it like this:
|
120 |
no_action = lambda *args: None
|
121 |
-
print("
|
122 |
# For the sake of making this test short, we'll pass a short target length. The target length
|
123 |
# is the length of the wav segments that are processed in parallel. E.g. for audio sampled
|
124 |
# at 16000 Hertz, a target length of 8000 means that the target audio will be cut in chunks of
|
@@ -139,8 +134,7 @@ if __name__ == '__main__':
|
|
139 |
# while True:
|
140 |
try:
|
141 |
# Get the reference audio filepath
|
142 |
-
message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, "
|
143 |
-
"wav, m4a, flac, ...):\n"
|
144 |
in_fpath = args.audio_path
|
145 |
|
146 |
if in_fpath.suffix.lower() == ".mp3" and args.no_mp3_support:
|
|
|
20 |
formatter_class=argparse.ArgumentDefaultsHelpFormatter
|
21 |
)
|
22 |
parser.add_argument("-e", "--enc_model_fpath", type=Path,
|
23 |
+
default="encpretrained.pt",
|
24 |
help="Path to a saved encoder")
|
25 |
parser.add_argument("-s", "--syn_model_fpath", type=Path,
|
26 |
+
default="synpretrained.pt",
|
27 |
help="Path to a saved synthesizer")
|
28 |
parser.add_argument("-v", "--voc_model_fpath", type=Path,
|
29 |
+
default="vocpretrained.pt",
|
30 |
help="Path to a saved vocoder")
|
31 |
+
parser.add_argument("--cpu", action="store_true", help=\\n "If True, processing is done on CPU, even when a GPU is available.")
|
32 |
+
parser.add_argument("--no_sound", action="store_true", help=\\n "If True, audio won't be played.")
|
33 |
+
parser.add_argument("--seed", type=int, default=None, help=\\n "Optional random number seed value to make toolbox deterministic.")
|
34 |
+
parser.add_argument("--no_mp3_support", action="store_true", help=\\n "If True, disallows loading mp3 files to prevent audioread errors when ffmpeg is not installed.")
|
|
|
|
|
|
|
|
|
35 |
parser.add_argument("-audio", "--audio_path", type=Path, required = True,
|
36 |
help="Path to a audio file")
|
37 |
+
parser.add_argument("--text", type=str, required = True, help=\\n "Text Input")
|
|
|
38 |
args = parser.parse_args()
|
39 |
print_args(args, parser)
|
40 |
if not args.no_sound:
|
|
|
90 |
# The sampling rate is the number of values (samples) recorded per second, it is set to
|
91 |
# 16000 for the encoder. Creating an array of length <sampling_rate> will always correspond
|
92 |
# to an audio of 1 second.
|
93 |
+
print(" Testing the encoder...")
|
94 |
encoder.embed_utterance(np.zeros(encoder.sampling_rate))
|
95 |
|
96 |
# Create a dummy embedding. You would normally use the embedding that encoder.embed_utterance
|
|
|
104 |
# illustrate that
|
105 |
embeds = [embed, np.zeros(speaker_embedding_size)]
|
106 |
texts = ["test 1", "test 2"]
|
107 |
+
print(" Testing the synthesizer... (loading the model will output a lot of text)")
|
108 |
mels = synthesizer.synthesize_spectrograms(texts, embeds)
|
109 |
|
110 |
# The vocoder synthesizes one waveform at a time, but it's more efficient for long ones. We
|
|
|
113 |
# The vocoder can take a callback function to display the generation. More on that later. For
|
114 |
# now we'll simply hide it like this:
|
115 |
no_action = lambda *args: None
|
116 |
+
print(" Testing the vocoder...")
|
117 |
# For the sake of making this test short, we'll pass a short target length. The target length
|
118 |
# is the length of the wav segments that are processed in parallel. E.g. for audio sampled
|
119 |
# at 16000 Hertz, a target length of 8000 means that the target audio will be cut in chunks of
|
|
|
134 |
# while True:
|
135 |
try:
|
136 |
# Get the reference audio filepath
|
137 |
+
message = "Reference voice: enter an audio filepath of a voice to be cloned (mp3, " \\n "wav, m4a, flac, ...):\n"
|
|
|
138 |
in_fpath = args.audio_path
|
139 |
|
140 |
if in_fpath.suffix.lower() == ".mp3" and args.no_mp3_support:
|