Spaces: Running on T4
try to figure out how ZeroGPU works
Browse files
app.py
CHANGED
@@ -119,7 +119,13 @@ class ControllableInterface(torch.nn.Module):
|
|
119 |
return sr, wav, fig
|
120 |
|
121 |
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
|
125 |
def read(prompt,
|
@@ -149,26 +155,17 @@ def read(prompt,
|
|
149 |
return (sr, float2pcm(wav)), fig
|
150 |
|
151 |
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
placeholder="write what you want the synthesis to read here...",
|
164 |
-
value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
|
165 |
-
label="Text input"),
|
166 |
-
gr.Dropdown(text_selection,
|
167 |
-
type="value",
|
168 |
-
value='English Text (eng)',
|
169 |
-
label="Select the Language of the Text (type on your keyboard to find it quickly)"),
|
170 |
-
gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
|
171 |
-
value=279,
|
172 |
label="Random Seed for the artificial Voice"),
|
173 |
gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
|
174 |
gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
|
@@ -176,10 +173,10 @@ if __name__ == '__main__':
|
|
176 |
gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
|
177 |
gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
|
178 |
],
|
179 |
-
|
180 |
gr.Image(label="Visualization")],
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
|
|
119 |
return sr, wav, fig
|
120 |
|
121 |
|
122 |
+
title = "Controllable Text-to-Speech for over 7000 Languages"
|
123 |
+
article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
|
124 |
+
available_artificial_voices = 1000
|
125 |
+
path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
|
126 |
+
iso_to_name = load_json_from_path(path_to_iso_list)
|
127 |
+
text_selection = [f"{iso_to_name[iso_code]} Text ({iso_code})" for iso_code in iso_to_name]
|
128 |
+
controllable_ui = ControllableInterface(available_artificial_voices=available_artificial_voices)
|
129 |
|
130 |
|
131 |
def read(prompt,
|
|
|
155 |
return (sr, float2pcm(wav)), fig
|
156 |
|
157 |
|
158 |
+
iface = gr.Interface(fn=read,
|
159 |
+
inputs=[gr.Textbox(lines=2,
|
160 |
+
placeholder="write what you want the synthesis to read here...",
|
161 |
+
value="The woods are lovely, dark and deep, but I have promises to keep, and miles to go, before I sleep.",
|
162 |
+
label="Text input"),
|
163 |
+
gr.Dropdown(text_selection,
|
164 |
+
type="value",
|
165 |
+
value='English Text (eng)',
|
166 |
+
label="Select the Language of the Text (type on your keyboard to find it quickly)"),
|
167 |
+
gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
|
168 |
+
value=279,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
label="Random Seed for the artificial Voice"),
|
170 |
gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
|
171 |
gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
|
|
|
173 |
gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
|
174 |
gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
|
175 |
],
|
176 |
+
outputs=[gr.Audio(type="numpy", label="Speech"),
|
177 |
gr.Image(label="Visualization")],
|
178 |
+
title=title,
|
179 |
+
theme="default",
|
180 |
+
allow_flagging="never",
|
181 |
+
article=article)
|
182 |
+
iface.launch()
|