Spaces: Running on T4
try to figure out how ZeroGPU works
Browse files
- Architectures/ControllabilityGAN/wgan/wgan_qc.py +4 -4
- app.py +11 -16
Architectures/ControllabilityGAN/wgan/wgan_qc.py
CHANGED
|
@@ -11,12 +11,13 @@ from cvxopt import sparse
|
|
| 11 |
from cvxopt import spmatrix
|
| 12 |
from torch.autograd import grad as torch_grad
|
| 13 |
from tqdm import tqdm
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
-
class WassersteinGanQuadraticCost:
|
| 17 |
|
| 18 |
-
def __init__(self, generator, discriminator, gen_optimizer, dis_optimizer, criterion, epochs, n_max_iterations,
|
| 19 |
-
|
| 20 |
self.G = generator
|
| 21 |
self.G_opt = gen_optimizer
|
| 22 |
self.D = discriminator
|
|
@@ -242,7 +243,6 @@ class WassersteinGanQuadraticCost:
|
|
| 242 |
else:
|
| 243 |
latent_samples = self.G.sample_latent(num_samples, self.G.z_dim)
|
| 244 |
latent_samples = latent_samples.to(self.device)
|
| 245 |
-
print(self.device)
|
| 246 |
if nograd:
|
| 247 |
with torch.no_grad():
|
| 248 |
generated_data = self.G(latent_samples, return_intermediate=return_intermediate)
|
|
|
|
| 11 |
from cvxopt import spmatrix
|
| 12 |
from torch.autograd import grad as torch_grad
|
| 13 |
from tqdm import tqdm
|
| 14 |
+
import spaces
|
| 15 |
|
| 16 |
|
| 17 |
+
class WassersteinGanQuadraticCost(torch.nn.Module):
|
| 18 |
|
| 19 |
+
def __init__(self, generator, discriminator, gen_optimizer, dis_optimizer, criterion, epochs, n_max_iterations, data_dimensions, batch_size, device, gamma=0.1, K=-1, milestones=[150000, 250000], lr_anneal=1.0, *args, **kwargs):
|
| 20 |
+
super().__init__(*args, **kwargs)
|
| 21 |
self.G = generator
|
| 22 |
self.G_opt = gen_optimizer
|
| 23 |
self.D = discriminator
|
|
|
|
| 243 |
else:
|
| 244 |
latent_samples = self.G.sample_latent(num_samples, self.G.z_dim)
|
| 245 |
latent_samples = latent_samples.to(self.device)
|
|
|
|
| 246 |
if nograd:
|
| 247 |
with torch.no_grad():
|
| 248 |
generated_data = self.G(latent_samples, return_intermediate=return_intermediate)
|
app.py
CHANGED
|
@@ -21,10 +21,10 @@ from Utility.storage_config import MODELS_DIR
|
|
| 21 |
|
| 22 |
class ControllableInterface(torch.nn.Module):
|
| 23 |
|
| 24 |
-
def __init__(self,
|
| 25 |
super().__init__()
|
| 26 |
self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta")
|
| 27 |
-
self.wgan =
|
| 28 |
self.generated_speaker_embeds = list()
|
| 29 |
self.available_artificial_voices = available_artificial_voices
|
| 30 |
self.current_language = ""
|
|
@@ -117,9 +117,6 @@ class ControllableInterface(torch.nn.Module):
|
|
| 117 |
loudness_in_db=loudness_in_db)
|
| 118 |
return sr, wav, fig
|
| 119 |
|
| 120 |
-
@spaces.GPU
|
| 121 |
-
def get_gw():
|
| 122 |
-
return GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cuda" if torch.cuda.is_available() else "cpu")
|
| 123 |
|
| 124 |
title = "Controllable Text-to-Speech for over 7000 Languages"
|
| 125 |
article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
|
|
@@ -127,8 +124,7 @@ available_artificial_voices = 1000
|
|
| 127 |
path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
|
| 128 |
iso_to_name = load_json_from_path(path_to_iso_list)
|
| 129 |
text_selection = [f"{iso_to_name[iso_code]} Text ({iso_code})" for iso_code in iso_to_name]
|
| 130 |
-
|
| 131 |
-
controllable_ui = ControllableInterface(gan_wrapper=gw, available_artificial_voices=available_artificial_voices)
|
| 132 |
|
| 133 |
|
| 134 |
def read(prompt,
|
|
@@ -157,7 +153,6 @@ def read(prompt,
|
|
| 157 |
-24.)
|
| 158 |
return (sr, float2pcm(wav)), fig
|
| 159 |
|
| 160 |
-
|
| 161 |
iface = gr.Interface(fn=read,
|
| 162 |
inputs=[gr.Textbox(lines=2,
|
| 163 |
placeholder="write what you want the synthesis to read here...",
|
|
@@ -169,15 +164,15 @@ iface = gr.Interface(fn=read,
|
|
| 169 |
label="Select the Language of the Text (type on your keyboard to find it quickly)"),
|
| 170 |
gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
|
| 171 |
value=279,
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
outputs=[gr.Audio(type="numpy", label="Speech"),
|
| 180 |
-
|
| 181 |
title=title,
|
| 182 |
theme="default",
|
| 183 |
allow_flagging="never",
|
|
|
|
| 21 |
|
| 22 |
class ControllableInterface(torch.nn.Module):
|
| 23 |
|
| 24 |
+
def __init__(self, available_artificial_voices=1000):
|
| 25 |
super().__init__()
|
| 26 |
self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta")
|
| 27 |
+
self.wgan = GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cpu")
|
| 28 |
self.generated_speaker_embeds = list()
|
| 29 |
self.available_artificial_voices = available_artificial_voices
|
| 30 |
self.current_language = ""
|
|
|
|
| 117 |
loudness_in_db=loudness_in_db)
|
| 118 |
return sr, wav, fig
|
| 119 |
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
title = "Controllable Text-to-Speech for over 7000 Languages"
|
| 122 |
article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
|
|
|
|
| 124 |
path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
|
| 125 |
iso_to_name = load_json_from_path(path_to_iso_list)
|
| 126 |
text_selection = [f"{iso_to_name[iso_code]} Text ({iso_code})" for iso_code in iso_to_name]
|
| 127 |
+
controllable_ui = ControllableInterface(available_artificial_voices=available_artificial_voices)
|
|
|
|
| 128 |
|
| 129 |
|
| 130 |
def read(prompt,
|
|
|
|
| 153 |
-24.)
|
| 154 |
return (sr, float2pcm(wav)), fig
|
| 155 |
|
|
|
|
| 156 |
iface = gr.Interface(fn=read,
|
| 157 |
inputs=[gr.Textbox(lines=2,
|
| 158 |
placeholder="write what you want the synthesis to read here...",
|
|
|
|
| 164 |
label="Select the Language of the Text (type on your keyboard to find it quickly)"),
|
| 165 |
gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
|
| 166 |
value=279,
|
| 167 |
+
label="Random Seed for the artificial Voice"),
|
| 168 |
+
gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
|
| 169 |
+
gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
|
| 170 |
+
gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Energy Variance Scale"),
|
| 171 |
+
gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
|
| 172 |
+
gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
|
| 173 |
+
],
|
| 174 |
outputs=[gr.Audio(type="numpy", label="Speech"),
|
| 175 |
+
gr.Image(label="Visualization")],
|
| 176 |
title=title,
|
| 177 |
theme="default",
|
| 178 |
allow_flagging="never",
|