Flux9665 committed on
Commit
62d7978
·
1 Parent(s): 3a5e670

try to figure out how ZeroGPU works

Browse files
Architectures/ControllabilityGAN/wgan/wgan_qc.py CHANGED
@@ -11,12 +11,13 @@ from cvxopt import sparse
11
  from cvxopt import spmatrix
12
  from torch.autograd import grad as torch_grad
13
  from tqdm import tqdm
 
14
 
15
 
16
- class WassersteinGanQuadraticCost:
17
 
18
- def __init__(self, generator, discriminator, gen_optimizer, dis_optimizer, criterion, epochs, n_max_iterations,
19
- data_dimensions, batch_size, device, gamma=0.1, K=-1, milestones=[150000, 250000], lr_anneal=1.0):
20
  self.G = generator
21
  self.G_opt = gen_optimizer
22
  self.D = discriminator
@@ -242,7 +243,6 @@ class WassersteinGanQuadraticCost:
242
  else:
243
  latent_samples = self.G.sample_latent(num_samples, self.G.z_dim)
244
  latent_samples = latent_samples.to(self.device)
245
- print(self.device)
246
  if nograd:
247
  with torch.no_grad():
248
  generated_data = self.G(latent_samples, return_intermediate=return_intermediate)
 
11
  from cvxopt import spmatrix
12
  from torch.autograd import grad as torch_grad
13
  from tqdm import tqdm
14
+ import spaces
15
 
16
 
17
+ class WassersteinGanQuadraticCost(torch.nn.Module):
18
 
19
+ def __init__(self, generator, discriminator, gen_optimizer, dis_optimizer, criterion, epochs, n_max_iterations, data_dimensions, batch_size, device, gamma=0.1, K=-1, milestones=[150000, 250000], lr_anneal=1.0, *args, **kwargs):
20
+ super().__init__(*args, **kwargs)
21
  self.G = generator
22
  self.G_opt = gen_optimizer
23
  self.D = discriminator
 
243
  else:
244
  latent_samples = self.G.sample_latent(num_samples, self.G.z_dim)
245
  latent_samples = latent_samples.to(self.device)
 
246
  if nograd:
247
  with torch.no_grad():
248
  generated_data = self.G(latent_samples, return_intermediate=return_intermediate)
app.py CHANGED
@@ -21,10 +21,10 @@ from Utility.storage_config import MODELS_DIR
21
 
22
  class ControllableInterface(torch.nn.Module):
23
 
24
- def __init__(self, gan_wrapper, available_artificial_voices=1000):
25
  super().__init__()
26
  self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta")
27
- self.wgan = gan_wrapper
28
  self.generated_speaker_embeds = list()
29
  self.available_artificial_voices = available_artificial_voices
30
  self.current_language = ""
@@ -117,9 +117,6 @@ class ControllableInterface(torch.nn.Module):
117
  loudness_in_db=loudness_in_db)
118
  return sr, wav, fig
119
 
120
- @spaces.GPU
121
- def get_gw():
122
- return GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cuda" if torch.cuda.is_available() else "cpu")
123
 
124
  title = "Controllable Text-to-Speech for over 7000 Languages"
125
  article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
@@ -127,8 +124,7 @@ available_artificial_voices = 1000
127
  path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
128
  iso_to_name = load_json_from_path(path_to_iso_list)
129
  text_selection = [f"{iso_to_name[iso_code]} Text ({iso_code})" for iso_code in iso_to_name]
130
- gw = get_gw()
131
- controllable_ui = ControllableInterface(gan_wrapper=gw, available_artificial_voices=available_artificial_voices)
132
 
133
 
134
  def read(prompt,
@@ -157,7 +153,6 @@ def read(prompt,
157
  -24.)
158
  return (sr, float2pcm(wav)), fig
159
 
160
-
161
  iface = gr.Interface(fn=read,
162
  inputs=[gr.Textbox(lines=2,
163
  placeholder="write what you want the synthesis to read here...",
@@ -169,15 +164,15 @@ iface = gr.Interface(fn=read,
169
  label="Select the Language of the Text (type on your keyboard to find it quickly)"),
170
  gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
171
  value=279,
172
- label="Random Seed for the artificial Voice"),
173
- gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
174
- gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
175
- gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Energy Variance Scale"),
176
- gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
177
- gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
178
- ],
179
  outputs=[gr.Audio(type="numpy", label="Speech"),
180
- gr.Image(label="Visualization")],
181
  title=title,
182
  theme="default",
183
  allow_flagging="never",
 
21
 
22
  class ControllableInterface(torch.nn.Module):
23
 
24
+ def __init__(self, available_artificial_voices=1000):
25
  super().__init__()
26
  self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta")
27
+ self.wgan = GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cpu")
28
  self.generated_speaker_embeds = list()
29
  self.available_artificial_voices = available_artificial_voices
30
  self.current_language = ""
 
117
  loudness_in_db=loudness_in_db)
118
  return sr, wav, fig
119
 
 
 
 
120
 
121
  title = "Controllable Text-to-Speech for over 7000 Languages"
122
  article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
 
124
  path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
125
  iso_to_name = load_json_from_path(path_to_iso_list)
126
  text_selection = [f"{iso_to_name[iso_code]} Text ({iso_code})" for iso_code in iso_to_name]
127
+ controllable_ui = ControllableInterface(available_artificial_voices=available_artificial_voices)
 
128
 
129
 
130
  def read(prompt,
 
153
  -24.)
154
  return (sr, float2pcm(wav)), fig
155
 
 
156
  iface = gr.Interface(fn=read,
157
  inputs=[gr.Textbox(lines=2,
158
  placeholder="write what you want the synthesis to read here...",
 
164
  label="Select the Language of the Text (type on your keyboard to find it quickly)"),
165
  gr.Slider(minimum=0, maximum=available_artificial_voices, step=1,
166
  value=279,
167
+ label="Random Seed for the artificial Voice"),
168
+ gr.Slider(minimum=0.7, maximum=1.3, step=0.1, value=1.0, label="Duration Scale"),
169
+ gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Pitch Variance Scale"),
170
+ gr.Slider(minimum=0.5, maximum=1.5, step=0.1, value=1.0, label="Energy Variance Scale"),
171
+ gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Femininity / Masculinity"),
172
+ gr.Slider(minimum=-10.0, maximum=10.0, step=0.1, value=0.0, label="Voice Depth")
173
+ ],
174
  outputs=[gr.Audio(type="numpy", label="Speech"),
175
+ gr.Image(label="Visualization")],
176
  title=title,
177
  theme="default",
178
  allow_flagging="never",