AlexK-PL committed on
Commit
a25696f
·
1 Parent(s): 187a298

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -18
app.py CHANGED
@@ -14,22 +14,22 @@ import numpy as np
14
  torch.manual_seed(1234)
15
  MAX_WAV_VALUE = 32768.0
16
 
17
- def init_models(hparams):
18
- # load trained tacotron2 + GST model:
19
- model = load_model(hparams)
20
- checkpoint_path = "trained_models/checkpoint_78000.model"
21
- model.load_state_dict(torch.load(checkpoint_path, map_location="cpu")['state_dict'])
22
- # model.to('cuda')
23
- _ = model.eval()
24
-
25
- # load pre trained MelGAN model for mel2audio:
26
- vocoder_checkpoint_path = "trained_models/nvidia_tacotron2_LJ11_epoch6400.pt"
27
- checkpoint = torch.load(vocoder_checkpoint_path, map_location="cpu")
28
- hp_melgan = load_hparam("melgan/config/default.yaml")
29
- vocoder_model = Generator(80)
30
- vocoder_model.load_state_dict(checkpoint['model_g'])
31
- # vocoder_model = vocoder_model.to('cuda')
32
- vocoder_model.eval(inference=False)
33
 
34
  def synthesize(text):
35
  sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
@@ -43,13 +43,11 @@ def synthesize(text):
43
  # mel2wav inference:
44
  with torch.no_grad():
45
  audio = vocoder_model.inference(mel_outputs_postnet)
46
-
47
  audio_numpy = audio.data.cpu().detach().numpy()
48
 
49
  return (22050, audio_numpy)
50
 
51
 
52
- init_models(hparams)
53
  iface = gr.Interface(fn=synthesize, inputs="text", outputs=[gr.Audio(label="Generated Speech", type="numpy"),])
54
  iface.launch()
55
 
 
14
  torch.manual_seed(1234)
15
  MAX_WAV_VALUE = 32768.0
16
 
17
+ # load trained tacotron2 + GST model:
18
+ model = load_model(hparams)
19
+ checkpoint_path = "trained_models/checkpoint_78000.model"
20
+ model.load_state_dict(torch.load(checkpoint_path, map_location="cpu")['state_dict'])
21
+ # model.to('cuda')
22
+ _ = model.eval()
23
+
24
+ # load pre trained MelGAN model for mel2audio:
25
+ vocoder_checkpoint_path = "trained_models/nvidia_tacotron2_LJ11_epoch6400.pt"
26
+ checkpoint = torch.load(vocoder_checkpoint_path, map_location="cpu")
27
+ hp_melgan = load_hparam("melgan/config/default.yaml")
28
+ vocoder_model = Generator(80)
29
+ vocoder_model.load_state_dict(checkpoint['model_g'])
30
+ # vocoder_model = vocoder_model.to('cuda')
31
+ vocoder_model.eval(inference=False)
32
+
33
 
34
  def synthesize(text):
35
  sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
 
43
  # mel2wav inference:
44
  with torch.no_grad():
45
  audio = vocoder_model.inference(mel_outputs_postnet)
 
46
  audio_numpy = audio.data.cpu().detach().numpy()
47
 
48
  return (22050, audio_numpy)
49
 
50
 
 
51
  iface = gr.Interface(fn=synthesize, inputs="text", outputs=[gr.Audio(label="Generated Speech", type="numpy"),])
52
  iface.launch()
53