Update app.py
app.py CHANGED
@@ -51,18 +51,17 @@ from ttts.diffusion.aa_model import denormalize_tacotron_mel, normalize_tacotron
 # print(device)
 
 vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
-
-
+gpt = load_model('gpt',MODELS['gpt.pth'], './ttts/gpt/config.json',device)
+gpt.post_init_gpt2_config(use_deepspeed=False, kv_cache=False, half=False)
+tokenizer = VoiceBpeTokenizer('ttts/gpt/gpt_tts_tokenizer.json')
 def speak(text):
     pinyin = ' '.join(lazy_pinyin(text, style=Style.TONE3, neutral_tone_with_five=True))
-    tokenizer = VoiceBpeTokenizer('ttts/gpt/gpt_tts_tokenizer.json')
     text_tokens = torch.IntTensor(tokenizer.encode(pinyin)).unsqueeze(0).to(device)
     text_tokens = F.pad(text_tokens, (0, 1)) # This may not be necessary.
     text_tokens = text_tokens.to(device)
     print(pinyin)
     print(text_tokens)
-
-    gpt.post_init_gpt2_config(use_deepspeed=False, kv_cache=False, half=False)
+
     codes = gpt.inference_speech(auto_conditioning, text_tokens,
                                  do_sample=True,
                                  top_p=top_p,
@@ -81,8 +80,7 @@ def speak(text):
                                  conditioning_free=True, conditioning_free_k=2., sampler='dpm++2m')
     diffusion_conditioning = normalize_tacotron_mel(cond_mel)
     mel = do_spectrogram_diffusion(diffusion, diffuser, latent, diffusion_conditioning, temperature=1.0).detach().cpu()
-    wav = vocos.decode(mel).detach().cpu()
-    print(wav)
+    wav = vocos.decode(mel).detach().cpu().numpy()
     with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
         write(f.name, data=wav, rate=24000)
     return f.name
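The first hunk moves model setup out of speak(): load_model, post_init_gpt2_config, and the VoiceBpeTokenizer are now built once at module scope rather than on every call, so the GPT checkpoint and tokenizer load a single time when the Space starts. A small illustrative sketch of that pattern follows; slow_load and handle_request are toy stand-ins, not the Space's actual functions.

import time

def slow_load():
    # Stand-in for expensive one-time setup such as load_model(...) and
    # post_init_gpt2_config(...): run it once, not once per request.
    time.sleep(1.0)
    return {'ready': True}

# Built a single time at import, the same way the diff now treats gpt and tokenizer.
MODEL = slow_load()

def handle_request(text: str) -> str:
    # The per-request handler only reuses the already-loaded object.
    assert MODEL['ready']
    return text.upper()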
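The second hunk drops the print(wav) debug line and appends .numpy() to the decoded audio, presumably because write here is scipy.io.wavfile.write, which expects a NumPy array (not a torch.Tensor) and interprets float32 data as lying in [-1.0, 1.0]. Below is a minimal, self-contained sketch of that conversion; the helper name tensor_to_wav_file and the defensive clipping are illustrative additions, not part of the Space's code.

import tempfile

import numpy as np
import torch
from scipy.io.wavfile import write

def tensor_to_wav_file(wav: torch.Tensor, sample_rate: int = 24000) -> str:
    # Move off the GPU, detach from autograd, and convert to NumPy,
    # mirroring the .detach().cpu().numpy() chain in the diff.
    audio = wav.detach().cpu().numpy().squeeze()
    # Clip float audio into the [-1.0, 1.0] range scipy expects.
    audio = np.clip(audio.astype(np.float32), -1.0, 1.0)
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
        write(f.name, rate=sample_rate, data=audio)
    return f.name

For example, tensor_to_wav_file(torch.zeros(1, 24000)) writes one second of silence at 24 kHz and returns the temporary file's path.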