TDN-M committed on
Commit
4eeb5fe
·
verified ·
1 Parent(s): 092d48e

Update tts.py

Browse files
Files changed (1) hide show
  1. tts.py +58 -0
tts.py CHANGED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import torchaudio
4
+ from TTS.tts.configs.xtts_config import XttsConfig
5
+ from TTS.tts.models.xtts import Xtts
6
+ from huggingface_hub import snapshot_download, hf_hub_download
7
+ from vinorm import TTSnorm
8
+
9
+ def generate_speech(text, language="vi", speaker_wav=None):
10
+ # Tải mô hình nếu chưa được tải
11
+ checkpoint_dir = "model/"
12
+ repo_id = "capleaf/viXTTS"
13
+ use_deepspeed = False
14
+
15
+ os.makedirs(checkpoint_dir, exist_ok=True)
16
+
17
+ required_files = ["model.pth", "config.json", "vocab.json", "speakers_xtts.pth"]
18
+ files_in_dir = os.listdir(checkpoint_dir)
19
+ if not all(file in files_in_dir for file in required_files):
20
+ snapshot_download(
21
+ repo_id=repo_id,
22
+ repo_type="model",
23
+ local_dir=checkpoint_dir,
24
+ )
25
+ hf_hub_download(
26
+ repo_id="coqui/XTTS-v2",
27
+ filename="speakers_xtts.pth",
28
+ local_dir=checkpoint_dir,
29
+ )
30
+
31
+ # Cấu hình và tải mô hình
32
+ xtts_config = os.path.join(checkpoint_dir, "config.json")
33
+ config = XttsConfig()
34
+ config.load_json(xtts_config)
35
+ MODEL = Xtts.init_from_config(config)
36
+ MODEL.load_checkpoint(config, checkpoint_dir=checkpoint_dir, use_deepspeed=use_deepspeed)
37
+
38
+ if torch.cuda.is_available():
39
+ MODEL.cuda()
40
+
41
+ # Chuẩn hóa văn bản
42
+ normalized_text = TTSnorm(text)
43
+
44
+ # Tạo giọng nói
45
+ with torch.no_grad():
46
+ gpt_cond_latent, speaker_embedding = MODEL.get_conditioning_latents(audio_path=speaker_wav)
47
+ out = MODEL.inference(
48
+ normalized_text,
49
+ language,
50
+ gpt_cond_latent,
51
+ speaker_embedding,
52
+ temperature=0.7,
53
+ )
54
+
55
+ # Lưu file âm thanh
56
+ output_file = "output.wav"
57
+ torchaudio.save(output_file, torch.tensor(out["wav"]).unsqueeze(0), 22050)
58
+ return output_file