Spaces:

TDN-M
/

GV-a

Running on Zero

TDN-M committed on Feb 1

Commit

78505ef

verified ·

1 Parent(s): 1ee99ff

Update tts.py

Files changed (1) hide show

tts.py CHANGED Viewed

@@ -12,6 +12,8 @@ checkpoint_dir = "model/"
 repo_id = "capleaf/viXTTS"
 use_deepspeed = False
 # Tạo thư mục nếu chưa tồn tại
 os.makedirs(checkpoint_dir, exist_ok=True)
@@ -37,8 +39,8 @@ config.load_json(xtts_config)
 MODEL = Xtts.init_from_config(config)
 MODEL.load_checkpoint(config, checkpoint_dir=checkpoint_dir, use_deepspeed=use_deepspeed)
-# Đảm bảo mô hình chạy trên CPU
-MODEL.to("cpu")
 # Danh sách ngôn ngữ được hỗ trợ (chỉ tiếng Việt và tiếng Anh)
 supported_languages = ["vi", "en"]
@@ -80,9 +82,9 @@ def generate_speech(text, language="vi", speaker_wav=None, normalize_text=True):
         with torch.no_grad():  # Tắt tính gradient để tiết kiệm bộ nhớ
             gpt_cond_latent, speaker_embedding = MODEL.get_conditioning_latents(
                 audio_path=speaker_wav,
-                gpt_cond_len=15,  # Giảm độ dài để tối ưu hóa cho CPU
-                gpt_cond_chunk_len=4,
-                max_ref_length=30,  # Giảm độ dài để tối ưu hóa cho CPU
             )
             # Tạo giọng nói
@@ -98,7 +100,7 @@ def generate_speech(text, language="vi", speaker_wav=None, normalize_text=True):
         # Lưu file âm thanh
         output_file = "output.wav"
-        torchaudio.save(output_file, torch.tensor(out["wav"]).unsqueeze(0), 24000)
         return output_file

 repo_id = "capleaf/viXTTS"
 use_deepspeed = False
+device = "cuda" if torch.cuda.is_available() and "T4" in torch.cuda.get_device_name(0) else "cpu"
 # Tạo thư mục nếu chưa tồn tại
 os.makedirs(checkpoint_dir, exist_ok=True)
 MODEL = Xtts.init_from_config(config)
 MODEL.load_checkpoint(config, checkpoint_dir=checkpoint_dir, use_deepspeed=use_deepspeed)
+# Tải mô hình vào thiết bị phù hợp
+MODEL.to(device)
 # Danh sách ngôn ngữ được hỗ trợ (chỉ tiếng Việt và tiếng Anh)
 supported_languages = ["vi", "en"]
         with torch.no_grad():  # Tắt tính gradient để tiết kiệm bộ nhớ
             gpt_cond_latent, speaker_embedding = MODEL.get_conditioning_latents(
                 audio_path=speaker_wav,
+                gpt_cond_len=30 if device == "cuda" else 15,  # Tăng độ dài khi dùng GPU
+                gpt_cond_chunk_len=8 if device == "cuda" else 4,
+                max_ref_length=60 if device == "cuda" else 30,
             )
             # Tạo giọng nói
         # Lưu file âm thanh
         output_file = "output.wav"
+        torchaudio.save(output_file, torch.tensor(out["wav"]).unsqueeze(0).to("cpu"), 24000)
         return output_file