File size: 968 Bytes
4ee33aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import torchaudio
from ttts.vocoder.feature_extractors import MelSpectrogramFeatures
from ttts.utils.infer_utils import load_model
import torch
from tqdm import tqdm
import os

# Checkpoint of the trained VQ-VAE. Python's file APIs do not expand a leading
# "~" (that is a shell feature), so resolve it to the user's home directory
# here instead of relying on load_model to do it.
model_path = os.path.expanduser(
    '~/tortoise_plus_zh/ttts/vqvae/logs/2023-11-24-01-21-25/model-30.pt'
)
# Module-level singletons shared by every process_vq() call.
# NOTE(review): the last argument is presumably the target device -- confirm
# against ttts.utils.infer_utils.load_model.
vqvae = load_model('vqvae', model_path, 'ttts/vqvae/config.json', 'cuda')
mel_extractor = MelSpectrogramFeatures().cuda()
def process_vq(path):
    """Encode one audio file to VQ-VAE codebook indices and save them to disk.

    The file is loaded with torchaudio, reduced to its first channel,
    resampled to 24000 Hz when its sample rate differs, moved to the GPU,
    converted with the module-level ``mel_extractor`` and passed to
    ``vqvae.get_codebook_indices``. The result is converted to a nested
    Python list and written with ``torch.save`` to ``path + '.melvq.pth'``.

    Args:
        path: Path of the audio file to process (a string; the output name
            is built by string concatenation).

    Returns:
        None. If the file cannot be loaded, the path and the error are
        printed and nothing is written.
    """
    try:
        audio, sr = torchaudio.load(path)
    except Exception as e:
        # Report the unreadable file and skip it rather than raising.
        print(path)
        print(e)
        return
    if audio.shape[0] > 1:
        # Keep only the first channel while preserving the leading dimension.
        audio = audio[0].unsqueeze(0)
    if sr != 24000:
        audio = torchaudio.transforms.Resample(sr, 24000)(audio)
    audio = audio.cuda()
    # Pure inference: keep both the mel extraction and the VQ-VAE lookup out
    # of autograd so no graph is built or retained.
    with torch.no_grad():
        mel = mel_extractor(audio)
        code = vqvae.get_codebook_indices(mel)
    outp = path + '.melvq.pth'
    out_dir = os.path.dirname(outp)
    # dirname is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError; the current directory needs no creation.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    torch.save(code.tolist(), outp)
    return