File size: 827 Bytes
4ee33aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import os
import torchaudio
import torch
from ttts.vocoder.feature_extractors import MelSpectrogramFeatures
device = 'cuda'
mel_extractor = MelSpectrogramFeatures().to(device)

def process_mel(wav_file):
    # print(f"Processing {wav_file}..")
    outfile = f'{wav_file}.mel.pth'
    if os.path.exists(outfile):
        return

    try:
        wave, sample_rate = torchaudio.load(wav_file)
        wave = wave.to(device)
        if wave.size(0) > 1:  # mix to mono
            wave = wave[0].unsqueeze(0)
        if sample_rate!=24000:
            resample = torchaudio.transforms.Resample(sample_rate,24000).to(device)
            wave = resample(wave)
    except Exception as e:
        print(e)
        print(f"Error with {wav_file}")
        return
    mel = mel_extractor(wave)
    torch.save(mel.cpu().detach(), outfile)