File size: 1,513 Bytes
5a9b731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
""" 
test_archisound.py
    Desc: Testing the encoder from archisound (used in audio-diffusion-pytorch)
"""

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import scipy
import torch
import torchaudio
import os
import ast
import soundfile as sf

from archisound import ArchiSound

def to_soundfile_frames(wave):
    """Return a ``(channels, frames)`` tensor as a ``(frames, channels)`` array.

    The tensor is detached from autograd and copied to host memory before the
    NumPy conversion, so it works for tensors that require grad or that live
    on an accelerator. The transpose puts frames on the first axis, which is
    the layout both ``sf.write`` calls below are given.
    """
    return wave.detach().cpu().numpy().T


def main(data_loc='/data/robbizorg/music_datasets/fma/', out_dir='./assets/audios'):
    """Round-trip one FMA clip through the pretrained ArchiSound autoencoder.

    Steps:
      1. Load ``autoencoder1d-AT-v1`` and run a random-noise encode/decode
         pass as a smoke test.
      2. Load one example mp3, resample it to 48 kHz, encode and decode it.
      3. Write the reconstruction and the untouched original as 24-bit PCM
         WAV files into ``out_dir``.

    Args:
        data_loc: Root directory of the FMA dataset.
        out_dir: Directory that receives the two WAV files; created if missing.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # NOTE(review): assumes from_pretrained returns a torch.nn.Module-like
    # object (so `.to` is available) -- confirm against the archisound version
    # in use.
    autoencoder = ArchiSound.from_pretrained('autoencoder1d-AT-v1')
    autoencoder = autoencoder.to(device)

    example_audio_loc = os.path.join(data_loc, 'data/fma_large/000/000420.mp3')
    audio, sr = torchaudio.load(example_audio_loc)

    # NOTE(review): the smoke test below feeds a 2-channel input; a mono clip
    # may not be accepted by the model -- verify before pointing this at
    # other tracks.
    resamp_48k = torchaudio.functional.resample(audio, sr, 48000)
    resamp_48k = resamp_48k.unsqueeze(0).to(device)  # add a leading batch axis

    # Pure inference: skip autograd bookkeeping for the whole clip.
    with torch.no_grad():
        # Smoke test on noise. Shapes noted by the original author:
        # input [1, 2, 262144] -> latent [1, 32, 8192] -> output [1, 2, 262144].
        noise = torch.randn(1, 2, 2**18, device=device)
        autoencoder.decode(autoencoder.encode(noise))

        z_48k = autoencoder.encode(resamp_48k)
        y_48k = autoencoder.decode(z_48k)

    # A 16 kHz variant used to be sketched here but was disabled (commented
    # out); only the 48 kHz path is exercised.

    # sf.write does not create missing parent directories.
    os.makedirs(out_dir, exist_ok=True)
    sf.write(
        os.path.join(out_dir, 'example_reconst_archisound_48k.wav'),
        to_soundfile_frames(y_48k[0]),
        48000,
        'PCM_24',
    )
    sf.write(
        os.path.join(out_dir, 'example_orig_archisound.wav'),
        to_soundfile_frames(audio),
        sr,
        'PCM_24',
    )


if __name__ == "__main__":
    main()