""" test_archisound.py Desc: Testing the encoder from archisound (used in audio-diffusion-pytorch) """ import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import scipy import torch import torchaudio import os import ast import soundfile as sf from archisound import ArchiSound if __name__ == "__main__": autoencoder = ArchiSound.from_pretrained('autoencoder1d-AT-v1') x = torch.randn(1, 2, 2**18) # [1, 2, 262144] z = autoencoder.encode(x) # [1, 32, 8192] y = autoencoder.decode(z) # [1, 2, 262144] data_loc = '/data/robbizorg/music_datasets/fma/' device = 'cuda' if torch.cuda.is_available() else 'cpu' example_audio_loc = os.path.join(data_loc, 'data/fma_large/000/000420.mp3') audio, sr = torchaudio.load(example_audio_loc) resamp_48k = torchaudio.functional.resample(audio, sr, 48000) resamp_16k = torchaudio.functional.resample(audio, sr, 16000) resamp_16k = resamp_16k.unsqueeze(0) resamp_48k = resamp_48k.unsqueeze(0) z_48k = autoencoder.encode(resamp_48k) # z_16k = autoencoder.encode(resamp_16k) y_48k = autoencoder.decode(z_48k) # y_16k = autoencoder.decode(z_16k) # sf.write('./assets/audios/example_reconst_archisound_16k.wav', y_16k[0], samplerate = 16000) sf.write('./assets/audios/example_reconst_archisound_48k.wav', y_48k[0].detach().numpy().T, 48000, 'PCM_24') sf.write('./assets/audios/example_orig_archisound.wav', audio.T, sr, 'PCM_24')