import os
import torch
import torchaudio
import torchaudio.transforms as T
import numpy as np
from music2latent import EncoderDecoder  # music2latent encoder/decoder model

class FeatureExtractorM2L:
    def __init__(self, device_id=0, sr=44100):
        self.device_id = device_id
        self.sr = sr
        # Use the requested GPU when CUDA is available, otherwise fall back to CPU
        self.device = torch.device(f"cuda:{self.device_id}" if torch.cuda.is_available() else "cpu")
        self.model = EncoderDecoder(device=self.device)

    def extract_features_from_segment(self, segment, sample_rate, save_path):
        # segment: 1-D mono waveform tensor; add a batch dimension and move it to the device
        input_audio = segment.unsqueeze(0).to(self.device)
        with torch.no_grad():
            model_outputs = self.model.encode(input_audio, extract_features=True)
        # Average the encoder features over the time axis and save them as a .npy file
        features = model_outputs.mean(dim=-1).cpu().numpy()
        np.save(save_path, features)
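
# Usage sketch (assumptions: "audio.wav" and "features.npy" are placeholder paths;
# the clip is downmixed to mono, resampled to 44.1 kHz, and trimmed to 10 seconds
# before encoding).
if __name__ == "__main__":
    extractor = FeatureExtractorM2L(device_id=0, sr=44100)
    waveform, sr = torchaudio.load("audio.wav")  # shape: (channels, samples)
    waveform = waveform.mean(dim=0)              # downmix to mono -> (samples,)
    if sr != extractor.sr:
        waveform = T.Resample(orig_freq=sr, new_freq=extractor.sr)(waveform)
    segment = waveform[: extractor.sr * 10]      # first 10 seconds of the clip
    extractor.extract_features_from_segment(segment, extractor.sr, "features.npy")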