English
music
emotion
File size: 899 Bytes
1adb3ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
import torch
import torchaudio
import torchaudio.transforms as T
import numpy as np
from music2latent import EncoderDecoder  # Import your custom model

class FeatureExtractorM2L:
    def __init__(self, device_id=0, sr=44100):
        self.device_id = device_id
        self.sr = sr
        self.device = torch.device(f"cuda:{self.device_id}" if torch.cuda.is_available() else "cpu")
        self.model = EncoderDecoder(device=self.device)

    def extract_features_from_segment(self, segment, sample_rate, save_path):
        input_audio = segment.unsqueeze(0).to(self.device)  # Add batch dimension and move to the device

        with torch.no_grad():
            model_outputs = self.model.encode(input_audio, extract_features=True)

        features = model_outputs.mean(dim=-1).cpu().numpy()
        np.save(save_path, features)