sander-wood committed on
Commit bbaca30 · verified · 1 Parent(s): dc083ce

Delete music_classification

music_classification/README.md DELETED
@@ -1,46 +0,0 @@
- # Music Classification Codebase
-
- ## Overview
- The linear probe is a classification tool that leverages feature representations for supervised learning tasks. This codebase includes scripts for training a linear classification model and for classifying new feature data. The features can be extracted with the M3 or CLaMP 2 models; the time dimension must be preserved and **not normalized**. Below is a description of the scripts contained in the `music_classification/` folder.
-
- ## Repository Structure
- The `music_classification/` folder contains the following scripts:
-
- ### 1. `config.py`
- This script defines the configuration for linear probe training and inference, specifying training data paths and parameters such as the learning rate, number of epochs, and hidden size.
-
- ### 2. `inference_cls.py`
- This script classifies feature vectors using a pre-trained linear probe model.
-
- #### JSON Output Format
- The resulting JSON file contains a dictionary with the following structure:
- ```json
- {
-     "path/to/feature1.npy": "class_A",
-     "path/to/feature2.npy": "class_B",
-     "path/to/feature3.npy": "class_A"
- }
- ```
- - **Key**: The path to the input feature file (e.g., `feature1.npy`).
- - **Value**: The predicted class label assigned by the linear probe model (e.g., `class_A`).
-
- #### Usage
- ```bash
- python inference_cls.py <feature_folder> <output_file>
- ```
- - `feature_folder`: Directory containing the input feature files (in `.npy` format).
- - `output_file`: File path for saving the classification results (in JSON format).
-
- ### 3. `train_cls.py`
- This script trains the linear classification model.
-
- #### Usage
- ```bash
- python train_cls.py
- ```
-
- ### 4. `utils.py`
- This utility script defines the architecture of the linear classification model.
-
- ## Naming Convention
- All `.npy` files used in this codebase must follow the naming convention `label_filename.npy`, where the filename must not contain any underscores (`_`).
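For orientation, a minimal sketch of how the naming convention and the JSON output fit together; the file names and paths below are hypothetical, and the label split mirrors the `split('_')[0]` logic used in `train_cls.py`:

```python
import json
import os
from collections import Counter

# Under the `label_filename.npy` convention, the text before the first
# underscore is treated as the class label (hypothetical file name):
label = os.path.basename("jazz_track01.npy").split('_')[0]  # -> "jazz"

# Inspecting the JSON written by inference_cls.py (hypothetical path):
with open("cls_results.json", "r", encoding="utf-8") as f:
    results = json.load(f)            # {feature_path: predicted_label, ...}
print(Counter(results.values()))      # how many files were assigned to each class
```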
 
music_classification/config.py DELETED
@@ -1,26 +0,0 @@
- # Configuration for linear probe training and classification
- TRAIN_FOLDERS = [
-     "<path_to_training_data>"  # Directory containing training data
- ]
-
- EVAL_FOLDERS = [
-     ""  # (Optional) Directory containing evaluation data
- ]
-
- EVAL_SPLIT = 0.2  # Fraction of training data to use for evaluation
-
- # Weights and Biases configuration
- WANDB_KEY = "<your_wandb_key>"  # Set WANDB_LOG = False if you have no API key for Weights and Biases logging
-
- # Model Configuration
- INPUT_HIDDEN_SIZE = 768  # Input hidden size
- HIDDEN_SIZE = 768  # Model hidden size
- NUM_EPOCHS = 1000  # Max number of epochs to train (early stopping can terminate earlier)
- LEARNING_RATE = 1e-5  # Optimizer learning rate
- BALANCED_TRAINING = False  # Set to True to balance labels in the training data
- WANDB_LOG = False  # Set to True to log training metrics to Weights and Biases
-
- # Paths Configuration
- last_folder_name = TRAIN_FOLDERS[-1].split('/')[-1]
- WEIGHTS_PATH = f"weights-{last_folder_name}.pth"  # Weights file path
- LOGS_PATH = f"logs-{last_folder_name}.txt"  # Log file path
 
music_classification/inference_cls.py DELETED
@@ -1,71 +0,0 @@
- import os
- import json
- import torch
- import random
- import numpy as np
- from utils import *
- from tqdm import tqdm
- from samplings import *
- import argparse
-
- def list_files_in_directory(directories, extensions=["npy"]):
-     file_list = []
-
-     for directory in directories:
-         for root, dirs, files in os.walk(directory):
-             for file in files:
-                 if any(file.endswith(ext) for ext in extensions):
-                     file_path = os.path.join(root, file)
-                     file_list.append(file_path)
-
-     return file_list
-
- if __name__ == "__main__":
-     # Set up the argument parser
-     parser = argparse.ArgumentParser(description="Feature extraction and classification with CLaMP2.")
-     parser.add_argument("feature_folder", type=str, help="Directory containing input feature files.")
-     parser.add_argument("output_file", type=str, help="File to save the classification results (JSON format).")
-
-     # Parse arguments
-     args = parser.parse_args()
-     feature_folder = args.feature_folder
-     output_file = args.output_file
-
-     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-     seed = 42
-     random.seed(seed)
-     np.random.seed(seed)
-     torch.manual_seed(seed)
-     torch.cuda.manual_seed_all(seed)
-     torch.backends.cudnn.deterministic = True
-     torch.backends.cudnn.benchmark = False
-
-     checkpoint = torch.load(WEIGHTS_PATH, map_location='cpu')
-     print(f"Successfully Loaded Checkpoint from Epoch {checkpoint['epoch']} with acc {checkpoint['max_eval_acc']}")
-     label2idx = checkpoint['labels']
-     idx2label = {idx: label for label, idx in label2idx.items()}  # Create the reverse index-to-label mapping
-     model = LinearClassification(num_classes=len(label2idx))
-     model = model.to(device)
-
-     # Print the number of trainable parameters
-     print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
-
-     model.eval()
-     model.load_state_dict(checkpoint['model'])
-
-     # Collect the feature files under the given folder
-     feature_files = list_files_in_directory([feature_folder])
-     cls_results = {}
-
-     for filepath in tqdm(feature_files):
-         outputs = np.load(filepath)[0]
-         outputs = torch.from_numpy(outputs).to(device)
-         outputs = outputs.unsqueeze(0)
-         cls_list = model(outputs)[0].tolist()
-         max_prob = max(cls_list)
-         cls_idx = cls_list.index(max_prob)
-         cls_label = idx2label[cls_idx]
-         cls_results[filepath] = cls_label
-
-     with open(output_file, "w", encoding="utf-8") as f:
-         json.dump(cls_results, f)
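For reference, a minimal sketch of the feature layout this script expects, assuming each `.npy` file stores an array of shape `(1, L, INPUT_HIDDEN_SIZE)` with the time dimension preserved; the shape values and file name below are illustrative:

```python
import numpy as np

# Illustrative only: one feature file with a leading batch axis of 1,
# L time steps, and a 768-dimensional feature vector per step.
L = 128  # hypothetical number of time steps
features = np.random.randn(1, L, 768).astype(np.float32)
np.save("classical_piece01.npy", features)  # "classical" would be parsed as the label

# inference_cls.py loads this as np.load(path)[0] -> (L, 768), re-adds a batch
# axis, and the model averages over the L dimension before classification.
```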
 
music_classification/train_cls.py DELETED
@@ -1,293 +0,0 @@
- import os
- import time
- import math
- import wandb
- import torch
- import random
- import numpy as np
- from utils import *
- from config import *
- from tqdm import tqdm
- from sklearn.metrics import f1_score
- from torch.amp import autocast, GradScaler
- from torch.utils.data import Dataset, DataLoader
- from transformers import get_constant_schedule_with_warmup
- import torch.distributed as dist
- from torch.nn.parallel import DistributedDataParallel as DDP
- from torch.utils.data.distributed import DistributedSampler
-
- # Set up distributed training
- world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
- global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else 0
- local_rank = int(os.environ['LOCAL_RANK']) if 'LOCAL_RANK' in os.environ else 0
-
- if world_size > 1:
-     torch.cuda.set_device(local_rank)
-     device = torch.device("cuda", local_rank)
-     dist.init_process_group(backend='nccl')
- else:
-     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
-
- # Set random seed
- seed = 42 + global_rank
- random.seed(seed)
- np.random.seed(seed)
- torch.manual_seed(seed)
- torch.cuda.manual_seed_all(seed)
- torch.backends.cudnn.deterministic = True
- torch.backends.cudnn.benchmark = False
-
- batch_size = 1
-
- def collate_batch(input_tensors):
-
-     input_tensors, labels = zip(*input_tensors)
-     input_tensors = torch.stack(input_tensors, dim=0)
-     labels = torch.stack(labels, dim=0)
-
-     return input_tensors.to(device), labels.to(device)
-
- def list_files_in_directory(directories):
-     file_list = []
-
-     for directory in directories:
-         for root, dirs, files in os.walk(directory):
-             for file in files:
-                 if file.endswith(".npy"):
-                     file_path = os.path.join(root, file)
-                     file_list.append(file_path)
-     return file_list
-
- class TensorDataset(Dataset):
-     def __init__(self, filenames):
-         print(f"Loading {len(filenames)} files for classification")
-         self.filenames = []
-         self.label2idx = {}
-
-         for filename in tqdm(filenames):
-             label = os.path.basename(filename).split('_')[0]
-
-             self.filenames.append(filename)
-             if label not in self.label2idx:
-                 self.label2idx[label] = len(self.label2idx)
-         print(f"Found {len(self.label2idx)} classes")
-
-     def __len__(self):
-         return len(self.filenames)
-
-     def __getitem__(self, idx):
-
-         filename = self.filenames[idx]
-         label = os.path.basename(filename).split('_')[0]
-         label = self.label2idx[label]
-
-         # Load the numpy feature file
-         data = np.load(filename)
-         data = torch.from_numpy(data)[0]
-         label = torch.tensor(label)
-
-         return data, label
-
- class BalancedTensorDataset(Dataset):
-     def __init__(self, filenames):
-         print(f"Loading {len(filenames)} files for classification")
-         self.filenames = filenames
-         self.label2idx = {}
-         self.label2files = {}
-
-         for filename in tqdm(filenames):
-             label = os.path.basename(filename).split('_')[0]
-             if label not in self.label2idx:
-                 self.label2idx[label] = len(self.label2idx)
-             if label not in self.label2files:
-                 self.label2files[label] = []
-             self.label2files[label].append(filename)
-         print(f"Found {len(self.label2idx)} classes")
-
-         self.min_samples = min(len(files) for files in self.label2files.values())
-
-         self._update_epoch_filenames()
-
-     def _update_epoch_filenames(self):
-         self.epoch_filenames = []
-         for label, files in self.label2files.items():
-             sampled_files = random.sample(files, self.min_samples)
-             self.epoch_filenames.extend(sampled_files)
-
-         random.shuffle(self.epoch_filenames)
-
-     def __len__(self):
-         return len(self.epoch_filenames)
-
-     def __getitem__(self, idx):
-         filename = self.epoch_filenames[idx]
-         label = os.path.basename(filename).split('_')[0]
-         label = self.label2idx[label]
-
-         data = np.load(filename)
-         data = torch.from_numpy(data)[0]
-         label = torch.tensor(label)
-
-         return data, label
-
-     def on_epoch_end(self):
-         self._update_epoch_filenames()
-
- # Load filenames under the train and eval folders
- train_files = list_files_in_directory(TRAIN_FOLDERS)
- eval_files = list_files_in_directory(EVAL_FOLDERS)
-
- if len(eval_files) == 0:
-     random.shuffle(train_files)
-     eval_files = train_files[:math.ceil(len(train_files)*EVAL_SPLIT)]
-     train_files = train_files[math.ceil(len(train_files)*EVAL_SPLIT):]
- if BALANCED_TRAINING:
-     train_set = BalancedTensorDataset(train_files)
- else:
-     train_set = TensorDataset(train_files)
- eval_set = TensorDataset(eval_files)
- eval_set.label2idx = train_set.label2idx
-
- model = LinearClassification(num_classes=len(train_set.label2idx))
- model = model.to(device)
-
- # Print the number of trainable parameters
- print("Parameter Number: " + str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
-
- if world_size > 1:
-     model = DDP(model, device_ids=[local_rank], output_device=local_rank, find_unused_parameters=True)
-
- scaler = GradScaler()
- is_autocast = True
- optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
- loss_fn = torch.nn.CrossEntropyLoss()
-
- # Call the model with a batch of input
- def process_one_batch(batch):
-     input_tensors, labels = batch
-     logits = model(input_tensors)
-     loss = loss_fn(logits, labels)
-     prediction = torch.argmax(logits, dim=1)
-     acc_num = torch.sum(prediction == labels)
-
-     return loss, acc_num, prediction, labels
-
- # Do one epoch of training
- def train_epoch():
-     tqdm_train_set = tqdm(train_set)
-     total_train_loss = 0
-     total_acc_num = 0
-     iter_idx = 1
-     model.train()
-
-     for batch in tqdm_train_set:
-         if is_autocast:
-             with autocast(device_type='cuda'):
-                 loss, acc_num, prediction, labels = process_one_batch(batch)
-             scaler.scale(loss).backward()
-             scaler.step(optimizer)
-             scaler.update()
-         else:
-             loss, acc_num, prediction, labels = process_one_batch(batch)
-             loss.backward()
-             optimizer.step()
-
-         lr_scheduler.step()
-         model.zero_grad(set_to_none=True)
-         total_train_loss += loss.item()
-         total_acc_num += acc_num.item()
-         tqdm_train_set.set_postfix({str(global_rank)+'_train_acc': total_acc_num / (iter_idx*batch_size)})
-         # Log the training accuracy to wandb
-         if global_rank == 0 and WANDB_LOG:
-             wandb.log({"acc": total_acc_num / (iter_idx*batch_size)})
-
-         iter_idx += 1
-
-     if BALANCED_TRAINING:
-         train_set.dataset.on_epoch_end()
-
-     return total_acc_num / ((iter_idx-1)*batch_size)
-
- # Do one epoch of evaluation
- def eval_epoch():
-     tqdm_eval_set = tqdm(eval_set)
-     total_eval_loss = 0
-     total_acc_num = 0
-     iter_idx = 1
-     model.eval()
-
-     all_predictions = []
-     all_labels = []
-
-     # Evaluate data for one epoch
-     for batch in tqdm_eval_set:
-         with torch.no_grad():
-             loss, acc_num, prediction, labels = process_one_batch(batch)
-             total_eval_loss += loss.item()
-             total_acc_num += acc_num.item()
-
-             # Accumulate predictions and labels
-             all_predictions.extend(prediction.cpu().numpy())
-             all_labels.extend(labels.cpu().numpy())
-
-         tqdm_eval_set.set_postfix({str(global_rank)+'_eval_acc': total_acc_num / (iter_idx*batch_size)})
-         iter_idx += 1
-
-     # Compute the macro F1 score
-     f1_macro = f1_score(all_labels, all_predictions, average='macro')
-     return total_acc_num / ((iter_idx - 1) * batch_size), f1_macro
-
- # Train and evaluate
- if __name__ == "__main__":
-
-     label2idx = train_set.label2idx
-     max_eval_acc = 0
-     train_sampler = DistributedSampler(train_set, num_replicas=world_size, rank=global_rank)
-     eval_sampler = DistributedSampler(eval_set, num_replicas=world_size, rank=global_rank)
-
-     train_set = DataLoader(train_set, batch_size=batch_size, collate_fn=collate_batch, sampler=train_sampler, shuffle=(train_sampler is None))
-     eval_set = DataLoader(eval_set, batch_size=batch_size, collate_fn=collate_batch, sampler=eval_sampler, shuffle=(eval_sampler is None))
-
-     model = model.to(device)
-     optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
-     # Create the warmup scheduler after the optimizer it will actually step
-     lr_scheduler = get_constant_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=len(train_set))
-
-     if WANDB_LOG and global_rank == 0:
-         # Initialize wandb
-         if WANDB_KEY:
-             wandb.login(key=WANDB_KEY)
-         wandb.init(project="linear",
-                    name=WEIGHTS_PATH.replace("weights-", "").replace(".pth", ""))
-
-     for epoch in range(1, NUM_EPOCHS+1):
-         train_sampler.set_epoch(epoch)
-         eval_sampler.set_epoch(epoch)
-         print('-' * 21 + "Epoch " + str(epoch) + '-' * 21)
-         train_acc = train_epoch()
-         eval_acc, eval_f1_macro = eval_epoch()
-         if global_rank == 0:
-             with open(LOGS_PATH, 'a') as f:
-                 f.write("Epoch " + str(epoch) + "\ntrain_acc: " + str(train_acc) + "\neval_acc: " + str(eval_acc) + "\neval_f1_macro: " + str(eval_f1_macro) + "\ntime: " + time.asctime(time.localtime(time.time())) + "\n\n")
-             if eval_acc > max_eval_acc:
-                 best_epoch = epoch
-                 max_eval_acc = eval_acc
-                 checkpoint = {
-                     'model': model.module.state_dict() if hasattr(model, "module") else model.state_dict(),
-                     'optimizer': optimizer.state_dict(),
-                     'lr_sched': lr_scheduler.state_dict(),
-                     'epoch': epoch,
-                     'best_epoch': best_epoch,
-                     'max_eval_acc': max_eval_acc,
-                     "labels": label2idx
-                 }
-                 torch.save(checkpoint, WEIGHTS_PATH)
-                 with open(LOGS_PATH, 'a') as f:
-                     f.write("Best Epoch so far!\n\n\n")
-
-         if world_size > 1:
-             dist.barrier()
-
-     if global_rank == 0:
-         print("Best Eval Epoch : " + str(best_epoch))
-         print("Max Eval Accuracy : " + str(max_eval_acc))
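A note on launching: the script reads `WORLD_SIZE`, `RANK`, and `LOCAL_RANK` from the environment, so a plain `python train_cls.py` runs single-process, while a multi-GPU run would typically be started with something like `torchrun --nproc_per_node=4 train_cls.py` (process count illustrative). The checkpoint it saves is what `inference_cls.py` later loads; a minimal sketch of inspecting it, with the weights path depending on `config.py`:

```python
import torch

# Illustrative: the checkpoint written by train_cls.py bundles the model state
# with bookkeeping that inference_cls.py relies on.
checkpoint = torch.load("weights-<folder_name>.pth", map_location="cpu")  # placeholder path
print(checkpoint["epoch"], checkpoint["best_epoch"], checkpoint["max_eval_acc"])
print(checkpoint["labels"])  # label -> index mapping used to decode predictions
```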
 
music_classification/utils.py DELETED
@@ -1,22 +0,0 @@
- import torch
- from config import *
-
- class LinearClassification(torch.nn.Module):
-     def __init__(self, num_classes):
-         super(LinearClassification, self).__init__()
-         self.fc1 = torch.nn.Linear(INPUT_HIDDEN_SIZE, HIDDEN_SIZE)
-         self.relu = torch.nn.ReLU()
-         self.fc2 = torch.nn.Linear(HIDDEN_SIZE, num_classes)
-         self.softmax = torch.nn.Softmax(dim=1)
-
-     def forward(self, x):
-         # Apply the linear layer and ReLU to each time step
-         x = self.fc1(x)  # x shape (B, L, H) -> (B, L, hidden_size)
-         x = self.relu(x)
-
-         # Average over the time steps (L dimension)
-         x = x.mean(dim=1)  # Now x has shape (B, hidden_size)
-
-         x = self.fc2(x)  # Apply the final layer: (B, hidden_size) -> (B, num_classes)
-         x = self.softmax(x)
-         return x
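To make the expected tensor shapes concrete, a minimal sketch of calling the model on dummy features; the batch size, sequence length, and class count here are illustrative:

```python
import torch
from utils import LinearClassification  # hidden sizes come from config.py

model = LinearClassification(num_classes=5)  # hypothetical number of classes
features = torch.randn(2, 128, 768)          # (B, L, INPUT_HIDDEN_SIZE)
probs = model(features)                      # (B, num_classes); softmax rows sum to 1
print(probs.shape)                           # torch.Size([2, 5])
```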