Spaces:
Sleeping
Sleeping
IlayMalinyak
committed on
Commit
·
a79c5f2
1
Parent(s):
82a319f
tested locally
Browse files
- .gitignore +1 -1
- req.txt +0 -0
- tasks/audio.py +3 -11
- tasks/run.py +15 -16
- tasks/utils/data.py +1 -0
- tasks/utils/dfs/test.csv +0 -0
- tasks/utils/dfs/train.csv +0 -0
- tasks/utils/dfs/train_val.csv +0 -0
- tasks/utils/dfs/val.csv +0 -0
- tasks/utils/models.py +1 -1
- tasks/utils/train.py +4 -3
.gitignore
CHANGED
|
@@ -14,6 +14,6 @@ eval-queue-bk/
|
|
| 14 |
eval-results-bk/
|
| 15 |
logs/
|
| 16 |
tasks/model
|
| 17 |
-
req.
|
| 18 |
|
| 19 |
emissions.csv
|
|
|
|
| 14 |
eval-results-bk/
|
| 15 |
logs/
|
| 16 |
tasks/model
|
| 17 |
+
req.txtal
|
| 18 |
|
| 19 |
emissions.csv
|
req.txt
ADDED
|
Binary file (20.5 kB). View file
|
|
|
tasks/audio.py
CHANGED
|
@@ -128,7 +128,6 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
| 128 |
accumulation_step=1, max_iter=np.inf,
|
| 129 |
exp_name=f"frugal_cnnencoder_inference")
|
| 130 |
predictions, true_labels, acc = trainer.predict(test_dl, device=device)
|
| 131 |
-
# true_labels = test_dataset["label"]
|
| 132 |
|
| 133 |
# Make random predictions (placeholder for actual model inference)
|
| 134 |
print("accuracy: ", acc)
|
|
@@ -144,7 +143,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
| 144 |
|
| 145 |
# Calculate accuracy
|
| 146 |
accuracy = accuracy_score(true_labels, predictions)
|
| 147 |
-
|
| 148 |
# Prepare results dictionary
|
| 149 |
results = {
|
| 150 |
"username": username,
|
|
@@ -162,14 +161,7 @@ async def evaluate_audio(request: AudioEvaluationRequest):
|
|
| 162 |
"test_seed": request.test_seed
|
| 163 |
}
|
| 164 |
}
|
|
|
|
|
|
|
| 165 |
|
| 166 |
return results
|
| 167 |
-
|
| 168 |
-
# if __name__ == "__main__":
|
| 169 |
-
# sample_request = AudioEvaluationRequest(
|
| 170 |
-
# dataset_name="rfcx/frugalai", # Replace with actual dataset name
|
| 171 |
-
# test_size=0.2, # Example values
|
| 172 |
-
# test_seed=42
|
| 173 |
-
# )
|
| 174 |
-
# #
|
| 175 |
-
# asyncio.run(evaluate_audio(sample_request))
|
|
|
|
| 128 |
accumulation_step=1, max_iter=np.inf,
|
| 129 |
exp_name=f"frugal_cnnencoder_inference")
|
| 130 |
predictions, true_labels, acc = trainer.predict(test_dl, device=device)
|
|
|
|
| 131 |
|
| 132 |
# Make random predictions (placeholder for actual model inference)
|
| 133 |
print("accuracy: ", acc)
|
|
|
|
| 143 |
|
| 144 |
# Calculate accuracy
|
| 145 |
accuracy = accuracy_score(true_labels, predictions)
|
| 146 |
+
|
| 147 |
# Prepare results dictionary
|
| 148 |
results = {
|
| 149 |
"username": username,
|
|
|
|
| 161 |
"test_seed": request.test_seed
|
| 162 |
}
|
| 163 |
}
|
| 164 |
+
|
| 165 |
+
print('results: ', results)
|
| 166 |
|
| 167 |
return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tasks/run.py
CHANGED
|
@@ -38,9 +38,7 @@ def create_dataframe(ds, save_name='train'):
|
|
| 38 |
# Flatten the nested dictionary structure
|
| 39 |
feature_dict = {'label': label}
|
| 40 |
for k, v in features.items():
|
| 41 |
-
|
| 42 |
-
for sub_k, sub_v in v.items():
|
| 43 |
-
feature_dict[f"{k}_{sub_k}"] = sub_v[0].item() # Aggregate (e.g., mean)
|
| 44 |
data.append(feature_dict)
|
| 45 |
# Convert to DataFrame
|
| 46 |
df = pd.DataFrame(data)
|
|
@@ -88,13 +86,14 @@ test_dl = DataLoader(test_ds,batch_size=data_args.batch_size, collate_fn=collate
|
|
| 88 |
|
| 89 |
|
| 90 |
x,y = create_dataframe(full_ds, save_name='train_val')
|
| 91 |
-
print(x.shape)
|
| 92 |
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
|
|
|
|
| 93 |
|
| 94 |
evals_result = {}
|
| 95 |
num_boost_round = 1000 # Set a large number of boosting rounds
|
| 96 |
|
| 97 |
# Watchlist to monitor performance on train and validation data
|
|
|
|
| 98 |
|
| 99 |
dtrain = xgb.DMatrix(x_train, label=y_train)
|
| 100 |
dval = xgb.DMatrix(x_val, label=y_val)
|
|
@@ -178,13 +177,13 @@ model = CNNKanFeaturesEncoder(xgb_model, model_args, kan_args.get_dict())
|
|
| 178 |
# model = KanEncoder(kan_args.get_dict())
|
| 179 |
model = model.to(local_rank)
|
| 180 |
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
|
| 189 |
# model = DDP(model, device_ids=[local_rank], output_device=local_rank)
|
| 190 |
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
|
@@ -209,11 +208,11 @@ trainer = Trainer(model=model, optimizer=optimizer,
|
|
| 209 |
range_update=None,
|
| 210 |
accumulation_step=1, max_iter=np.inf,
|
| 211 |
exp_name=f"frugal_kan_features_{exp_num}")
|
| 212 |
-
fit_res = trainer.fit(num_epochs=100, device=local_rank,
|
| 213 |
-
|
| 214 |
-
output_filename = f'{data_args.log_dir}/{datetime_dir}/{model_name}_frugal_{exp_num}.json'
|
| 215 |
-
with open(output_filename, "w") as f:
|
| 216 |
-
|
| 217 |
preds, tru, acc = trainer.predict(test_dl, local_rank)
|
| 218 |
print(f"Accuracy: {acc}")
|
| 219 |
|
|
|
|
| 38 |
# Flatten the nested dictionary structure
|
| 39 |
feature_dict = {'label': label}
|
| 40 |
for k, v in features.items():
|
| 41 |
+
feature_dict[f"{k}"] = v[0].item() # Aggregate (e.g., mean)
|
|
|
|
|
|
|
| 42 |
data.append(feature_dict)
|
| 43 |
# Convert to DataFrame
|
| 44 |
df = pd.DataFrame(data)
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
x,y = create_dataframe(full_ds, save_name='train_val')
|
|
|
|
| 89 |
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, random_state=42)
|
| 90 |
+
print(x_train.shape)
|
| 91 |
|
| 92 |
evals_result = {}
|
| 93 |
num_boost_round = 1000 # Set a large number of boosting rounds
|
| 94 |
|
| 95 |
# Watchlist to monitor performance on train and validation data
|
| 96 |
+
print(x_train.head())
|
| 97 |
|
| 98 |
dtrain = xgb.DMatrix(x_train, label=y_train)
|
| 99 |
dval = xgb.DMatrix(x_val, label=y_val)
|
|
|
|
| 177 |
# model = KanEncoder(kan_args.get_dict())
|
| 178 |
model = model.to(local_rank)
|
| 179 |
|
| 180 |
+
state_dict = torch.load(data_args.checkpoint_path, map_location=torch.device('cpu'))
|
| 181 |
+
new_state_dict = OrderedDict()
|
| 182 |
+
for key, value in state_dict.items():
|
| 183 |
+
if key.startswith('module.'):
|
| 184 |
+
key = key[7:]
|
| 185 |
+
new_state_dict[key] = value
|
| 186 |
+
missing, unexpected = model.load_state_dict(new_state_dict)
|
| 187 |
|
| 188 |
# model = DDP(model, device_ids=[local_rank], output_device=local_rank)
|
| 189 |
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
|
|
|
| 208 |
range_update=None,
|
| 209 |
accumulation_step=1, max_iter=np.inf,
|
| 210 |
exp_name=f"frugal_kan_features_{exp_num}")
|
| 211 |
+
# fit_res = trainer.fit(num_epochs=100, device=local_rank,
|
| 212 |
+
# early_stopping=10, only_p=False, best='loss', conf=True)
|
| 213 |
+
# output_filename = f'{data_args.log_dir}/{datetime_dir}/{model_name}_frugal_{exp_num}.json'
|
| 214 |
+
# with open(output_filename, "w") as f:
|
| 215 |
+
# json.dump(fit_res, f, indent=2)
|
| 216 |
preds, tru, acc = trainer.predict(test_dl, local_rank)
|
| 217 |
print(f"Accuracy: {acc}")
|
| 218 |
|
tasks/utils/data.py
CHANGED
|
@@ -57,6 +57,7 @@ class FFTDataset(IterableDataset):
|
|
| 57 |
orig_sample_rate=12000,
|
| 58 |
target_sample_rate=3000,
|
| 59 |
features=False):
|
|
|
|
| 60 |
self.dataset = original_dataset
|
| 61 |
self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
|
| 62 |
self.target_sample_rate = target_sample_rate
|
|
|
|
| 57 |
orig_sample_rate=12000,
|
| 58 |
target_sample_rate=3000,
|
| 59 |
features=False):
|
| 60 |
+
super().__init__()
|
| 61 |
self.dataset = original_dataset
|
| 62 |
self.resampler = T.Resample(orig_freq=orig_sample_rate, new_freq=target_sample_rate)
|
| 63 |
self.target_sample_rate = target_sample_rate
|
tasks/utils/dfs/test.csv
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tasks/utils/dfs/train.csv
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tasks/utils/dfs/train_val.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tasks/utils/dfs/val.csv
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tasks/utils/models.py
CHANGED
|
@@ -234,7 +234,7 @@ class CNNKanFeaturesEncoder(nn.Module):
|
|
| 234 |
for batch_idx in range(batch_size):
|
| 235 |
feature_dict = {}
|
| 236 |
for k, v in features[batch_idx].items():
|
| 237 |
-
feature_dict[f"
|
| 238 |
data.append(feature_dict)
|
| 239 |
|
| 240 |
return pd.DataFrame(data)
|
|
|
|
| 234 |
for batch_idx in range(batch_size):
|
| 235 |
feature_dict = {}
|
| 236 |
for k, v in features[batch_idx].items():
|
| 237 |
+
feature_dict[f"{k}"] = v[0].item()
|
| 238 |
data.append(feature_dict)
|
| 239 |
|
| 240 |
return pd.DataFrame(data)
|
tasks/utils/train.py
CHANGED
|
@@ -226,7 +226,7 @@ class Trainer(object):
|
|
| 226 |
|
| 227 |
def train_batch(self, batch, batch_idx, device):
|
| 228 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
| 229 |
-
# features = batch['audio']['features']
|
| 230 |
# cwt = batch['audio']['cwt_mag']
|
| 231 |
x = x.to(device).float()
|
| 232 |
fft = fft.to(device).float()
|
|
@@ -267,7 +267,7 @@ class Trainer(object):
|
|
| 267 |
|
| 268 |
def eval_batch(self, batch, batch_idx, device):
|
| 269 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
| 270 |
-
# features = batch['audio']['features']
|
| 271 |
|
| 272 |
# features = batch['audio']['features_arr'].to(device).float()
|
| 273 |
x = x.to(device).float()
|
|
@@ -294,6 +294,7 @@ class Trainer(object):
|
|
| 294 |
pbar = tqdm(test_dataloader)
|
| 295 |
for i,batch in enumerate(pbar):
|
| 296 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
|
|
|
| 297 |
x = x.to(device).float()
|
| 298 |
fft = fft.to(device).float()
|
| 299 |
x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
|
|
@@ -305,7 +306,7 @@ class Trainer(object):
|
|
| 305 |
cls_pred = (probs > 0.5).float()
|
| 306 |
acc = (cls_pred == y).sum()
|
| 307 |
predictions.extend(cls_pred.cpu().numpy())
|
| 308 |
-
true_labels.extend(y.cpu().numpy())
|
| 309 |
all_accs += acc
|
| 310 |
total += len(y)
|
| 311 |
pbar.set_description("acc: {:.4f}".format(acc))
|
|
|
|
| 226 |
|
| 227 |
def train_batch(self, batch, batch_idx, device):
|
| 228 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
| 229 |
+
# features = torch.stack(batch['audio']['features']).to(device).float()
|
| 230 |
# cwt = batch['audio']['cwt_mag']
|
| 231 |
x = x.to(device).float()
|
| 232 |
fft = fft.to(device).float()
|
|
|
|
| 267 |
|
| 268 |
def eval_batch(self, batch, batch_idx, device):
|
| 269 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
| 270 |
+
# features = torch.stack(batch['audio']['features']).to(device).float()
|
| 271 |
|
| 272 |
# features = batch['audio']['features_arr'].to(device).float()
|
| 273 |
x = x.to(device).float()
|
|
|
|
| 294 |
pbar = tqdm(test_dataloader)
|
| 295 |
for i,batch in enumerate(pbar):
|
| 296 |
x, fft, y = batch['audio']['array'], batch['audio']['fft_mag'], batch['label']
|
| 297 |
+
# features = batch['audio']['features']
|
| 298 |
x = x.to(device).float()
|
| 299 |
fft = fft.to(device).float()
|
| 300 |
x_fft = torch.cat((x.unsqueeze(dim=1), fft.unsqueeze(dim=1)), dim=1)
|
|
|
|
| 306 |
cls_pred = (probs > 0.5).float()
|
| 307 |
acc = (cls_pred == y).sum()
|
| 308 |
predictions.extend(cls_pred.cpu().numpy())
|
| 309 |
+
true_labels.extend(y.cpu().numpy().astype(np.int64))
|
| 310 |
all_accs += acc
|
| 311 |
total += len(y)
|
| 312 |
pbar.set_description("acc: {:.4f}".format(acc))
|