wi-lab
/

lwm-v1.1

@@ -1,446 +1,446 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Dec 20 09:32:12 2024
-This script contains the LWM pre-training and task-specific fine-tuning functions.
-@author: Sadjad Alikhani
-"""
-import torch
-import torch.nn as nn
-from tqdm import tqdm
-import matplotlib.pyplot as plt
-import os
-import csv
-from utils import count_parameters
-import time
-#%% LOSS FUNCTION
-def nmse_loss(y_pred, y_true):
-    y_pred_flat = y_pred.view(y_pred.size(0), -1)
-    y_true_flat = y_true.view(y_true.size(0), -1)
-    mse = torch.sum((y_true_flat - y_pred_flat)**2, dim=-1)
-    normalization = torch.sum(y_true_flat**2, dim=-1)
-    return mse / normalization
-#%%
-def train_lwm(model, train_loaders, val_loaders, optimizer, scheduler, epochs, device, save_dir="models", log_file="training_log.csv"):
-    if not os.path.exists(save_dir):
-        os.makedirs(save_dir)
-    # Initialize CSV log
-    if not os.path.exists(log_file):
-        with open(log_file, mode='w', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow(["Epoch", "Train NMSE", "Validation NMSE", "Learning Rate", "Best Model"])
-    train_nmse_losses = []
-    val_nmse_losses = []
-    best_val_nmse = float('inf')
-    for epoch in range(epochs):
-        model.train()
-        train_nmse = 0.0
-        train_samples = 0
-        # Training loop across all buckets
-        print(f"\nEpoch {epoch + 1}/{epochs} [Training]")
-        for length, train_loader in train_loaders.items():
-            print(f"Processing sequences of length {length}")
-            with tqdm(train_loader, desc=f"Length {length} [Training]", unit="batch") as t:
-                for batch in t:
-                    # train_batches += 1
-                    optimizer.zero_grad()
-                    # Move data to device
-                    input_ids, masked_tokens, masked_pos = [b.to(device) for b in batch]
-                    # Forward pass
-                    logits_lm, _, _ = model(input_ids, masked_pos)
-                    # Compute NMSE
-                    loss = torch.sum(nmse_loss(masked_tokens, logits_lm))
-                    loss.backward()
-                    optimizer.step()
-                    scheduler.step()
-                    train_nmse += loss.item()
-                    train_samples += input_ids.shape[0]
-                    # Update progress bar
-                    t.set_postfix({"nmse": train_nmse/train_samples, "lr": scheduler.get_last_lr()[0]})
-        # Average NMSE across training batches
-        train_nmse /= max(train_samples, 1)
-        train_nmse_losses.append(train_nmse)
-        if epoch % 2 == 0:
-            # Validation loop across all buckets
-            model.eval()
-            val_nmse = 0.0
-            val_samples = 0
-            with torch.no_grad():
-                print(f"\nEpoch {epoch + 1}/{epochs} [Validation]")
-                for length, val_loader in val_loaders.items():
-                    print(f"Processing sequences of length {length}")
-                    with tqdm(val_loader, desc=f"Length {length} [Validation]", unit="batch") as t:
-                        for batch in t:
-                            # val_batches += 1
-                            # Move data to device
-                            input_ids, masked_tokens, masked_pos = [b.to(device) for b in batch]
-                            # Forward pass
-                            logits_lm, _, _ = model(input_ids, masked_pos)
-                            # Compute NMSE
-                            loss = torch.sum(nmse_loss(masked_tokens, logits_lm))
-                            val_nmse += loss.item()
-                            val_samples += input_ids.shape[0]
-                            # Update progress bar
-                            t.set_postfix({"nmse": val_nmse/val_samples})
-            # Average NMSE across validation batches
-            val_nmse /= max(val_samples, 1)
-            val_nmse_losses.append(val_nmse)
-            # Save model if validation NMSE improves
-            is_best_model = False
-            if val_nmse < best_val_nmse:
-                best_val_nmse = val_nmse
-                model_path = os.path.join(save_dir, f"lwm_epoch{epoch+1}_train{train_nmse:.4f}_val{val_nmse:.4f}.pth")
-                torch.save(model.state_dict(), model_path)
-                print(f"Model saved: {model_path}")
-                is_best_model = True
-        # Log the results
-        print(f"  Train NMSE: {train_nmse:.4f}")
-        print(f"  Validation NMSE: {val_nmse:.4f}")
-        print(f"  Learning Rate: {scheduler.get_last_lr()[0]:.6e}")
-        # Append to CSV log
-        with open(log_file, mode='a', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow([epoch + 1, train_nmse, val_nmse, scheduler.get_last_lr()[0], is_best_model])
-        # Plot losses after each epoch
-        plt.figure(figsize=(10, 6))
-        plt.plot(range(1, len(train_nmse_losses) + 1), train_nmse_losses, label="Train NMSE")
-        plt.plot(range(1, len(val_nmse_losses) + 1), val_nmse_losses, label="Validation NMSE")
-        plt.xlabel("Epochs")
-        plt.ylabel("NMSE")
-        plt.title("Training and Validation NMSE Loss")
-        plt.legend()
-        plt.grid(True)
-        plt.show()
-    print("Training and validation complete.")
-    return model
-#%% FINE-TUNE
-from torch.cuda.amp import GradScaler, autocast
-# Define the ClassificationHead
-class ClassificationHead(nn.Module):
-    def __init__(self, input_dim, num_classes):
-        super().__init__()
-        self.fc = nn.Linear(input_dim, num_classes)
-    def forward(self, x):
-        return self.fc(x)
-# Define the RegressionHead
-class RegressionHead(nn.Module):
-    def __init__(self, input_dim):
-        super().__init__()
-        self.fc = nn.Linear(input_dim, 1)
-    def forward(self, x):
-        return self.fc(x)
-class CustomClassificationHead(nn.Module):
-    def __init__(self, input_dim, num_classes):
-        super().__init__()
-        self.classifier = nn.Sequential(
-            nn.Linear(input_dim, 512),
-            nn.BatchNorm1d(512),
-            nn.ReLU(),
-            nn.Dropout(0.1),
-            nn.Linear(512, 256),
-            nn.BatchNorm1d(256),
-            nn.ReLU(),
-            nn.Dropout(0.1),
-            nn.Linear(256, 128),
-            nn.BatchNorm1d(128),
-            nn.ReLU(),
-            # nn.Dropout(0.1),
-            nn.Linear(128, num_classes)
-        )
-    def forward(self, x):
-        return self.classifier(x)
-class CustomRegressionHead(nn.Module):
-    def __init__(self, input_dim, output_dim):
-        super().__init__()
-        self.regressor = nn.Sequential(
-            nn.Linear(input_dim, 512),
-            nn.BatchNorm1d(512),
-            nn.ReLU(),
-            nn.Dropout(0.1),
-            nn.Linear(512, 256),
-            nn.BatchNorm1d(256),
-            nn.ReLU(),
-            nn.Dropout(0.1),
-            nn.Linear(256, output_dim)
-        )
-    def forward(self, x):
-        return self.regressor(x)
-def custom_heads(input_dim, num_classes=None, output_dim=None, task_type="classification"):
-    """
-    Creates a custom head for classification or regression tasks.
-    Users should modify the class implementations for further customization.
-    Args:
-        input_dim (int): Input dimension of the head.
-        num_classes (int): Number of classes for classification tasks. Ignored for regression.
-        task_type (str): "classification" or "regression".
-    Returns:
-        nn.Module: Custom head for the specified task.
-    """
-    if task_type == "classification":
-        if num_classes is None:
-            raise ValueError("num_classes must be specified for classification tasks.")
-        return CustomClassificationHead(input_dim=input_dim, num_classes=num_classes)
-    elif task_type == "regression":
-        return CustomRegressionHead(input_dim=input_dim, output_dim=output_dim)
-    else:
-        raise ValueError("Invalid task_type. Choose 'classification' or 'regression'.")
-#%%
-# Fine-tuning wrapper for the base model
-class FineTuningWrapper(nn.Module):
-    def __init__(self, model, task_head, fine_tune_layers="full"):
-        super().__init__()
-        self.model = model
-        self.task_head = task_head
-        # Freeze all layers initially
-        for param in self.model.parameters():
-            param.requires_grad = False
-        # Handle fine-tuning layers
-        if fine_tune_layers is not None:
-            if fine_tune_layers == "full":
-                # Unfreeze all layers if "all" is specified
-                for param in self.model.parameters():
-                    param.requires_grad = True
-            else:
-                # Get a list of all available layer names in the model
-                available_layers = [name for name, _ in self.model.named_parameters()]
-                # Validate that specified layers exist in the model
-                for layer in fine_tune_layers:
-                    if not any(layer in lname for lname in available_layers):
-                        raise ValueError(
-                            f"Layer '{layer}' not found in the model. "
-                            f"Available layers: {available_layers}"
-                        )
-                # Unfreeze only the specified layers
-                for name, param in self.model.named_parameters():
-                    if any(layer in name for layer in fine_tune_layers):
-                        param.requires_grad = True
-    def forward(self, x, input_type="cls_emb"):
-        if input_type == "raw":
-            task_input = x.view(x.size(0), -1)
-        else:
-            embeddings, attn_maps = self.model(x)  # Get embeddings from the base model
-            if input_type == "cls_emb":
-                task_input = embeddings[:, 0, :]  # CLS token
-            elif input_type == "chs_emb":
-                chs_emb = embeddings[:, 1:, :]
-                task_input = chs_emb.view(chs_emb.size(0), -1) # embeddings.mean(dim=1)  # Mean pooling over channel embeddings
-        return self.task_head(task_input), 0 if input_type=="raw" else attn_maps
-#%%
-# Fine-tuning function
-from sklearn.metrics import f1_score
-def finetune(
-    base_model,
-    train_loader,
-    val_loader=None,
-    task_type="classification",
-    input_type="cls_emb",
-    num_classes=None,
-    output_dim=None,
-    use_custom_head=False,
-    fine_tune_layers=None,
-    optimizer_config=None,
-    criterion=None,
-    epochs=10,
-    device="cuda",
-    task="Beam Prediction"
-):
-    """
-    Configures and fine-tunes the base model with user-defined settings, saving results and models.
-    """
-    # Create results folder
-    time_now = f"{time.time():.0f}"
-    results_folder = f"results/{task}/{time_now}"
-    os.makedirs(results_folder, exist_ok=True)
-    log_file = os.path.join(results_folder, "training_log.csv")
-    # Initialize the CSV log
-    with open(log_file, mode='w', newline='') as file:
-        writer = csv.writer(file)
-        writer.writerow(["Task", "Input", "Epoch", "Train Loss", "Validation Loss", "F1-Score (Classification)", "Learning Rate", "Time"])
-    for batch in val_loader:
-        input_data, targets = batch[0].to(device), batch[1].to(device)
-        break
-    if input_type == "cls_emb":
-        n_patches = 1
-        patch_size = 128
-    elif input_type == "channel_emb":
-        n_patches = input_data.shape[1]-1
-        patch_size = 128
-    elif input_type == "raw":
-        n_patches = input_data.shape[1]
-        patch_size = 32
-        # patch_size = 1
-    if use_custom_head:
-        custom_head = custom_heads(input_dim=n_patches*patch_size,
-                                   num_classes=num_classes,
-                                   output_dim=output_dim,
-                                   task_type=task_type)
-    # Handle DataParallel models
-    if isinstance(base_model, nn.DataParallel):
-        base_model = base_model.module
-    # Set up the task-specific head
-    if use_custom_head:
-        task_head = custom_head
-    elif task_type == "classification":
-        if num_classes is None:
-            raise ValueError("num_classes must be specified for classification tasks.")
-        task_head = ClassificationHead(input_dim=n_patches*patch_size, num_classes=num_classes) # input_dim=base_model.embedding.d_model
-    elif task_type == "regression":
-        task_head = RegressionHead(input_dim=n_patches*patch_size) # input_dim=base_model.embedding.d_model
-    else:
-        raise ValueError("Invalid task_type. Choose 'classification' or 'regression'.")
-    # Wrap the model with the fine-tuning head
-    wrapper = FineTuningWrapper(base_model, task_head, fine_tune_layers=fine_tune_layers)
-    wrapper = wrapper.to(device)
-    print(f'Number of head parameters: {count_parameters(wrapper)}')
-   # Set default optimizer config if not provided
-    if optimizer_config is None:
-        optimizer_config = {"lr": 1e-4}
-    # Set up the optimizer
-    optimizer = torch.optim.Adam(wrapper.parameters(), **optimizer_config)
-    # Set up the scheduler for learning rate decay
-    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)  # Example: Reduce LR by 10x every 10 epochs
-    # Set up the loss criterion
-    if criterion is None:
-        criterion = nn.CrossEntropyLoss() if task_type == "classification" else nn.MSELoss()
-    scaler = GradScaler()
-    train_losses, val_losses, f1_scores = [], [], []
-    best_val_loss = float("inf")
-    best_model_path = None
-    for epoch in range(epochs):
-        # Training loop
-        wrapper.train()
-        epoch_loss = 0.0
-        with tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}") as progress_bar:
-            for batch in progress_bar:
-                input_data, targets = batch[0].to(device), batch[1].to(device)
-                optimizer.zero_grad()
-                with autocast():
-                    outputs, attn_maps = wrapper(input_data, input_type=input_type)
-                    loss = criterion(outputs, targets)
-                scaler.scale(loss).backward()
-                scaler.step(optimizer)
-                scaler.update()
-                epoch_loss += loss.item()
-                progress_bar.set_postfix({"Loss": loss.item()})
-        avg_train_loss = epoch_loss / len(train_loader)
-        train_losses.append(avg_train_loss)
-        # Validation loop
-        if val_loader:
-            wrapper.eval()
-            val_loss = 0.0
-            all_preds, all_targets = [], []
-            with torch.no_grad():
-                for batch in val_loader:
-                    input_data, targets = batch[0].to(device), batch[1].to(device)
-                    with autocast():
-                        outputs, _ = wrapper(input_data, input_type=input_type)
-                        loss = criterion(outputs, targets)
-                    val_loss += loss.item()
-                    if task_type == "classification":
-                        preds = torch.argmax(outputs, dim=1).cpu().numpy()
-                        all_preds.extend(preds)
-                        all_targets.extend(targets.cpu().numpy())
-            avg_val_loss = val_loss / len(val_loader)
-            val_losses.append(avg_val_loss)
-            time_now = f"{time.time():.0f}"
-            # Save the best model
-            if avg_val_loss < best_val_loss:
-                best_val_loss = avg_val_loss
-                best_model_path = os.path.join(results_folder, f"{input_type}_epoch{epoch+1}_valLoss{avg_val_loss:.4f}_{time_now}.pth")
-                torch.save(wrapper.state_dict(), best_model_path)
-                print(f"Model saved at {best_model_path} with validation loss: {best_val_loss:.4f}")
-            # Compute F1-score for classification tasks
-            f1 = None
-            if task_type == "classification":
-                f1 = f1_score(all_targets, all_preds, average="macro")
-                print(f"Epoch {epoch + 1}, Validation F1-Score: {f1:.4f}")
-                f1_scores.append(f1)
-        scheduler.step()
-        # Log results
-        with open(log_file, mode='a', newline='') as file:
-            writer = csv.writer(file)
-            writer.writerow([task, input_type, epoch + 1, avg_train_loss, avg_val_loss, f1 if f1 is not None else "-", scheduler.get_last_lr()[0], f"{time_now}"])
-    # Plot training and validation losses
-    plt.figure(figsize=(10, 6))
-    plt.plot(range(1, epochs + 1), train_losses, label="Training Loss")
-    plt.plot(range(1, epochs + 1), val_losses, label="Validation Loss", linestyle="--")
-    plt.xlabel("Epochs")
-    plt.ylabel("Loss")
-    plt.title("Training and Validation Loss")
-    plt.legend()
-    plt.grid(True)
-    # plt.savefig(os.path.join(results_folder, "loss_curve.png"))
-    plt.show()
     return wrapper, best_model_path, train_losses, val_losses, f1_scores if task_type == "classification" else 0, attn_maps

+# -*- coding: utf-8 -*-
+"""
+Created on Fri Dec 20 09:32:12 2024
+This script contains the LWM pre-training and task-specific fine-tuning functions.
+@author: Sadjad Alikhani
+"""
+import torch
+import torch.nn as nn
+from tqdm import tqdm
+import matplotlib.pyplot as plt
+import os
+import csv
+from utils import count_parameters
+import time
+#%% LOSS FUNCTION
+def nmse_loss(y_pred, y_true):
+    y_pred_flat = y_pred.view(y_pred.size(0), -1)
+    y_true_flat = y_true.view(y_true.size(0), -1)
+    mse = torch.sum((y_true_flat - y_pred_flat)**2, dim=-1)
+    normalization = torch.sum(y_true_flat**2, dim=-1)
+    return mse / normalization
+#%%
+def train_lwm(model, train_loaders, val_loaders, optimizer, scheduler, epochs, device, save_dir="models", log_file="training_log.csv"):
+    if not os.path.exists(save_dir):
+        os.makedirs(save_dir)
+    # Initialize CSV log
+    if not os.path.exists(log_file):
+        with open(log_file, mode='w', newline='') as file:
+            writer = csv.writer(file)
+            writer.writerow(["Epoch", "Train NMSE", "Validation NMSE", "Learning Rate", "Best Model"])
+    train_nmse_losses = []
+    val_nmse_losses = []
+    best_val_nmse = float('inf')
+    for epoch in range(epochs):
+        model.train()
+        train_nmse = 0.0
+        train_samples = 0
+        # Training loop across all buckets
+        print(f"\nEpoch {epoch + 1}/{epochs} [Training]")
+        for length, train_loader in train_loaders.items():
+            print(f"Processing sequences of length {length}")
+            with tqdm(train_loader, desc=f"Length {length} [Training]", unit="batch") as t:
+                for batch in t:
+                    # train_batches += 1
+                    optimizer.zero_grad()
+                    # Move data to device
+                    input_ids, masked_tokens, masked_pos = [b.to(device) for b in batch]
+                    # Forward pass
+                    logits_lm, _, _ = model(input_ids, masked_pos)
+                    # Compute NMSE
+                    loss = torch.sum(nmse_loss(masked_tokens, logits_lm))
+                    loss.backward()
+                    optimizer.step()
+                    scheduler.step()
+                    train_nmse += loss.item()
+                    train_samples += input_ids.shape[0]
+                    # Update progress bar
+                    t.set_postfix({"nmse": train_nmse/train_samples, "lr": scheduler.get_last_lr()[0]})
+        # Average NMSE across training batches
+        train_nmse /= max(train_samples, 1)
+        train_nmse_losses.append(train_nmse)
+        if epoch % 2 == 0:
+            # Validation loop across all buckets
+            model.eval()
+            val_nmse = 0.0
+            val_samples = 0
+            with torch.no_grad():
+                print(f"\nEpoch {epoch + 1}/{epochs} [Validation]")
+                for length, val_loader in val_loaders.items():
+                    print(f"Processing sequences of length {length}")
+                    with tqdm(val_loader, desc=f"Length {length} [Validation]", unit="batch") as t:
+                        for batch in t:
+                            # val_batches += 1
+                            # Move data to device
+                            input_ids, masked_tokens, masked_pos = [b.to(device) for b in batch]
+                            # Forward pass
+                            logits_lm, _, _ = model(input_ids, masked_pos)
+                            # Compute NMSE
+                            loss = torch.sum(nmse_loss(masked_tokens, logits_lm))
+                            val_nmse += loss.item()
+                            val_samples += input_ids.shape[0]
+                            # Update progress bar
+                            t.set_postfix({"nmse": val_nmse/val_samples})
+            # Average NMSE across validation batches
+            val_nmse /= max(val_samples, 1)
+            val_nmse_losses.append(val_nmse)
+            # Save model if validation NMSE improves
+            is_best_model = False
+            if val_nmse < best_val_nmse:
+                best_val_nmse = val_nmse
+                model_path = os.path.join(save_dir, f"lwm_epoch{epoch+1}_train{train_nmse:.4f}_val{val_nmse:.4f}.pth")
+                torch.save(model.state_dict(), model_path)
+                print(f"Model saved: {model_path}")
+                is_best_model = True
+        # Log the results
+        print(f"  Train NMSE: {train_nmse:.4f}")
+        print(f"  Validation NMSE: {val_nmse:.4f}")
+        print(f"  Learning Rate: {scheduler.get_last_lr()[0]:.6e}")
+        # Append to CSV log
+        with open(log_file, mode='a', newline='') as file:
+            writer = csv.writer(file)
+            writer.writerow([epoch + 1, train_nmse, val_nmse, scheduler.get_last_lr()[0], is_best_model])
+        # Plot losses after each epoch
+        plt.figure(figsize=(10, 6))
+        plt.plot(range(1, len(train_nmse_losses) + 1), train_nmse_losses, label="Train NMSE")
+        plt.plot(range(1, len(val_nmse_losses) + 1), val_nmse_losses, label="Validation NMSE")
+        plt.xlabel("Epochs")
+        plt.ylabel("NMSE")
+        plt.title("Training and Validation NMSE Loss")
+        plt.legend()
+        plt.grid(True)
+        plt.show()
+    print("Training and validation complete.")
+    return model
+#%% FINE-TUNE
+from torch.cuda.amp import GradScaler, autocast
+# Define the ClassificationHead
+class ClassificationHead(nn.Module):
+    def __init__(self, input_dim, num_classes):
+        super().__init__()
+        self.fc = nn.Linear(input_dim, num_classes)
+    def forward(self, x):
+        return self.fc(x)
+# Define the RegressionHead
+class RegressionHead(nn.Module):
+    def __init__(self, input_dim):
+        super().__init__()
+        self.fc = nn.Linear(input_dim, 1)
+    def forward(self, x):
+        return self.fc(x)
+class CustomClassificationHead(nn.Module):
+    def __init__(self, input_dim, num_classes):
+        super().__init__()
+        self.classifier = nn.Sequential(
+            nn.Linear(input_dim, 512),
+            nn.BatchNorm1d(512),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(512, 256),
+            nn.BatchNorm1d(256),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(256, 128),
+            nn.BatchNorm1d(128),
+            nn.ReLU(),
+            # nn.Dropout(0.1),
+            nn.Linear(128, num_classes)
+        )
+    def forward(self, x):
+        return self.classifier(x)
+class CustomRegressionHead(nn.Module):
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.regressor = nn.Sequential(
+            nn.Linear(input_dim, 512),
+            nn.BatchNorm1d(512),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(512, 256),
+            nn.BatchNorm1d(256),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(256, output_dim)
+        )
+    def forward(self, x):
+        return self.regressor(x)
+def custom_heads(input_dim, num_classes=None, output_dim=None, task_type="classification"):
+    """
+    Creates a custom head for classification or regression tasks.
+    Users should modify the class implementations for further customization.
+    Args:
+        input_dim (int): Input dimension of the head.
+        num_classes (int): Number of classes for classification tasks. Ignored for regression.
+        task_type (str): "classification" or "regression".
+    Returns:
+        nn.Module: Custom head for the specified task.
+    """
+    if task_type == "classification":
+        if num_classes is None:
+            raise ValueError("num_classes must be specified for classification tasks.")
+        return CustomClassificationHead(input_dim=input_dim, num_classes=num_classes)
+    elif task_type == "regression":
+        return CustomRegressionHead(input_dim=input_dim, output_dim=output_dim)
+    else:
+        raise ValueError("Invalid task_type. Choose 'classification' or 'regression'.")
+#%%
+# Fine-tuning wrapper for the base model
+class FineTuningWrapper(nn.Module):
+    def __init__(self, model, task_head, fine_tune_layers="full"):
+        super().__init__()
+        self.model = model
+        self.task_head = task_head
+        # Freeze all layers initially
+        for param in self.model.parameters():
+            param.requires_grad = False
+        # Handle fine-tuning layers
+        if fine_tune_layers is not None:
+            if fine_tune_layers == "full":
+                # Unfreeze all layers if "all" is specified
+                for param in self.model.parameters():
+                    param.requires_grad = True
+            else:
+                # Get a list of all available layer names in the model
+                available_layers = [name for name, _ in self.model.named_parameters()]
+                # Validate that specified layers exist in the model
+                for layer in fine_tune_layers:
+                    if not any(layer in lname for lname in available_layers):
+                        raise ValueError(
+                            f"Layer '{layer}' not found in the model. "
+                            f"Available layers: {available_layers}"
+                        )
+                # Unfreeze only the specified layers
+                for name, param in self.model.named_parameters():
+                    if any(layer in name for layer in fine_tune_layers):
+                        param.requires_grad = True
+    def forward(self, x, input_type="cls_emb"):
+        if input_type == "raw":
+            task_input = x.view(x.size(0), -1)
+        else:
+            embeddings, attn_maps = self.model(x)  # Get embeddings from the base model
+            if input_type == "cls_emb":
+                task_input = embeddings[:, 0, :]  # CLS token
+            elif input_type == "channel_emb":
+                chs_emb = embeddings[:, 1:, :]
+                task_input = chs_emb.view(chs_emb.size(0), -1) # embeddings.mean(dim=1)  # Mean pooling over channel embeddings
+        return self.task_head(task_input), 0 if input_type=="raw" else attn_maps
+#%%
+# Fine-tuning function
+from sklearn.metrics import f1_score
+def finetune(
+    base_model,
+    train_loader,
+    val_loader=None,
+    task_type="classification",
+    input_type="cls_emb",
+    num_classes=None,
+    output_dim=None,
+    use_custom_head=False,
+    fine_tune_layers=None,
+    optimizer_config=None,
+    criterion=None,
+    epochs=10,
+    device="cuda",
+    task="Beam Prediction"
+):
+    """
+    Configures and fine-tunes the base model with user-defined settings, saving results and models.
+    """
+    # Create results folder
+    time_now = f"{time.time():.0f}"
+    results_folder = f"results/{task}/{time_now}"
+    os.makedirs(results_folder, exist_ok=True)
+    log_file = os.path.join(results_folder, "training_log.csv")
+    # Initialize the CSV log
+    with open(log_file, mode='w', newline='') as file:
+        writer = csv.writer(file)
+        writer.writerow(["Task", "Input", "Epoch", "Train Loss", "Validation Loss", "F1-Score (Classification)", "Learning Rate", "Time"])
+    for batch in val_loader:
+        input_data, targets = batch[0].to(device), batch[1].to(device)
+        break
+    if input_type == "cls_emb":
+        n_patches = 1
+        patch_size = 128
+    elif input_type == "channel_emb":
+        n_patches = input_data.shape[1]-1
+        patch_size = 128
+    elif input_type == "raw":
+        n_patches = input_data.shape[1]
+        patch_size = 32
+        # patch_size = 1
+    if use_custom_head:
+        custom_head = custom_heads(input_dim=n_patches*patch_size,
+                                   num_classes=num_classes,
+                                   output_dim=output_dim,
+                                   task_type=task_type)
+    # Handle DataParallel models
+    if isinstance(base_model, nn.DataParallel):
+        base_model = base_model.module
+    # Set up the task-specific head
+    if use_custom_head:
+        task_head = custom_head
+    elif task_type == "classification":
+        if num_classes is None:
+            raise ValueError("num_classes must be specified for classification tasks.")
+        task_head = ClassificationHead(input_dim=n_patches*patch_size, num_classes=num_classes) # input_dim=base_model.embedding.d_model
+    elif task_type == "regression":
+        task_head = RegressionHead(input_dim=n_patches*patch_size) # input_dim=base_model.embedding.d_model
+    else:
+        raise ValueError("Invalid task_type. Choose 'classification' or 'regression'.")
+    # Wrap the model with the fine-tuning head
+    wrapper = FineTuningWrapper(base_model, task_head, fine_tune_layers=fine_tune_layers)
+    wrapper = wrapper.to(device)
+    print(f'Number of head parameters: {count_parameters(wrapper)}')
+   # Set default optimizer config if not provided
+    if optimizer_config is None:
+        optimizer_config = {"lr": 1e-4}
+    # Set up the optimizer
+    optimizer = torch.optim.Adam(wrapper.parameters(), **optimizer_config)
+    # Set up the scheduler for learning rate decay
+    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)  # Example: Reduce LR by 10x every 10 epochs
+    # Set up the loss criterion
+    if criterion is None:
+        criterion = nn.CrossEntropyLoss() if task_type == "classification" else nn.MSELoss()
+    scaler = GradScaler()
+    train_losses, val_losses, f1_scores = [], [], []
+    best_val_loss = float("inf")
+    best_model_path = None
+    for epoch in range(epochs):
+        # Training loop
+        wrapper.train()
+        epoch_loss = 0.0
+        with tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}") as progress_bar:
+            for batch in progress_bar:
+                input_data, targets = batch[0].to(device), batch[1].to(device)
+                optimizer.zero_grad()
+                with autocast():
+                    outputs, attn_maps = wrapper(input_data, input_type=input_type)
+                    loss = criterion(outputs, targets)
+                scaler.scale(loss).backward()
+                scaler.step(optimizer)
+                scaler.update()
+                epoch_loss += loss.item()
+                progress_bar.set_postfix({"Loss": loss.item()})
+        avg_train_loss = epoch_loss / len(train_loader)
+        train_losses.append(avg_train_loss)
+        # Validation loop
+        if val_loader:
+            wrapper.eval()
+            val_loss = 0.0
+            all_preds, all_targets = [], []
+            with torch.no_grad():
+                for batch in val_loader:
+                    input_data, targets = batch[0].to(device), batch[1].to(device)
+                    with autocast():
+                        outputs, _ = wrapper(input_data, input_type=input_type)
+                        loss = criterion(outputs, targets)
+                    val_loss += loss.item()
+                    if task_type == "classification":
+                        preds = torch.argmax(outputs, dim=1).cpu().numpy()
+                        all_preds.extend(preds)
+                        all_targets.extend(targets.cpu().numpy())
+            avg_val_loss = val_loss / len(val_loader)
+            val_losses.append(avg_val_loss)
+            time_now = f"{time.time():.0f}"
+            # Save the best model
+            if avg_val_loss < best_val_loss:
+                best_val_loss = avg_val_loss
+                best_model_path = os.path.join(results_folder, f"{input_type}_epoch{epoch+1}_valLoss{avg_val_loss:.4f}_{time_now}.pth")
+                torch.save(wrapper.state_dict(), best_model_path)
+                print(f"Model saved at {best_model_path} with validation loss: {best_val_loss:.4f}")
+            # Compute F1-score for classification tasks
+            f1 = None
+            if task_type == "classification":
+                f1 = f1_score(all_targets, all_preds, average="macro")
+                print(f"Epoch {epoch + 1}, Validation F1-Score: {f1:.4f}")
+                f1_scores.append(f1)
+        scheduler.step()
+        # Log results
+        with open(log_file, mode='a', newline='') as file:
+            writer = csv.writer(file)
+            writer.writerow([task, input_type, epoch + 1, avg_train_loss, avg_val_loss, f1 if f1 is not None else "-", scheduler.get_last_lr()[0], f"{time_now}"])
+    # Plot training and validation losses
+    plt.figure(figsize=(10, 6))
+    plt.plot(range(1, epochs + 1), train_losses, label="Training Loss")
+    plt.plot(range(1, epochs + 1), val_losses, label="Validation Loss", linestyle="--")
+    plt.xlabel("Epochs")
+    plt.ylabel("Loss")
+    plt.title("Training and Validation Loss")
+    plt.legend()
+    plt.grid(True)
+    # plt.savefig(os.path.join(results_folder, "loss_curve.png"))
+    plt.show()
     return wrapper, best_model_path, train_losses, val_losses, f1_scores if task_type == "classification" else 0, attn_maps