wi-lab committed on
Commit 8920c6e · verified · 1 Parent(s): 2364aca

Upload the pre-trained model and pre-training, inference, downstream, and utility scripts

Files changed (9)
  1. .gitignore +2 -0
  2. downstream.py +146 -0
  3. inference.py +52 -0
  4. input_preprocess.py +1020 -0
  5. lwm_model.py +154 -0
  6. main.py +120 -0
  7. models/model.pth +3 -0
  8. train.py +446 -0
  9. utils.py +247 -0
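
For orientation, below is a minimal end-to-end sketch of how these files fit together, adapted from downstream.py. The scenario choice, BS index, and the use of models/model.pth as the checkpoint path are illustrative assumptions rather than a prescribed setup; generating channels requires the DeepMIMO scenario files under ./scenarios.

import torch
import lwm_model
from input_preprocess import tokenizer, scenarios_list
from inference import lwm_inference

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 1) Generate and tokenize DeepMIMO channels for one scenario
scenario = [scenarios_list()[0]]
preprocessed_data, labels, raw_chs = tokenizer(scenario, bs_idxs=[3], load_data=False)

# 2) Load the pre-trained LWM backbone (assumes the checkpoint was saved without the
#    DataParallel "module." prefix, since lwm.from_pretrained loads the state dict directly)
model = lwm_model.lwm.from_pretrained("models/model.pth", device=device)

# 3) Extract CLS embeddings to feed a downstream model (see downstream.py / train.finetune)
cls_emb = lwm_inference(model, preprocessed_data, input_type="cls_emb", device=device)
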
.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ __pycache__*
2
+ /images
downstream.py ADDED
@@ -0,0 +1,146 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri Jan 10 11:11:58 2025
4
+
5
+ This script evaluates downstream task performance by comparing models trained
6
+ on raw channel representations versus those trained on LWM embeddings.
7
+
8
+ @author: Sadjad Alikhani
9
+ """
10
+ #%% IMPORT PACKAGES & MODULES
11
+ from input_preprocess import tokenizer, scenarios_list
12
+ from inference import lwm_inference
13
+ from utils import prepare_loaders
14
+ from train import finetune
15
+ import lwm_model
16
+ import matplotlib.pyplot as plt
17
+ import numpy as np
18
+ import torch
19
+ import torch.nn as nn
20
+ import warnings
21
+ warnings.filterwarnings("ignore", category=UserWarning)
22
+ #%% DOWNSTREAM DATA GENERATION
23
+ n_beams = 16
24
+ task = ['Beam Prediction', 'LoS/NLoS Classification'][1]
25
+ task_type = ["classification", "regression"][0]
26
+ visualization_method = ["pca", "umap", "tsne"][2]
27
+ input_types = ["cls_emb", "channel_emb", "raw"]
28
+ train_ratios = [.001, .01, .05, .1, .25, .5, .8]
29
+ fine_tuning_status = [None, ["layers.8", "layers.9", "layers.10", "layers.11"], "full"]
30
+ selected_scenario_names = [scenarios_list()[18]]
31
+ preprocessed_data, labels, raw_chs = tokenizer(
32
+ selected_scenario_names,
33
+ bs_idxs=[3],
34
+ load_data=False,
35
+ task=task,
36
+ n_beams=n_beams)
37
+ #%% LOAD THE MODEL
38
+ gpu_ids = [0]
39
+ device = torch.device("cuda:0")
40
+ model = lwm_model.lwm().to(device)
41
+
42
+ model_name = "lwm_epoch50_train0.0077_val0.0060_masking0.40.pth"
43
+ state_dict = torch.load(f"models/{model_name}", map_location=device)
44
+ new_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
45
+ model.load_state_dict(new_state_dict)
46
+
47
+ model = nn.DataParallel(model, gpu_ids)
48
+ print(f"Model loaded successfully on GPU {device.index}")
49
+ #%% 2D EMBEDDING SPACE VISUALIZATION BEFORE FINE-TUNING
50
+ chs = lwm_inference(
51
+ model,
52
+ preprocessed_data,
53
+ input_type="cls_emb",
54
+ device=device,
55
+ batch_size=64,
56
+ visualization=False,
57
+ labels=labels,
58
+ visualization_method=visualization_method)
59
+ #%% FINE-TUNE
60
+ results = np.zeros((len(fine_tuning_status), len(input_types), len(train_ratios)))
61
+ for fine_tuning_stat_idx, fine_tuning_stat in enumerate(fine_tuning_status):
62
+ for input_type_idx, input_type in enumerate(input_types):
63
+
64
+ if input_type == "raw" and fine_tuning_stat is not None:
65
+ continue
66
+
67
+ selected_patches_idxs = None
68
+ for train_ratio_idx, train_ratio in enumerate(train_ratios):
69
+
70
+ print(f"\nfine-tuning status: {fine_tuning_stat}")
71
+ print(f"input type: {input_type}")
72
+ print(f"train ratio: {train_ratio}\n")
73
+
74
+ # PREPARE LOADERS
75
+ train_loader, val_loader, samples, target = prepare_loaders(
76
+ preprocessed_data=preprocessed_data,
77
+ labels=labels,
78
+ selected_patches_idxs=selected_patches_idxs,
79
+ input_type=input_type,
80
+ task_type=task_type,
81
+ train_ratio=train_ratio,
82
+ batch_size=128,
83
+ seed=42
84
+ )
85
+
86
+ # FINE-TUNE LWM
87
+ fine_tuned_model, best_model_path, train_losses, val_losses, f1_scores, attn_maps_ft = finetune(
88
+ base_model=model,
89
+ train_loader=train_loader,
90
+ val_loader=val_loader,
91
+ task_type=task_type,
92
+ input_type=input_type,
93
+ num_classes=n_beams if task=='Beam Prediction' else 2 if task=='LoS/NLoS Classification' else None,
94
+ output_dim=target.shape[-1] if task_type =='regression' else None,
95
+ use_custom_head=True,
96
+ fine_tune_layers=fine_tuning_stat,
97
+ optimizer_config={"lr": 1e-3},
98
+ epochs=15,
99
+ device=device,
100
+ task=task
101
+ )
102
+
103
+ results[fine_tuning_stat_idx][input_type_idx][train_ratio_idx] = f1_scores[-1]
104
+
105
+ markers = ['o', 's', 'D']
106
+ legend_labels = ['CLS Emb', 'CHS Emb', 'Raw']  # renamed so the dataset labels tensor is not overwritten
107
+ fine_tuning_status_labels = ['No FT', 'Partial FT', 'Full FT']
108
+ line_styles = ['-', '--', ':']
109
+ colors = plt.cm.viridis(np.linspace(0, 0.8, len(legend_labels)))
110
+ plt.figure(figsize=(12, 8), dpi=500)
111
+ for ft_idx, (ft_status_label, line_style) in enumerate(zip(fine_tuning_status_labels, line_styles)):
112
+ for idx, (marker, label, color) in enumerate(zip(markers, legend_labels, colors)):
113
+ # For raw channels, only plot the no-fine-tuning case
114
+ if label == "Raw" and ft_status_label != "No FT":
115
+ continue
116
+ # Use a plain "Raw Channels" legend entry for raw inputs (no fine-tuning suffix)
117
+ plot_label = "Raw Channels" if label == "Raw" else label
118
+ plt.plot(
119
+ train_ratios,
120
+ results[ft_idx, idx],
121
+ marker=marker,
122
+ linestyle=line_style,
123
+ label=f"{plot_label} ({ft_status_label})" if label != "Raw" else plot_label,
124
+ color=color,
125
+ linewidth=3,
126
+ markersize=9
127
+ )
128
+ plt.xscale('log')
129
+ plt.xlabel("Train Ratio", fontsize=20)
130
+ plt.ylabel("F1-Score", fontsize=20)
131
+ plt.legend(fontsize=17, loc="best")
132
+ plt.grid(True, linestyle="--", alpha=0.7)
133
+ plt.xticks(fontsize=17)
134
+ plt.yticks(fontsize=17)
135
+ plt.tight_layout()
136
+ plt.show()
137
+ #%% 2D EMBEDDING SPACE VISUALIZATION AFTER FINE-TUNING
138
+ chs = lwm_inference(
139
+ fine_tuned_model.model,
140
+ preprocessed_data,
141
+ input_type="cls_emb",
142
+ device=device,
143
+ batch_size=64,
144
+ visualization=False,
145
+ labels=labels,
146
+ visualization_method=visualization_method)
inference.py ADDED
@@ -0,0 +1,52 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Sun Sep 15 18:27:17 2024
4
+
5
+ This script performs LWM inference on raw channel representations.
6
+
7
+ @author: Sadjad Alikhani
8
+ """
9
+ import torch
10
+ from torch.utils.data import DataLoader, TensorDataset
11
+ from utils import visualize_embeddings
12
+ from tqdm import tqdm
13
+ import warnings
14
+ warnings.filterwarnings('ignore')
15
+ #%%
16
+ def lwm_inference(model, data, input_type="cls_emb", device="cpu", batch_size=64, visualization=False, labels=None, visualization_method="t-sne"):
17
+
18
+ if input_type == "raw":
19
+ output_total = data
20
+ else:
21
+ dataset = TensorDataset(data)
22
+ dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
23
+
24
+ embeddings = []
25
+ model.eval()
26
+ with torch.no_grad():
27
+ with tqdm(dataloader, desc="Inference", unit="batch") as t:
28
+ for batch in t:
29
+
30
+ input_ids = batch[0].to(device)
31
+ output = model(input_ids)[0]
32
+
33
+ if input_type == "cls_emb":
34
+ batch_embeddings = output[:, 0, :]
35
+ embeddings.append(batch_embeddings)
36
+ elif input_type == "channel_emb":
37
+ batch_embeddings = output[:, 1:, :]
38
+ embeddings.append(batch_embeddings)
39
+
40
+ output_total = torch.cat(embeddings, dim=0).float()
41
+
42
+ if visualization:
43
+ visualize_embeddings(output_total.view(output_total.size(0), -1),
44
+ labels,
45
+ method=visualization_method,
46
+ label="Embedding Space")
47
+ visualize_embeddings(data.view(data.size(0), -1),
48
+ labels,
49
+ method=visualization_method,
50
+ label="Original Space")
51
+
52
+ return output_total
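
The return shape of lwm_inference depends on input_type (d_model = 128 per lwm_model.py): "cls_emb" yields one 128-dimensional vector per sample, "channel_emb" one 128-dimensional vector per channel patch, and "raw" returns the input unchanged. A short usage sketch, assuming model and preprocessed_data were prepared as in downstream.py:

cls_emb = lwm_inference(model, preprocessed_data, input_type="cls_emb", device=device)      # (N, 128)
ch_emb  = lwm_inference(model, preprocessed_data, input_type="channel_emb", device=device)  # (N, n_patches, 128)
raw     = lwm_inference(model, preprocessed_data, input_type="raw", device=device)          # identical to the input
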
input_preprocess.py ADDED
@@ -0,0 +1,1020 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri Sep 13 16:13:29 2024
4
+
5
+ This script generates preprocessed data from wireless communication scenarios,
6
+ including channel generation, patch generation, masking, and preparing raw
7
+ channels for the Transformer-based LWM model.
8
+
9
+ @author: Sadjad Alikhani
10
+ """
11
+ import numpy as np
12
+ import os
13
+ from tqdm import tqdm
14
+ import time
15
+ import pickle
16
+ import DeepMIMOv3
17
+ import torch
18
+ from collections import defaultdict
19
+ from utils import generate_gaussian_noise, plot_coverage
20
+ #%% Scenarios List
21
+ def scenarios_list():
22
+ scen_list = np.array([
23
+ 'city_0_newyork',
24
+ 'city_1_losangeles',
25
+ 'city_2_chicago',
26
+ 'city_3_houston',
27
+ 'city_4_phoenix',
28
+ 'city_5_philadelphia',
29
+ 'city_6_miami',
30
+ 'city_7_sandiego',
31
+ 'city_8_dallas',
32
+ 'city_9_sanfrancisco',
33
+ 'city_10_austin',
34
+ 'city_11_santaclara',
35
+ 'city_12_fortworth',
36
+ 'city_13_columbus',
37
+ 'city_14_charlotte',
38
+ 'city_15_indianapolis',
39
+ 'city_16_sanfrancisco',
40
+ 'city_17_seattle',
41
+ 'city_18_denver',
42
+ 'city_19_oklahoma',
43
+ 'asu_campus1_v1',
44
+ 'asu_campus1_v2',
45
+ 'asu_campus1_v3',
46
+ 'asu_campus1_v4',
47
+ 'asu_campus1_v5',
48
+ 'asu_campus1_v6',
49
+ 'asu_campus1_v7',
50
+ 'asu_campus1_v8',
51
+ 'asu_campus1_v9',
52
+ 'asu_campus1_v10',
53
+ 'asu_campus1_v11',
54
+ 'asu_campus1_v12',
55
+ 'asu_campus1_v13',
56
+ 'asu_campus1_v14',
57
+ 'asu_campus1_v15',
58
+ 'asu_campus1_v16',
59
+ 'asu_campus1_v17',
60
+ 'asu_campus1_v18',
61
+ 'asu_campus1_v19',
62
+ 'asu_campus1_v20',
63
+ 'Boston5G_3p5_v1',
64
+ 'Boston5G_3p5_v2',
65
+ 'Boston5G_3p5_v3',
66
+ 'Boston5G_3p5_v4',
67
+ 'Boston5G_3p5_v5',
68
+ 'Boston5G_3p5_v6',
69
+ 'Boston5G_3p5_v7',
70
+ 'Boston5G_3p5_v8',
71
+ 'Boston5G_3p5_v9',
72
+ 'Boston5G_3p5_v10',
73
+ 'Boston5G_3p5_v11',
74
+ 'Boston5G_3p5_v12',
75
+ 'Boston5G_3p5_v13',
76
+ 'Boston5G_3p5_v14',
77
+ 'Boston5G_3p5_v15',
78
+ 'Boston5G_3p5_v16',
79
+ 'Boston5G_3p5_v17',
80
+ 'Boston5G_3p5_v18',
81
+ 'Boston5G_3p5_v19',
82
+ 'Boston5G_3p5_v20',
83
+ 'O1_3p5_v1',
84
+ 'O1_3p5_v2',
85
+ 'O1_3p5_v3',
86
+ 'O1_3p5_v4',
87
+ 'O1_3p5_v5',
88
+ 'O1_3p5_v6',
89
+ 'O1_3p5_v7',
90
+ 'O1_3p5_v8',
91
+ 'O1_3p5_v9',
92
+ 'O1_3p5_v10',
93
+ 'O1_3p5_v11',
94
+ 'O1_3p5_v12',
95
+ 'O1_3p5_v13',
96
+ 'O1_3p5_v14',
97
+ 'O1_3p5_v15',
98
+ 'O1_3p5_v16',
99
+ 'O1_3p5_v17',
100
+ 'O1_3p5_v18',
101
+ 'O1_3p5_v19',
102
+ 'O1_3p5_v20',
103
+ 'asu_campus1',
104
+ 'O1_3p5',
105
+ 'Boston5G_3p5',
106
+ 'city_0_newyork_v16x64',
107
+ 'city_1_losangeles_v16x64',
108
+ 'city_2_chicago_v16x64',
109
+ 'city_3_houston_v16x64',
110
+ 'city_4_phoenix_v16x64',
111
+ 'city_5_philadelphia_v16x64',
112
+ 'city_6_miami_v16x64',
113
+ 'city_7_sandiego_v16x64',
114
+ 'city_8_dallas_v16x64',
115
+ 'city_9_sanfrancisco_v16x64'
116
+ ])
117
+ return scen_list
118
+ #%% Token Generation
119
+ def patch_gen(N_ROWS=4, N_COLUMNS=4, selected_scenario_names=None,
120
+ manual_data=None, bs_idxs=[1,2,3], load_data=False,
121
+ save_dir="data", task="LoS/NLoS Classification",
122
+ n_beams=64, o1_bs_idx=[4]):
123
+
124
+ os.makedirs(save_dir, exist_ok=True)
125
+
126
+ if manual_data is not None:
127
+ patches = patch_maker(np.expand_dims(np.array(manual_data), axis=1))
128
+ else:
129
+ deepmimo_data = []
130
+ for scenario_name in selected_scenario_names:
131
+ if "O1" in scenario_name: # make an exception for bs idxs of the o1 scenario
132
+ if o1_bs_idx is None:
133
+ bs_idxs = [4, 15]
134
+ else:
135
+ bs_idxs = o1_bs_idx
136
+ for bs_idx in bs_idxs:
137
+ if has_version_suffix(scenario_name) and bs_idx in [2,3]:
138
+ continue
139
+ if not load_data:
140
+ print(f"\nGenerating data for scenario: {scenario_name}, BS #{bs_idx}")
141
+ data, n_ant_bs, n_subcarriers = DeepMIMO_data_gen(scenario_name, bs_idx)
142
+ file_name = f"{save_dir}/{scenario_name}_ant{n_ant_bs}_sub{n_subcarriers}_bs{bs_idx}.npy"
143
+ np.save(file_name, data)
144
+ print(f"Data saved to {file_name}")
145
+ deepmimo_data.append(data)
146
+ else:
147
+ n_ant_bs, n_subcarriers = parametersv2(scenario_name, bs_idx)
148
+ print(f"\nLoading data for scenario: {scenario_name}, BS #{bs_idx}")
149
+ file_name = f"{save_dir}/{scenario_name}_ant{n_ant_bs}_sub{n_subcarriers}_bs{bs_idx}.npy"
150
+ data = np.load(file_name, allow_pickle=True).item()
151
+ print(f"Data loaded from {file_name}")
152
+ deepmimo_data.append(data)
153
+
154
+ cleaned_deepmimo_data = [deepmimo_data_cleaning(deepmimo_data[scenario_idx]) for scenario_idx in range(len(deepmimo_data))] #n_scenarios*n_bs_idxs
155
+ patches = [patch_maker(cleaned_deepmimo_data[scenario_idx], N_ROWS, N_COLUMNS) for scenario_idx in range(len(deepmimo_data))]
156
+ raw_chs = torch.tensor(cleaned_deepmimo_data[0]).squeeze(1)
157
+ raw_chs = raw_chs.view(raw_chs.size(0), -1)
158
+ raw_chs = torch.hstack((raw_chs.real, raw_chs.imag))
159
+
160
+ if task:
161
+ labels = [label_gen(task, deepmimo_data[scenario_idx], selected_scenario_names[scenario_idx], n_beams=n_beams) for scenario_idx in range(len(deepmimo_data))]
162
+ return patches, torch.tensor(labels[0]), raw_chs.view(raw_chs.size(0), -1)
163
+ else:
164
+ return patches, raw_chs.view(raw_chs.size(0), -1)
165
+ #%%
166
+ def tokenizer(selected_scenario_names,
167
+ bs_idxs=[1,2,3],
168
+ load_data=False,
169
+ task="LoS/NLoS Classification",
170
+ n_beams=64,
171
+ MAX_LEN=513,
172
+ masking_percent=.40,
173
+ mask=False,
174
+ seed=42,
175
+ snr=None):
176
+
177
+ patches, labels, raw_chs = patch_gen(
178
+ selected_scenario_names=selected_scenario_names,
179
+ bs_idxs=bs_idxs,
180
+ load_data=load_data,
181
+ task=task,
182
+ n_beams=n_beams
183
+ )
184
+
185
+ patches = [patch for patch_list in patches for patch in patch_list]
186
+ print("Total number of samples:", len(patches))
187
+
188
+ grouped_data = defaultdict(list) # Group samples by sequence length
189
+ grouped_data_2 = []
190
+
191
+ for user_idx in tqdm(range(len(patches)), desc="Processing items"):
192
+ patch_size = patches[user_idx].shape[1]
193
+ n_patches = patches[user_idx].shape[0]
194
+ n_masks_half = int(masking_percent * n_patches)
195
+
196
+ word2id = {
197
+ '[CLS]': 0.2 * np.ones((patch_size)),
198
+ '[MASK]': 0.1 * np.ones((patch_size))
199
+ }
200
+
201
+ sample = make_sample(
202
+ user_idx, patches, word2id, n_patches, n_masks_half, patch_size, MAX_LEN, mask=mask, seed=seed
203
+ )
204
+
205
+ if mask:
206
+ seq_length = len(sample[0])
207
+ grouped_data[seq_length].append(sample)
208
+ else:
209
+ grouped_data_2.append(sample)
210
+
211
+ if mask:
212
+ # Normalize keys to 0, 1, 2, ...
213
+ normalized_grouped_data = {i: grouped_data[key] for i, key in enumerate(sorted(grouped_data.keys()))}
214
+ else:
215
+ normalized_grouped_data = torch.stack(grouped_data_2, dim=0)
216
+ # normalized_grouped_data = grouped_data_2
217
+ if snr is not None:
218
+ normalized_grouped_data += generate_gaussian_noise(normalized_grouped_data, snr)
219
+ # normalized_grouped_data = {i: grouped_data[key] for i, key in enumerate(sorted(grouped_data.keys()))}
220
+
221
+ return normalized_grouped_data, labels, raw_chs
222
+ #%% REMOVE ZERO CHANNELS AND SCALE
223
+ def deepmimo_data_cleaning(deepmimo_data):
224
+ idxs = np.where(deepmimo_data['user']['LoS'] != -1)[0]
225
+ cleaned_deepmimo_data = deepmimo_data['user']['channel'][idxs]
226
+ return np.array(cleaned_deepmimo_data) * 1e6
227
+ #%%
228
+ def make_sample(user_idx, patch, word2id, n_patches, n_masks, patch_size, MAX_LEN, mask=True, seed=None):
229
+
230
+ if seed is not None:
231
+ np.random.seed(seed)
232
+
233
+ # Step 1: Retrieve tokens and prepend [CLS]
234
+ tokens = patch[user_idx]
235
+ input_ids = np.vstack((word2id['[CLS]'], tokens))
236
+
237
+ # Step 2: Mask real and imaginary patches
238
+ tokens_size = int(n_patches) # int(n_patches / 2)
239
+ masked_pos = np.random.choice(range(1, tokens_size), size=n_masks, replace=False)
240
+
241
+ masked_tokens = []
242
+ for pos in masked_pos:
243
+ original_masked_tokens = input_ids[pos].copy()
244
+ masked_tokens.append(original_masked_tokens)
245
+ if mask:
246
+ rnd_num = np.random.rand()
247
+ if rnd_num < 0.1:
248
+ input_ids[pos] = np.random.rand(patch_size) # Replace with random values
249
+ elif rnd_num < 0.9:
250
+ input_ids[pos] = word2id['[MASK]'] # Replace with [MASK]
251
+
252
+ if not mask:
253
+ return torch.tensor(input_ids)
254
+ else:
255
+ return [input_ids, masked_tokens, masked_pos]
256
+ #%% Patch Generation
257
+ def patch_maker(original_ch, patch_rows, patch_cols):
258
+ # Step 1: Remove the singleton channel dimension
259
+ n_samples, _, n_rows, n_cols = original_ch.shape # Unpack shape
260
+ original_ch = original_ch[:, 0] # Remove the singleton dimension
261
+
262
+ # Step 2: Split into real and imaginary parts and interleave them
263
+ flat_real = original_ch.real
264
+ flat_imag = original_ch.imag
265
+
266
+ # Interleave real and imaginary parts along the last axis
267
+ interleaved = np.empty((n_samples, n_rows, n_cols * 2), dtype=np.float32)
268
+ interleaved[:, :, 0::2] = flat_real
269
+ interleaved[:, :, 1::2] = flat_imag
270
+
271
+ # Step 3: Compute the number of patches along rows and columns
272
+ n_patches_rows = int(np.ceil(n_rows / patch_rows))
273
+ n_patches_cols = int(np.ceil(n_cols / patch_cols))
274
+
275
+ # Step 4: Pad the matrix if necessary to make it divisible by patch size
276
+ padded_rows = n_patches_rows * patch_rows - n_rows
277
+ padded_cols = n_patches_cols * patch_cols - n_cols
278
+ if padded_rows > 0 or padded_cols > 0:
279
+ interleaved = np.pad(
280
+ interleaved,
281
+ ((0, 0), (0, padded_rows), (0, padded_cols * 2)), # Double padding for interleaved axis
282
+ mode='constant',
283
+ constant_values=0,
284
+ )
285
+
286
+ # Step 5: Create patches by dividing into blocks
287
+ n_samples, padded_rows, padded_cols = interleaved.shape
288
+ padded_cols //= 2 # Adjust for interleaving (real and imaginary parts count as one)
289
+ patches = []
290
+
291
+ for i in range(0, padded_rows, patch_rows):
292
+ for j in range(0, padded_cols, patch_cols):
293
+ patch = interleaved[:, i:i + patch_rows, j * 2:(j + patch_cols) * 2]
294
+ patches.append(patch.reshape(n_samples, -1)) # Flatten each patch
295
+
296
+ # Step 6: Stack patches to form the final array
297
+ patches = np.stack(patches, axis=1) # Shape: (num_samples, n_patches, patch_rows * patch_cols * 2)
298
+
299
+ return patches
300
+ #%% Data Generation for Scenario Areas
301
+ def DeepMIMO_data_gen(scenario, bs_idx):
302
+ import DeepMIMOv3
303
+ parameters, row_column_users = get_parameters(scenario, bs_idx)
304
+ deepMIMO_dataset = DeepMIMOv3.generate_data(parameters)
305
+
306
+ if "O1" in scenario:
307
+ hops = [2, 2]
308
+ else:
309
+ hops = [1, 1]
310
+
311
+ uniform_idxs = uniform_sampling(deepMIMO_dataset, hops, len(parameters['user_rows']),
312
+ users_per_row=row_column_users[scenario]['n_per_row'])
313
+ data = select_by_idx(deepMIMO_dataset, uniform_idxs)[0]
314
+
315
+ n_ant_bs = parameters['bs_antenna']['shape'][0]
316
+ n_subcarriers = parameters['OFDM']['subcarriers']
317
+
318
+ return data, n_ant_bs, n_subcarriers
319
+ #%%
320
+ def parametersv2(scenario, bs_idx):
321
+ parameters, _ = get_parameters(scenario, bs_idx)
322
+ n_ant_bs = parameters['bs_antenna']['shape'][0]
323
+ n_subcarriers = parameters['OFDM']['subcarriers']
324
+ return n_ant_bs, n_subcarriers
325
+ #%%%
326
+ def get_parameters(scenario, bs_idx=1):
327
+
328
+ n_ant_ue = 1
329
+ scs = 30e3
330
+
331
+ row_column_users = scenario_prop()
332
+
333
+ parameters = DeepMIMOv3.default_params()
334
+ parameters['dataset_folder'] = './scenarios'
335
+ parameters['scenario'] = scenario.split("_v")[0]
336
+
337
+ n_ant_bs = row_column_users[scenario]['n_ant_bs']
338
+ n_subcarriers = row_column_users[scenario]['n_subcarriers']
339
+ parameters['active_BS'] = np.array([bs_idx])
340
+
341
+ if isinstance(row_column_users[scenario]['n_rows'], int):
342
+ parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'])
343
+ else:
344
+ parameters['user_rows'] = np.arange(row_column_users[scenario]['n_rows'][0],
345
+ row_column_users[scenario]['n_rows'][1])
346
+
347
+ parameters['bs_antenna']['shape'] = np.array([n_ant_bs, 1]) # Horizontal, Vertical
348
+ parameters['bs_antenna']['rotation'] = np.array([0,0,-135]) # (x,y,z)
349
+ parameters['ue_antenna']['shape'] = np.array([n_ant_ue, 1])
350
+ parameters['enable_BS2BS'] = False
351
+ parameters['OFDM']['subcarriers'] = n_subcarriers
352
+ parameters['OFDM']['selected_subcarriers'] = np.arange(n_subcarriers)
353
+
354
+ parameters['OFDM']['bandwidth'] = scs * n_subcarriers / 1e9
355
+ parameters['num_paths'] = 20
356
+
357
+ return parameters, row_column_users
358
+ #%% Sampling and Data Selection
359
+ def uniform_sampling(dataset, sampling_div, n_rows, users_per_row):
360
+ cols = np.arange(users_per_row, step=sampling_div[0])
361
+ rows = np.arange(n_rows, step=sampling_div[1])
362
+ uniform_idxs = np.array([j + i * users_per_row for i in rows for j in cols])
363
+ return uniform_idxs
364
+
365
+ def select_by_idx(dataset, idxs):
366
+ dataset_t = [] # Trimmed dataset
367
+ for bs_idx in range(len(dataset)):
368
+ dataset_t.append({})
369
+ for key in dataset[bs_idx].keys():
370
+ dataset_t[bs_idx]['location'] = dataset[bs_idx]['location']
371
+ dataset_t[bs_idx]['user'] = {k: dataset[bs_idx]['user'][k][idxs] for k in dataset[bs_idx]['user']}
372
+ return dataset_t
373
+ #%%
374
+ def inverse_patch_maker(patches, original_shape, patch_rows, patch_cols):
375
+ """
376
+ Reconstructs the original channel matrix from patches.
377
+
378
+ Args:
379
+ patches (numpy array): Patches of shape (num_samples, n_patches, patch_rows * patch_cols * 2).
380
+ original_shape (tuple): Original shape of the channel matrix (num_samples, 1, n_rows, n_cols).
381
+ patch_rows (int): Number of rows in each patch.
382
+ patch_cols (int): Number of columns in each patch.
383
+
384
+ Returns:
385
+ numpy array: Reconstructed complex-valued channel matrix of shape (num_samples, 1, n_rows, n_cols).
386
+ """
387
+ n_samples, n_patches, patch_size = patches.shape
388
+ _, _, n_rows, n_cols = original_shape
389
+
390
+ # Ensure patch dimensions match
391
+ assert patch_rows * patch_cols * 2 == patch_size, "Patch size mismatch with provided dimensions."
392
+
393
+ # Compute the number of patches along rows and columns
394
+ n_patches_rows = int(np.ceil(n_rows / patch_rows))
395
+ n_patches_cols = int(np.ceil(n_cols / patch_cols))
396
+
397
+ # Reassemble interleaved array from patches
398
+ interleaved = np.zeros((n_samples, n_patches_rows * patch_rows, n_patches_cols * patch_cols * 2), dtype=np.float32)
399
+ patch_idx = 0
400
+
401
+ for i in range(n_patches_rows):
402
+ for j in range(n_patches_cols):
403
+ patch = patches[:, patch_idx, :].reshape(n_samples, patch_rows, patch_cols * 2)
404
+ interleaved[:, i * patch_rows:(i + 1) * patch_rows, j * patch_cols * 2:(j + 1) * patch_cols * 2] = patch
405
+ patch_idx += 1
406
+
407
+ # Remove padding if necessary
408
+ interleaved = interleaved[:, :n_rows, :n_cols * 2]
409
+
410
+ # Separate real and imaginary parts
411
+ flat_real = interleaved[:, :, 0::2]
412
+ flat_imag = interleaved[:, :, 1::2]
413
+
414
+ # Reconstruct the complex-valued original channel
415
+ reconstructed = flat_real + 1j * flat_imag
416
+
417
+ # Add the singleton channel dimension back
418
+ reconstructed = reconstructed[:, np.newaxis, :, :] # Shape: (num_samples, 1, n_rows, n_cols)
419
+
420
+ return reconstructed
421
+ #%%
422
+ def label_gen(task, data, scenario, n_beams=64):
423
+
424
+ idxs = np.where(data['user']['LoS'] != -1)[0]
425
+
426
+ if task == 'LoS/NLoS Classification':
427
+ label = data['user']['LoS'][idxs]
428
+
429
+ losChs = np.where(data['user']['LoS'] == -1, np.nan, data['user']['LoS'])
430
+ plot_coverage(data['user']['location'], losChs, cbar_title='LoS status')
431
+
432
+ elif task == 'Beam Prediction':
433
+ parameters, row_column_users = get_parameters(scenario, bs_idx=1)
434
+ n_users = len(data['user']['channel'])
435
+ n_subbands = 1
436
+ fov = 180
437
+
438
+ # Setup Beamformers
439
+ beam_angles = np.around(np.arange(-fov/2, fov/2+.1, fov/(n_beams-1)), 2)
440
+
441
+ F1 = np.array([steering_vec(parameters['bs_antenna']['shape'],
442
+ phi=azi*np.pi/180,
443
+ kd=2*np.pi*parameters['bs_antenna']['spacing']).squeeze()
444
+ for azi in beam_angles])
445
+
446
+ full_dbm = np.zeros((n_beams, n_subbands, n_users), dtype=float)
447
+ for ue_idx in tqdm(range(n_users), desc='Computing the channel for each user'):
448
+ if data['user']['LoS'][ue_idx] == -1:
449
+ full_dbm[:,:,ue_idx] = np.nan
450
+ else:
451
+ chs = F1 @ data['user']['channel'][ue_idx]
452
+ full_linear = np.abs(np.mean(chs.squeeze().reshape((n_beams, n_subbands, -1)), axis=-1))
453
+ full_dbm[:,:,ue_idx] = np.around(20*np.log10(full_linear) + 30, 1)
454
+
455
+ best_beams = np.argmax(np.mean(full_dbm,axis=1), axis=0)
456
+ best_beams = best_beams.astype(float)
457
+ best_beams[np.isnan(full_dbm[0,0,:])] = np.nan
458
+ # max_bf_pwr = np.max(np.mean(full_dbm,axis=1), axis=0)
459
+
460
+ plot_coverage(data['user']['location'], best_beams, tx_pos=data['location'],
461
+ tx_ori=parameters['bs_antenna']['rotation']*np.pi/180,
462
+ cbar_title='Best beam index')
463
+
464
+ label = best_beams[idxs]
465
+
466
+ return label.astype(int)
467
+ #%%
468
+ def steering_vec(array, phi=0, theta=0, kd=np.pi):
469
+ idxs = DeepMIMOv3.ant_indices(array)
470
+ resp = DeepMIMOv3.array_response(idxs, phi, theta+np.pi/2, kd)
471
+ return resp / np.linalg.norm(resp)
472
+ #%%
473
+ import re
474
+ def has_version_suffix(s):
475
+ pattern = r"_v([1-9]|1[0-9]|20)$"
476
+ return bool(re.search(pattern, s))
477
+ #%%
478
+ def scenario_prop():
479
+ row_column_users = {
480
+ 'city_0_newyork': {
481
+ 'n_rows': 109,
482
+ 'n_per_row': 291,
483
+ 'n_ant_bs': 8,
484
+ 'n_subcarriers': 32
485
+ },
486
+ 'city_1_losangeles': {
487
+ 'n_rows': 142,
488
+ 'n_per_row': 201,
489
+ 'n_ant_bs': 8,
490
+ 'n_subcarriers': 64
491
+ },
492
+ 'city_2_chicago': {
493
+ 'n_rows': 139,
494
+ 'n_per_row': 200,
495
+ 'n_ant_bs': 8,
496
+ 'n_subcarriers': 128
497
+ },
498
+ 'city_3_houston': {
499
+ 'n_rows': 154,
500
+ 'n_per_row': 202,
501
+ 'n_ant_bs': 8,
502
+ 'n_subcarriers': 256
503
+ },
504
+ 'city_4_phoenix': {
505
+ 'n_rows': 198,
506
+ 'n_per_row': 214,
507
+ 'n_ant_bs': 8,
508
+ 'n_subcarriers': 512
509
+ },
510
+ 'city_5_philadelphia': {
511
+ 'n_rows': 239,
512
+ 'n_per_row': 164,
513
+ 'n_ant_bs': 8,
514
+ 'n_subcarriers': 1024
515
+ },
516
+ 'city_6_miami': {
517
+ 'n_rows': 199,
518
+ 'n_per_row': 216 ,
519
+ 'n_ant_bs': 16,
520
+ 'n_subcarriers': 32
521
+ },
522
+ 'city_7_sandiego': {
523
+ 'n_rows': 207,
524
+ 'n_per_row': 176,
525
+ 'n_ant_bs': 16,
526
+ 'n_subcarriers': 64
527
+ },
528
+ 'city_8_dallas': {
529
+ 'n_rows': 207,
530
+ 'n_per_row': 190,
531
+ 'n_ant_bs': 16,
532
+ 'n_subcarriers': 128
533
+ },
534
+ 'city_9_sanfrancisco': {
535
+ 'n_rows': 196,
536
+ 'n_per_row': 206,
537
+ 'n_ant_bs': 16,
538
+ 'n_subcarriers': 256
539
+ },
540
+ 'city_10_austin': {
541
+ 'n_rows': 255,
542
+ 'n_per_row': 137,
543
+ 'n_ant_bs': 16,
544
+ 'n_subcarriers': 512
545
+ },
546
+ 'city_11_santaclara': {
547
+ 'n_rows': 117,
548
+ 'n_per_row': 285,
549
+ 'n_ant_bs': 32,
550
+ 'n_subcarriers': 32
551
+ },
552
+ 'city_12_fortworth': {
553
+ 'n_rows': 214,
554
+ 'n_per_row': 179,
555
+ 'n_ant_bs': 32,
556
+ 'n_subcarriers': 64
557
+ },
558
+ 'city_13_columbus': {
559
+ 'n_rows': 178,
560
+ 'n_per_row': 240,
561
+ 'n_ant_bs': 32,
562
+ 'n_subcarriers': 128
563
+ },
564
+ 'city_14_charlotte': {
565
+ 'n_rows': 216,
566
+ 'n_per_row': 177,
567
+ 'n_ant_bs': 32,
568
+ 'n_subcarriers': 256
569
+ },
570
+ 'city_15_indianapolis': {
571
+ 'n_rows': 200,
572
+ 'n_per_row': 196,
573
+ 'n_ant_bs': 64,
574
+ 'n_subcarriers': 32
575
+ },
576
+ 'city_16_sanfrancisco': {
577
+ 'n_rows': 201,
578
+ 'n_per_row': 208,
579
+ 'n_ant_bs': 64,
580
+ 'n_subcarriers': 64
581
+ },
582
+ 'city_17_seattle': {
583
+ 'n_rows': 185,
584
+ 'n_per_row': 205,
585
+ 'n_ant_bs': 64,
586
+ 'n_subcarriers': 128
587
+ },
588
+ 'city_18_denver': {
589
+ 'n_rows': 212,
590
+ 'n_per_row': 204,
591
+ 'n_ant_bs': 128,
592
+ 'n_subcarriers': 32
593
+ },
594
+ 'city_19_oklahoma': {
595
+ 'n_rows': 204,
596
+ 'n_per_row': 188,
597
+ 'n_ant_bs': 128,
598
+ 'n_subcarriers': 64
599
+ },
600
+ 'asu_campus1_v1': {
601
+ 'n_rows': [0, 1*int(321/20)],
602
+ 'n_per_row': 411,
603
+ 'n_ant_bs': 8,
604
+ 'n_subcarriers': 32
605
+ },
606
+ 'asu_campus1_v2': {
607
+ 'n_rows': [1*int(321/20), 2*int(321/20)],
608
+ 'n_per_row': 411,
609
+ 'n_ant_bs': 8,
610
+ 'n_subcarriers': 64
611
+ },
612
+ 'asu_campus1_v3': {
613
+ 'n_rows': [2*int(321/20), 3*int(321/20)],
614
+ 'n_per_row': 411,
615
+ 'n_ant_bs': 8,
616
+ 'n_subcarriers': 128
617
+ },
618
+ 'asu_campus1_v4': {
619
+ 'n_rows': [3*int(321/20), 4*int(321/20)],
620
+ 'n_per_row': 411,
621
+ 'n_ant_bs': 8,
622
+ 'n_subcarriers': 256
623
+ },
624
+ 'asu_campus1_v5': {
625
+ 'n_rows': [4*int(321/20), 5*int(321/20)],
626
+ 'n_per_row': 411,
627
+ 'n_ant_bs': 8,
628
+ 'n_subcarriers': 512
629
+ },
630
+ 'asu_campus1_v6': {
631
+ 'n_rows': [5*int(321/20), 6*int(321/20)],
632
+ 'n_per_row': 411,
633
+ 'n_ant_bs': 8,
634
+ 'n_subcarriers': 1024
635
+ },
636
+ 'asu_campus1_v7': {
637
+ 'n_rows': [6*int(321/20), 7*int(321/20)],
638
+ 'n_per_row': 411,
639
+ 'n_ant_bs': 16,
640
+ 'n_subcarriers': 32
641
+ },
642
+ 'asu_campus1_v8': {
643
+ 'n_rows': [7*int(321/20), 8*int(321/20)],
644
+ 'n_per_row': 411,
645
+ 'n_ant_bs':16,
646
+ 'n_subcarriers': 64
647
+ },
648
+ 'asu_campus1_v9': {
649
+ 'n_rows': [8*int(321/20), 9*int(321/20)],
650
+ 'n_per_row': 411,
651
+ 'n_ant_bs': 16,
652
+ 'n_subcarriers': 128
653
+ },
654
+ 'asu_campus1_v10': {
655
+ 'n_rows': [9*int(321/20), 10*int(321/20)],
656
+ 'n_per_row': 411,
657
+ 'n_ant_bs': 16,
658
+ 'n_subcarriers': 256
659
+ },
660
+ 'asu_campus1_v11': {
661
+ 'n_rows': [10*int(321/20), 11*int(321/20)],
662
+ 'n_per_row': 411,
663
+ 'n_ant_bs': 16,
664
+ 'n_subcarriers': 512
665
+ },
666
+ 'asu_campus1_v12': {
667
+ 'n_rows': [11*int(321/20), 12*int(321/20)],
668
+ 'n_per_row': 411,
669
+ 'n_ant_bs': 32,
670
+ 'n_subcarriers': 32
671
+ },
672
+ 'asu_campus1_v13': {
673
+ 'n_rows': [12*int(321/20), 13*int(321/20)],
674
+ 'n_per_row': 411,
675
+ 'n_ant_bs': 32,
676
+ 'n_subcarriers': 64
677
+ },
678
+ 'asu_campus1_v14': {
679
+ 'n_rows': [13*int(321/20), 14*int(321/20)],
680
+ 'n_per_row': 411,
681
+ 'n_ant_bs': 32,
682
+ 'n_subcarriers': 128
683
+ },
684
+ 'asu_campus1_v15': {
685
+ 'n_rows': [14*int(321/20), 15*int(321/20)],
686
+ 'n_per_row': 411,
687
+ 'n_ant_bs': 32,
688
+ 'n_subcarriers': 256
689
+ },
690
+ 'asu_campus1_v16': {
691
+ 'n_rows': [15*int(321/20), 16*int(321/20)],
692
+ 'n_per_row': 411,
693
+ 'n_ant_bs': 64,
694
+ 'n_subcarriers': 32
695
+ },
696
+ 'asu_campus1_v17': {
697
+ 'n_rows': [16*int(321/20), 17*int(321/20)],
698
+ 'n_per_row': 411,
699
+ 'n_ant_bs': 64,
700
+ 'n_subcarriers': 64
701
+ },
702
+ 'asu_campus1_v18': {
703
+ 'n_rows': [17*int(321/20), 18*int(321/20)],
704
+ 'n_per_row': 411,
705
+ 'n_ant_bs': 64,
706
+ 'n_subcarriers': 128
707
+ },
708
+ 'asu_campus1_v19': {
709
+ 'n_rows': [18*int(321/20), 19*int(321/20)],
710
+ 'n_per_row': 411,
711
+ 'n_ant_bs': 128,
712
+ 'n_subcarriers': 32
713
+ },
714
+ 'asu_campus1_v20': {
715
+ 'n_rows': [19*int(321/20), 20*int(321/20)],
716
+ 'n_per_row': 411,
717
+ 'n_ant_bs': 128,
718
+ 'n_subcarriers': 64
719
+ },
720
+ 'Boston5G_3p5_v1': {
721
+ 'n_rows': [812, 812 + 1*int((1622-812)/20)],
722
+ 'n_per_row': 595,
723
+ 'n_ant_bs': 8,
724
+ 'n_subcarriers': 32
725
+ },
726
+ 'Boston5G_3p5_v2': {
727
+ 'n_rows': [812 + 1*int((1622-812)/20), 812 + 2*int((1622-812)/20)],
728
+ 'n_per_row': 595,
729
+ 'n_ant_bs': 8,
730
+ 'n_subcarriers': 64
731
+ },
732
+ 'Boston5G_3p5_v3': {
733
+ 'n_rows': [812 + 2*int((1622-812)/20), 812 + 3*int((1622-812)/20)],
734
+ 'n_per_row': 595,
735
+ 'n_ant_bs': 8,
736
+ 'n_subcarriers': 128
737
+ },
738
+ 'Boston5G_3p5_v4': {
739
+ 'n_rows': [812 + 3*int((1622-812)/20), 812 + 4*int((1622-812)/20)],
740
+ 'n_per_row': 595,
741
+ 'n_ant_bs': 8,
742
+ 'n_subcarriers': 256
743
+ },
744
+ 'Boston5G_3p5_v5': {
745
+ 'n_rows': [812 + 4*int((1622-812)/20), 812 + 5*int((1622-812)/20)],
746
+ 'n_per_row': 595,
747
+ 'n_ant_bs': 8,
748
+ 'n_subcarriers': 512
749
+ },
750
+ 'Boston5G_3p5_v6': {
751
+ 'n_rows': [812 + 5*int((1622-812)/20), 812 + 6*int((1622-812)/20)],
752
+ 'n_per_row': 595,
753
+ 'n_ant_bs': 8,
754
+ 'n_subcarriers': 1024
755
+ },
756
+ 'Boston5G_3p5_v7': {
757
+ 'n_rows': [812 + 6*int((1622-812)/20), 812 + 7*int((1622-812)/20)],
758
+ 'n_per_row': 595,
759
+ 'n_ant_bs': 16,
760
+ 'n_subcarriers': 32
761
+ },
762
+ 'Boston5G_3p5_v8': {
763
+ 'n_rows': [812 + 7*int((1622-812)/20), 812 + 8*int((1622-812)/20)],
764
+ 'n_per_row': 595,
765
+ 'n_ant_bs':16,
766
+ 'n_subcarriers': 64
767
+ },
768
+ 'Boston5G_3p5_v9': {
769
+ 'n_rows': [812 + 8*int((1622-812)/20), 812 + 9*int((1622-812)/20)],
770
+ 'n_per_row': 595,
771
+ 'n_ant_bs': 16,
772
+ 'n_subcarriers': 128
773
+ },
774
+ 'Boston5G_3p5_v10': {
775
+ 'n_rows': [812 + 9*int((1622-812)/20), 812 + 10*int((1622-812)/20)],
776
+ 'n_per_row': 595,
777
+ 'n_ant_bs': 16,
778
+ 'n_subcarriers': 256
779
+ },
780
+ 'Boston5G_3p5_v11': {
781
+ 'n_rows': [812 + 10*int((1622-812)/20), 812 + 11*int((1622-812)/20)],
782
+ 'n_per_row': 595,
783
+ 'n_ant_bs': 16,
784
+ 'n_subcarriers': 512
785
+ },
786
+ 'Boston5G_3p5_v12': {
787
+ 'n_rows': [812 + 11*int((1622-812)/20), 812 + 12*int((1622-812)/20)],
788
+ 'n_per_row': 595,
789
+ 'n_ant_bs': 32,
790
+ 'n_subcarriers': 32
791
+ },
792
+ 'Boston5G_3p5_v13': {
793
+ 'n_rows': [812 + 12*int((1622-812)/20), 812 + 13*int((1622-812)/20)],
794
+ 'n_per_row': 595,
795
+ 'n_ant_bs': 32,
796
+ 'n_subcarriers': 64
797
+ },
798
+ 'Boston5G_3p5_v14': {
799
+ 'n_rows': [812 + 13*int((1622-812)/20), 812 + 14*int((1622-812)/20)],
800
+ 'n_per_row': 595,
801
+ 'n_ant_bs': 32,
802
+ 'n_subcarriers': 128
803
+ },
804
+ 'Boston5G_3p5_v15': {
805
+ 'n_rows': [812 + 14*int((1622-812)/20), 812 + 15*int((1622-812)/20)],
806
+ 'n_per_row': 595,
807
+ 'n_ant_bs': 32,
808
+ 'n_subcarriers': 256
809
+ },
810
+ 'Boston5G_3p5_v16': {
811
+ 'n_rows': [812 + 15*int((1622-812)/20), 812 + 16*int((1622-812)/20)],
812
+ 'n_per_row': 595,
813
+ 'n_ant_bs': 64,
814
+ 'n_subcarriers': 32
815
+ },
816
+ 'Boston5G_3p5_v17': {
817
+ 'n_rows': [812 + 16*int((1622-812)/20), 812 + 17*int((1622-812)/20)],
818
+ 'n_per_row': 595,
819
+ 'n_ant_bs': 64,
820
+ 'n_subcarriers': 64
821
+ },
822
+ 'Boston5G_3p5_v18': {
823
+ 'n_rows': [812 + 17*int((1622-812)/20), 812 + 18*int((1622-812)/20)],
824
+ 'n_per_row': 595,
825
+ 'n_ant_bs': 64,
826
+ 'n_subcarriers': 128
827
+ },
828
+ 'Boston5G_3p5_v19': {
829
+ 'n_rows': [812 + 18*int((1622-812)/20), 812 + 19*int((1622-812)/20)],
830
+ 'n_per_row': 595,
831
+ 'n_ant_bs': 128,
832
+ 'n_subcarriers': 32
833
+ },
834
+ 'Boston5G_3p5_v20': {
835
+ 'n_rows': [812 + 19*int((1622-812)/20), 812 + 20*int((1622-812)/20)],
836
+ 'n_per_row': 595,
837
+ 'n_ant_bs': 128,
838
+ 'n_subcarriers': 64
839
+ },
840
+ 'O1_3p5_v1': {
841
+ 'n_rows': [0*int(3852/12), 1*int(3852/12)],
842
+ 'n_per_row': 181,
843
+ 'n_ant_bs': 8,
844
+ 'n_subcarriers': 32
845
+ },
846
+ 'O1_3p5_v2': {
847
+ 'n_rows': [1*int(3852/12), 2*int(3852/12)],
848
+ 'n_per_row': 181,
849
+ 'n_ant_bs': 8,
850
+ 'n_subcarriers': 64
851
+ },
852
+ 'O1_3p5_v3': {
853
+ 'n_rows': [2*int(3852/12), 3*int(3852/12)],
854
+ 'n_per_row': 181,
855
+ 'n_ant_bs': 8,
856
+ 'n_subcarriers': 128
857
+ },
858
+ 'O1_3p5_v4': {
859
+ 'n_rows': [3*int(3852/12), 4*int(3852/12)],
860
+ 'n_per_row': 181,
861
+ 'n_ant_bs': 8,
862
+ 'n_subcarriers': 256
863
+ },
864
+ 'O1_3p5_v5': {
865
+ 'n_rows': [4*int(3852/12), 5*int(3852/12)],
866
+ 'n_per_row': 181,
867
+ 'n_ant_bs': 8,
868
+ 'n_subcarriers': 512
869
+ },
870
+ 'O1_3p5_v6': {
871
+ 'n_rows': [5*int(3852/12), 6*int(3852/12)],
872
+ 'n_per_row': 181,
873
+ 'n_ant_bs': 8,
874
+ 'n_subcarriers': 1024
875
+ },
876
+ 'O1_3p5_v7': {
877
+ 'n_rows': [6*int(3852/12), 7*int(3852/12)],
878
+ 'n_per_row': 181,
879
+ 'n_ant_bs': 16,
880
+ 'n_subcarriers': 32
881
+ },
882
+ 'O1_3p5_v8': {
883
+ 'n_rows': [7*int(3852/12), 8*int(3852/12)],
884
+ 'n_per_row': 181,
885
+ 'n_ant_bs': 16,
886
+ 'n_subcarriers': 64
887
+ },
888
+ 'O1_3p5_v9': {
889
+ 'n_rows': [8*int(3852/12), 9*int(3852/12)],
890
+ 'n_per_row': 181,
891
+ 'n_ant_bs': 16,
892
+ 'n_subcarriers': 128
893
+ },
894
+ 'O1_3p5_v10': {
895
+ 'n_rows': [9*int(3852/12), 10*int(3852/12)],
896
+ 'n_per_row': 181,
897
+ 'n_ant_bs': 16,
898
+ 'n_subcarriers': 256
899
+ },
900
+ 'O1_3p5_v11': {
901
+ 'n_rows': [10*int(3852/12), 11*int(3852/12)],
902
+ 'n_per_row': 181,
903
+ 'n_ant_bs': 16,
904
+ 'n_subcarriers': 512
905
+ },
906
+ 'O1_3p5_v12': {
907
+ 'n_rows': [11*int(3852/12), 12*int(3852/12)],
908
+ 'n_per_row': 181,
909
+ 'n_ant_bs': 32,
910
+ 'n_subcarriers': 32
911
+ },
912
+ 'O1_3p5_v13': {
913
+ 'n_rows': [12*int(3852/12)+0*int(1351/10), 12*int(3852/12)+1*int(1351/10)],
914
+ 'n_per_row': 361,
915
+ 'n_ant_bs': 32,
916
+ 'n_subcarriers': 64
917
+ },
918
+ 'O1_3p5_v14': {
919
+ 'n_rows': [12*int(3852/12)+1*int(1351/10), 12*int(3852/12)+2*int(1351/10)],
920
+ 'n_per_row': 181,
921
+ 'n_ant_bs': 32,
922
+ 'n_subcarriers': 128
923
+ },
924
+ 'O1_3p5_v15': {
925
+ 'n_rows': [12*int(3852/12)+2*int(1351/10), 12*int(3852/12)+3*int(1351/10)],
926
+ 'n_per_row': 181,
927
+ 'n_ant_bs': 32,
928
+ 'n_subcarriers': 256
929
+ },
930
+ 'O1_3p5_v16': {
931
+ 'n_rows': [12*int(3852/12)+3*int(1351/10), 12*int(3852/12)+4*int(1351/10)],
932
+ 'n_per_row': 181,
933
+ 'n_ant_bs': 64,
934
+ 'n_subcarriers': 32
935
+ },
936
+ 'O1_3p5_v17': {
937
+ 'n_rows': [12*int(3852/12)+4*int(1351/10), 12*int(3852/12)+5*int(1351/10)],
938
+ 'n_per_row': 181,
939
+ 'n_ant_bs': 64,
940
+ 'n_subcarriers': 64
941
+ },
942
+ 'O1_3p5_v18': {
943
+ 'n_rows': [12*int(3852/12)+5*int(1351/10), 12*int(3852/12)+6*int(1351/10)],
944
+ 'n_per_row': 181,
945
+ 'n_ant_bs': 64,
946
+ 'n_subcarriers': 128
947
+ },
948
+ 'O1_3p5_v19': {
949
+ 'n_rows': [12*int(3852/12)+6*int(1351/10), 12*int(3852/12)+7*int(1351/10)],
950
+ 'n_per_row': 181,
951
+ 'n_ant_bs': 128,
952
+ 'n_subcarriers': 32
953
+ },
954
+ 'O1_3p5_v20': {
955
+ 'n_rows': [12*int(3852/12)+7*int(1351/10), 12*int(3852/12)+8*int(1351/10)],
956
+ 'n_per_row': 181,
957
+ 'n_ant_bs': 128,
958
+ 'n_subcarriers': 64
959
+ },
960
+ 'city_0_newyork_v16x64': {
961
+ 'n_rows': 109,
962
+ 'n_per_row': 291,
963
+ 'n_ant_bs': 16,
964
+ 'n_subcarriers': 64
965
+ },
966
+ 'city_1_losangeles_v16x64': {
967
+ 'n_rows': 142,
968
+ 'n_per_row': 201,
969
+ 'n_ant_bs': 16,
970
+ 'n_subcarriers': 64
971
+ },
972
+ 'city_2_chicago_v16x64': {
973
+ 'n_rows': 139,
974
+ 'n_per_row': 200,
975
+ 'n_ant_bs': 16,
976
+ 'n_subcarriers': 64
977
+ },
978
+ 'city_3_houston_v16x64': {
979
+ 'n_rows': 154,
980
+ 'n_per_row': 202,
981
+ 'n_ant_bs': 16,
982
+ 'n_subcarriers': 64
983
+ },
984
+ 'city_4_phoenix_v16x64': {
985
+ 'n_rows': 198,
986
+ 'n_per_row': 214,
987
+ 'n_ant_bs': 16,
988
+ 'n_subcarriers': 64
989
+ },
990
+ 'city_5_philadelphia_v16x64': {
991
+ 'n_rows': 239,
992
+ 'n_per_row': 164,
993
+ 'n_ant_bs': 16,
994
+ 'n_subcarriers': 64
995
+ },
996
+ 'city_6_miami_v16x64': {
997
+ 'n_rows': 199,
998
+ 'n_per_row': 216,
999
+ 'n_ant_bs': 16,
1000
+ 'n_subcarriers': 64
1001
+ },
1002
+ 'city_7_sandiego_v16x64': {
1003
+ 'n_rows': 207,
1004
+ 'n_per_row': 176,
1005
+ 'n_ant_bs': 16,
1006
+ 'n_subcarriers': 64
1007
+ },
1008
+ 'city_8_dallas_v16x64': {
1009
+ 'n_rows': 207,
1010
+ 'n_per_row': 190,
1011
+ 'n_ant_bs': 16,
1012
+ 'n_subcarriers': 64
1013
+ },
1014
+ 'city_9_sanfrancisco_v16x64': {
1015
+ 'n_rows': 196,
1016
+ 'n_per_row': 206,
1017
+ 'n_ant_bs': 16,
1018
+ 'n_subcarriers': 64
1019
+ }}
1020
+ return row_column_users
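
As a concrete check of the patching arithmetic (my own illustration, not part of the upload): each 4x4 patch carries 4*4*2 = 32 real values after real/imaginary interleaving, and the token sequence length is the patch count plus the prepended [CLS] token. For example:

import numpy as np

def n_tokens(n_ant_bs, n_subcarriers, patch_rows=4, patch_cols=4):
    # patch grid computed exactly as in patch_maker(); +1 for the prepended [CLS] token
    n_patches = int(np.ceil(n_ant_bs / patch_rows)) * int(np.ceil(n_subcarriers / patch_cols))
    return n_patches + 1

print(n_tokens(8, 32))    # city_0_newyork (8 antennas, 32 subcarriers)   -> 17 tokens
print(n_tokens(64, 128))  # city_17_seattle (64 antennas, 128 subcarriers) -> 513 tokens = MAX_LEN
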
lwm_model.py ADDED
@@ -0,0 +1,154 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri Sep 13 19:23:54 2024
4
+
5
+ This script defines the LWM model architecture.
6
+
7
+ @author: Sadjad Alikhani
8
+ """
9
+ #%%
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+ import numpy as np
14
+ #%%
15
+ class LayerNormalization(nn.Module):
16
+ def __init__(self, d_model: int, eps: float = 1e-6) -> None:
17
+ super().__init__()
18
+ self.eps = eps
19
+ self.alpha = nn.Parameter(torch.ones(d_model))
20
+ self.bias = nn.Parameter(torch.zeros(d_model))
21
+
22
+ def forward(self, x):
23
+ mean = x.mean(dim=-1, keepdim=True)
24
+ std = x.std(dim=-1, keepdim=True)
25
+ return self.alpha * (x - mean) / (std + self.eps) + self.bias
26
+
27
+
28
+ class Embedding(nn.Module):
29
+ def __init__(self, element_length, d_model, max_len=513):
30
+ super().__init__()
31
+ self.element_length = element_length
32
+ self.d_model = d_model
33
+ self.proj = nn.Linear(element_length, d_model)
34
+ self.pos_embed = nn.Embedding(max_len, d_model)
35
+ self.norm = LayerNormalization(d_model)
36
+
37
+ def forward(self, x):
38
+ seq_len = x.size(1)
39
+ pos = torch.arange(seq_len, dtype=torch.long, device=x.device)
40
+ pos_encodings = self.pos_embed(pos)
41
+ tok_emb = self.proj(x.float())
42
+ embedding = tok_emb + pos_encodings
43
+ return self.norm(embedding)
44
+
45
+
46
+ class ScaledDotProductAttention(nn.Module):
47
+ def __init__(self, d_k):
48
+ super().__init__()
49
+ self.d_k = d_k
50
+
51
+ def forward(self, Q, K, V):
52
+ scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(self.d_k)
53
+ attn = F.softmax(scores, dim=-1)
54
+ context = torch.matmul(attn, V)
55
+ return context, attn
56
+
57
+
58
+ class MultiHeadAttention(nn.Module):
59
+ def __init__(self, d_model, n_heads, dropout):
60
+ super().__init__()
61
+ self.d_k = d_model // n_heads
62
+ self.d_v = d_model // n_heads
63
+ self.n_heads = n_heads
64
+ self.W_Q = nn.Linear(d_model, self.d_k * n_heads)
65
+ self.W_K = nn.Linear(d_model, self.d_k * n_heads)
66
+ self.W_V = nn.Linear(d_model, self.d_v * n_heads)
67
+ self.linear = nn.Linear(n_heads * self.d_v, d_model)
68
+ self.dropout = nn.Dropout(dropout)
69
+ self.scaled_dot_attn = ScaledDotProductAttention(self.d_k)
70
+
71
+ def forward(self, Q, K, V):
72
+ residual, batch_size = Q, Q.size(0)
73
+ q_s = self.W_Q(Q).view(batch_size, -1, self.n_heads, self.d_k).transpose(1, 2)
74
+ k_s = self.W_K(K).view(batch_size, -1, self.n_heads, self.d_k).transpose(1, 2)
75
+ v_s = self.W_V(V).view(batch_size, -1, self.n_heads, self.d_v).transpose(1, 2)
76
+
77
+ context, attn = self.scaled_dot_attn(q_s, k_s, v_s)
78
+ output = context.transpose(1, 2).contiguous().view(batch_size, -1, self.n_heads * self.d_v)
79
+ output = self.linear(output)
80
+ return residual + self.dropout(output), attn
81
+
82
+
83
+ class PoswiseFeedForwardNet(nn.Module):
84
+ def __init__(self, d_model, d_ff, dropout):
85
+ super().__init__()
86
+ self.fc1 = nn.Linear(d_model, d_ff)
87
+ self.fc2 = nn.Linear(d_ff, d_model)
88
+ self.dropout = nn.Dropout(dropout)
89
+
90
+ def forward(self, x):
91
+ return self.fc2(self.dropout(F.relu(self.fc1(x))))
92
+
93
+
94
+ class EncoderLayer(nn.Module):
95
+ def __init__(self, d_model, n_heads, d_ff, dropout):
96
+ super().__init__()
97
+ self.enc_self_attn = MultiHeadAttention(d_model, n_heads, dropout)
98
+ self.pos_ffn = PoswiseFeedForwardNet(d_model, d_ff, dropout)
99
+ self.norm1 = LayerNormalization(d_model)
100
+ self.norm2 = LayerNormalization(d_model)
101
+
102
+ def forward(self, enc_inputs):
103
+ # Self-Attention with Add & Norm
104
+ attn_outputs, attn = self.enc_self_attn(enc_inputs, enc_inputs, enc_inputs)
105
+ attn_outputs = self.norm1(enc_inputs + attn_outputs) # Add & Norm
106
+
107
+ # Feed-Forward with Add & Norm
108
+ ff_outputs = self.pos_ffn(attn_outputs)
109
+ enc_outputs = self.norm2(attn_outputs + ff_outputs) # Add & Norm
110
+
111
+ return enc_outputs, attn
112
+
113
+
114
+ class lwm(nn.Module):
115
+ def __init__(self, element_length=32, d_model=128, n_layers=12, max_len=513, n_heads=8, dropout=0.1):
116
+ super().__init__()
117
+ self.embedding = Embedding(element_length, d_model, max_len)
118
+ self.layers = nn.ModuleList(
119
+ [EncoderLayer(d_model, n_heads, d_model*4, dropout) for _ in range(n_layers)]
120
+ )
121
+ self.linear = nn.Linear(d_model, d_model)
122
+ self.norm = LayerNormalization(d_model)
123
+
124
+ embed_weight = self.embedding.proj.weight
125
+ _, n_dim = embed_weight.size()
126
+ self.decoder = nn.Linear(d_model, n_dim, bias=False)
127
+ self.decoder_bias = nn.Parameter(torch.zeros(n_dim))
128
+
129
+ @classmethod
130
+ def from_pretrained(cls, ckpt_name='model_weights.pth', device='cuda'):
131
+ model = cls().to(device)
132
+ model.load_state_dict(torch.load(ckpt_name, map_location=device))
133
+ print(f"Model loaded successfully from {ckpt_name}")
134
+ return model
135
+
136
+ def forward(self, input_ids, masked_pos=None):
137
+ # Step 1: Embedding
138
+ output = self.embedding(input_ids)
139
+ attention_maps = []
140
+
141
+ # Step 2: Pass through Encoder Layers
142
+ for layer in self.layers:
143
+ output, attn = layer(output)
144
+ attention_maps.append(attn)
145
+
146
+ # If masked_pos is provided, perform masked token prediction
147
+ if masked_pos is not None:
148
+ masked_pos = masked_pos.long()[:, :, None].expand(-1, -1, output.size(-1))
149
+ h_masked = torch.gather(output, 1, masked_pos)
150
+ h_masked = self.norm(F.relu(self.linear(h_masked)))
151
+ logits_lm = self.decoder(h_masked) + self.decoder_bias
152
+ return logits_lm, output, attention_maps
153
+ else:
154
+ return output, attention_maps
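
A small smoke test of the forward pass under the default hyperparameters above; the batch size, sequence length, and masked positions below are made-up values for illustration only.

import torch
from lwm_model import lwm

model = lwm()                                  # element_length=32, d_model=128, 12 layers, 8 heads
x = torch.randn(2, 17, 32)                     # [CLS] + 16 patches of 32 interleaved real/imag values
masked_pos = torch.tensor([[1, 5], [2, 7]])    # patch positions to reconstruct
logits_lm, enc_out, attn_maps = model(x, masked_pos)
print(logits_lm.shape, enc_out.shape, len(attn_maps))   # (2, 2, 32), (2, 17, 128), 12

enc_out, attn_maps = model(x)                  # without masked_pos: embeddings only, as used in inference.py
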
main.py ADDED
@@ -0,0 +1,120 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Sat Dec 21 13:24:21 2024
4
+
5
+ This script pre-trains the LWM model.
6
+
7
+ @author: salikha4
8
+ """
9
+ import torch
10
+ import torch.nn as nn
11
+ from torch.utils.data import random_split
12
+ from input_preprocess import tokenizer, scenarios_list
13
+ from utils import create_dataloader, count_parameters
14
+ import numpy as np
15
+ import lwm_model
16
+ from torch.optim.lr_scheduler import CosineAnnealingLR
17
+ from torch.optim.lr_scheduler import LambdaLR
18
+ from torch.optim import AdamW
19
+ from train import train_lwm
20
+ import warnings
21
+ warnings.filterwarnings("ignore", category=UserWarning)
22
+ #%% SETTINGS
23
+ EPOCHS = 50
24
+ BATCH_SIZE = 128
25
+ VAL_BATCH_SIZE = 64
26
+ WARMUP_EPOCHS = 5
27
+ BASE_LR = 5e-4
28
+ N_ROWS = 4
29
+ N_COLUMNS = 4
30
+ ELEMENT_LENGTH = N_ROWS*N_COLUMNS*2
31
+ D_MODEL = 128
32
+ MAX_LEN = 513
33
+ N_LAYERS = 12
34
+ WEIGHT_DECAY = 0.05
35
+ BETA1 = 0.9
36
+ BETA2 = 0.999
37
+ MASK_PERCENT = 0.40
38
+ N_HEADS = 8
39
+ DROPOUT = 0.1
40
+ #%% GENERATE DATASET
41
+ bs_idxs = [1, 2, 3]
42
+ selected_scenario_names = scenarios_list()[:80]
43
+ preprocessed_data, _, _ = tokenizer(  # keep only the masked, length-grouped samples for pre-training
44
+ selected_scenario_names,
45
+ bs_idxs=bs_idxs,
46
+ MAX_LEN=MAX_LEN,
47
+ masking_percent=MASK_PERCENT,
48
+ mask=True, seed=42
49
+ )
50
+ #%% SPLIT DATASET
51
+ SEED = 42
52
+ torch.manual_seed(SEED)
53
+ np.random.seed(SEED)
54
+ train_ratio = 0.8
55
+ val_ratio = 0.2
56
+ train_data = {}
57
+ val_data = {}
58
+ test_data = {}
59
+ for key, samples in preprocessed_data.items():
60
+ print(f"key: {key}")
61
+ total_samples = len(samples)
62
+ train_size = int(train_ratio * total_samples)
63
+ val_size = int(val_ratio * total_samples)
64
+ test_size = total_samples - val_size - train_size
65
+
66
+ train_data[key], val_data[key], test_data[key] = random_split(
67
+ samples, [train_size, val_size, test_size]
68
+ )
69
+ train_loaders = create_dataloader(train_data, batch_size=BATCH_SIZE, shuffle=True)
70
+ val_loaders = create_dataloader(val_data, batch_size=VAL_BATCH_SIZE, shuffle=False)
71
+ #%% INITIALIZE MODEL
72
+ load_model = True
73
+ gpu_ids = [0]
74
+ device = torch.device("cuda:0")
75
+ model = lwm_model.lwm().to(device)
76
+
77
+ if load_model:
78
+ model_name = "lwm_epoch50_train0.0077_val0.0060_masking0.40.pth"
79
+ state_dict = torch.load(f"models/{model_name}", map_location=device)
80
+ new_state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
81
+ model.load_state_dict(new_state_dict)
82
+
83
+ model = nn.DataParallel(model, gpu_ids)
84
+ print(f"Model loaded successfully on GPU {device.index}")
85
+
86
+ n_parameters = count_parameters(model)
87
+ print(f"Number of trainable parameters: {n_parameters:,}")
88
+ #%% OPTIMIZER AND SCHEDULER
89
+ BASE_LR = 5e-5
90
+ MIN_LR = 1e-8
91
+ TOTAL_STEPS = sum(len(loader) for loader in train_loaders.values()) * EPOCHS
92
+ WARMUP_STEPS = sum(len(loader) for loader in train_loaders.values()) * WARMUP_EPOCHS
93
+
94
+ optimizer = AdamW(
95
+ model.parameters(),
96
+ lr=BASE_LR,
97
+ betas=(BETA1, BETA2),
98
+ weight_decay=WEIGHT_DECAY
99
+ )
100
+ def lr_lambda(current_step):
101
+ if current_step < WARMUP_STEPS:
102
+ # Linear warmup
103
+ return current_step / WARMUP_STEPS
104
+ else:
105
+ # Scaled cosine decay
106
+ scaled_progress = (current_step - WARMUP_STEPS) / (TOTAL_STEPS - WARMUP_STEPS)
107
+ cosine_decay = 0.5 * (1 + np.cos(np.pi * scaled_progress))
108
+ return cosine_decay * (BASE_LR - MIN_LR) / BASE_LR + MIN_LR / BASE_LR
109
+
110
+ scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)
111
+ #%% PRE-TRAIN THE MODEL
112
+ pretrained_model = train_lwm(
113
+ model,
114
+ train_loaders,
115
+ val_loaders,
116
+ optimizer,
117
+ scheduler,
118
+ EPOCHS,
119
+ device=device
120
+ )
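
For reference, the LambdaLR schedule defined above amounts to linear warmup followed by scaled cosine decay. Multiplying the returned factor by BASE_LR gives, at step t:

lr(t) = BASE_LR * t / WARMUP_STEPS                                                                        for t < WARMUP_STEPS
lr(t) = MIN_LR + 0.5 * (BASE_LR - MIN_LR) * (1 + cos(pi * (t - WARMUP_STEPS) / (TOTAL_STEPS - WARMUP_STEPS)))   otherwise

so the learning rate ramps up to BASE_LR = 5e-5 over the first WARMUP_EPOCHS = 5 epochs and then decays to MIN_LR = 1e-8 at the final step (a final multiplier of MIN_LR / BASE_LR = 2e-4).
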
models/model.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:485611f1a0f819f9c673827b8e613887b39672e97072bd7a412866b49d8dd40f
3
+ size 9960738
train.py ADDED
@@ -0,0 +1,446 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Fri Dec 20 09:32:12 2024
4
+
5
+ This script contains the LWM pre-training and task-specific fine-tuning functions.
6
+
7
+ @author: Sadjad Alikhani
8
+ """
9
+ import torch
10
+ import torch.nn as nn
11
+ from tqdm import tqdm
12
+ import matplotlib.pyplot as plt
13
+ import os
14
+ import csv
15
+ from utils import count_parameters
16
+ import time
17
+ #%% LOSS FUNCTION
18
+ def nmse_loss(y_pred, y_true):
19
+ y_pred_flat = y_pred.view(y_pred.size(0), -1)
20
+ y_true_flat = y_true.view(y_true.size(0), -1)
21
+ mse = torch.sum((y_true_flat - y_pred_flat)**2, dim=-1)
22
+ normalization = torch.sum(y_true_flat**2, dim=-1)
23
+ return mse / normalization
24
+ #%%
25
+ def train_lwm(model, train_loaders, val_loaders, optimizer, scheduler, epochs, device, save_dir="models", log_file="training_log.csv"):
26
+
27
+ if not os.path.exists(save_dir):
28
+ os.makedirs(save_dir)
29
+
30
+ # Initialize CSV log
31
+ if not os.path.exists(log_file):
32
+ with open(log_file, mode='w', newline='') as file:
33
+ writer = csv.writer(file)
34
+ writer.writerow(["Epoch", "Train NMSE", "Validation NMSE", "Learning Rate", "Best Model"])
35
+
36
+ train_nmse_losses = []
37
+ val_nmse_losses = []
38
+ best_val_nmse = float('inf')
39
+
40
+ for epoch in range(epochs):
41
+ model.train()
42
+ train_nmse = 0.0
43
+ train_samples = 0
44
+
45
+ # Training loop across all buckets
46
+ print(f"\nEpoch {epoch + 1}/{epochs} [Training]")
47
+ for length, train_loader in train_loaders.items():
48
+ print(f"Processing sequences of length {length}")
49
+ with tqdm(train_loader, desc=f"Length {length} [Training]", unit="batch") as t:
50
+ for batch in t:
51
+ # train_batches += 1
52
+ optimizer.zero_grad()
53
+
54
+ # Move data to device
55
+ input_ids, masked_tokens, masked_pos = [b.to(device) for b in batch]
56
+
57
+ # Forward pass
58
+ logits_lm, _, _ = model(input_ids, masked_pos)
59
+
60
+ # Compute NMSE
61
+ loss = torch.sum(nmse_loss(masked_tokens, logits_lm))
62
+ loss.backward()
63
+ optimizer.step()
64
+ scheduler.step()
65
+
66
+ train_nmse += loss.item()
67
+ train_samples += input_ids.shape[0]
68
+
69
+ # Update progress bar
70
+ t.set_postfix({"nmse": train_nmse/train_samples, "lr": scheduler.get_last_lr()[0]})
71
+
72
+ # Average NMSE across training batches
73
+ train_nmse /= max(train_samples, 1)
74
+ train_nmse_losses.append(train_nmse)
75
+
76
+ if epoch % 2 == 0:
77
+ # Validation loop across all buckets
78
+ model.eval()
79
+ val_nmse = 0.0
80
+ val_samples = 0
81
+ with torch.no_grad():
82
+ print(f"\nEpoch {epoch + 1}/{epochs} [Validation]")
83
+ for length, val_loader in val_loaders.items():
84
+ print(f"Processing sequences of length {length}")
85
+ with tqdm(val_loader, desc=f"Length {length} [Validation]", unit="batch") as t:
86
+ for batch in t:
87
+ # val_batches += 1
88
+
89
+ # Move data to device
90
+ input_ids, masked_tokens, masked_pos = [b.to(device) for b in batch]
91
+
92
+ # Forward pass
93
+ logits_lm, _, _ = model(input_ids, masked_pos)
94
+
95
+ # Compute NMSE
96
+ loss = torch.sum(nmse_loss(masked_tokens, logits_lm))
97
+ val_nmse += loss.item()
98
+ val_samples += input_ids.shape[0]
99
+
100
+ # Update progress bar
101
+ t.set_postfix({"nmse": val_nmse/val_samples})
102
+
103
+ # Average NMSE across validation batches
104
+ val_nmse /= max(val_samples, 1)
105
+ val_nmse_losses.append(val_nmse)
106
+
107
+ # Save model if validation NMSE improves
108
+ is_best_model = False
109
+ if val_nmse < best_val_nmse:
110
+ best_val_nmse = val_nmse
111
+ model_path = os.path.join(save_dir, f"lwm_epoch{epoch+1}_train{train_nmse:.4f}_val{val_nmse:.4f}.pth")
112
+ torch.save(model.state_dict(), model_path)
113
+ print(f"Model saved: {model_path}")
114
+ is_best_model = True
115
+
116
+ # Log the results
117
+ print(f" Train NMSE: {train_nmse:.4f}")
118
+ print(f" Validation NMSE: {val_nmse:.4f}")
119
+ print(f" Learning Rate: {scheduler.get_last_lr()[0]:.6e}")
120
+
121
+ # Append to CSV log
122
+ with open(log_file, mode='a', newline='') as file:
123
+ writer = csv.writer(file)
124
+ writer.writerow([epoch + 1, train_nmse, val_nmse, scheduler.get_last_lr()[0], is_best_model])
125
+
126
+ # Plot losses after each epoch
127
+ plt.figure(figsize=(10, 6))
128
+ plt.plot(range(1, len(train_nmse_losses) + 1), train_nmse_losses, label="Train NMSE")
129
+ plt.plot(range(1, len(val_nmse_losses) + 1), val_nmse_losses, label="Validation NMSE")
130
+ plt.xlabel("Epochs")
131
+ plt.ylabel("NMSE")
132
+ plt.title("Training and Validation NMSE Loss")
133
+ plt.legend()
134
+ plt.grid(True)
135
+ plt.show()
136
+
137
+ print("Training and validation complete.")
138
+ return model
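A minimal sketch of how train_lwm could be driven, assuming the bucketed loaders come from create_dataloader in utils.py; the AdamW and cosine-annealing choices below are illustrative placeholders, not the settings used for the released checkpoint:

    # train_loaders / val_loaders: dict mapping sequence length -> DataLoader of
    # (input_ids, masked_tokens, masked_pos) batches, e.g. built by create_dataloader.
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01)
    steps_per_epoch = sum(len(dl) for dl in train_loaders.values())
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50 * steps_per_epoch)
    model = train_lwm(model, train_loaders, val_loaders, optimizer, scheduler,
                      epochs=50, device="cuda:0")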
139
+ #%% FINE-TUNE
140
+ from torch.cuda.amp import GradScaler, autocast
141
+
142
+ # Define the ClassificationHead
143
+ class ClassificationHead(nn.Module):
144
+ def __init__(self, input_dim, num_classes):
145
+ super().__init__()
146
+ self.fc = nn.Linear(input_dim, num_classes)
147
+
148
+ def forward(self, x):
149
+ return self.fc(x)
150
+
151
+
152
+ # Define the RegressionHead
153
+ class RegressionHead(nn.Module):
154
+ def __init__(self, input_dim):
155
+ super().__init__()
156
+ self.fc = nn.Linear(input_dim, 1)
157
+
158
+ def forward(self, x):
159
+ return self.fc(x)
160
+
161
+ class CustomClassificationHead(nn.Module):
162
+ def __init__(self, input_dim, num_classes):
163
+
164
+ super().__init__()
165
+ self.classifier = nn.Sequential(
166
+ nn.Linear(input_dim, 512),
167
+ nn.BatchNorm1d(512),
168
+ nn.ReLU(),
169
+ nn.Dropout(0.1),
170
+ nn.Linear(512, 256),
171
+ nn.BatchNorm1d(256),
172
+ nn.ReLU(),
173
+ nn.Dropout(0.1),
174
+ nn.Linear(256, 128),
175
+ nn.BatchNorm1d(128),
176
+ nn.ReLU(),
177
+ # nn.Dropout(0.1),
178
+ nn.Linear(128, num_classes)
179
+ )
180
+
181
+ def forward(self, x):
182
+ return self.classifier(x)
183
+
184
+ class CustomRegressionHead(nn.Module):
185
+ def __init__(self, input_dim, output_dim):
186
+
187
+ super().__init__()
188
+ self.regressor = nn.Sequential(
189
+ nn.Linear(input_dim, 512),
190
+ nn.BatchNorm1d(512),
191
+ nn.ReLU(),
192
+ nn.Dropout(0.1),
193
+ nn.Linear(512, 256),
194
+ nn.BatchNorm1d(256),
195
+ nn.ReLU(),
196
+ nn.Dropout(0.1),
197
+ nn.Linear(256, output_dim)
198
+ )
199
+
200
+ def forward(self, x):
201
+ return self.regressor(x)
202
+
203
+
204
+ def custom_heads(input_dim, num_classes=None, output_dim=None, task_type="classification"):
205
+ """
206
+ Creates a custom head for classification or regression tasks.
207
+ Users should modify the class implementations for further customization.
208
+
209
+ Args:
210
+ input_dim (int): Input dimension of the head.
211
+ num_classes (int): Number of classes for classification tasks. Ignored for regression.
+ output_dim (int): Output dimension for regression tasks. Ignored for classification.
212
+ task_type (str): "classification" or "regression".
213
+
214
+ Returns:
215
+ nn.Module: Custom head for the specified task.
216
+ """
217
+ if task_type == "classification":
218
+ if num_classes is None:
219
+ raise ValueError("num_classes must be specified for classification tasks.")
220
+ return CustomClassificationHead(input_dim=input_dim, num_classes=num_classes)
221
+ elif task_type == "regression":
222
+ return CustomRegressionHead(input_dim=input_dim, output_dim=output_dim)
223
+ else:
224
+ raise ValueError("Invalid task_type. Choose 'classification' or 'regression'.")
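For example, the factory could be used as follows; the 128-dimensional input matches the CLS-embedding size assumed later in finetune, while the regression output size below is only a placeholder:

    # 16-way beam classifier on a 128-dim CLS embedding
    cls_head = custom_heads(input_dim=128, num_classes=16, task_type="classification")
    # regression head mapping a 128-dim embedding to a flattened target (size assumed)
    reg_head = custom_heads(input_dim=128, output_dim=1024, task_type="regression")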
225
+ #%%
226
+ # Fine-tuning wrapper for the base model
227
+ class FineTuningWrapper(nn.Module):
228
+ def __init__(self, model, task_head, fine_tune_layers="full"):
229
+ super().__init__()
230
+ self.model = model
231
+ self.task_head = task_head
232
+
233
+ # Freeze all layers initially
234
+ for param in self.model.parameters():
235
+ param.requires_grad = False
236
+
237
+ # Handle fine-tuning layers
238
+ if fine_tune_layers is not None:
239
+ if fine_tune_layers == "full":
240
+ # Unfreeze all layers if "full" is specified
241
+ for param in self.model.parameters():
242
+ param.requires_grad = True
243
+ else:
244
+ # Get a list of all available layer names in the model
245
+ available_layers = [name for name, _ in self.model.named_parameters()]
246
+
247
+ # Validate that specified layers exist in the model
248
+ for layer in fine_tune_layers:
249
+ if not any(layer in lname for lname in available_layers):
250
+ raise ValueError(
251
+ f"Layer '{layer}' not found in the model. "
252
+ f"Available layers: {available_layers}"
253
+ )
254
+
255
+ # Unfreeze only the specified layers
256
+ for name, param in self.model.named_parameters():
257
+ if any(layer in name for layer in fine_tune_layers):
258
+ param.requires_grad = True
259
+
260
+ def forward(self, x, input_type="cls_emb"):
261
+ if input_type == "raw":
262
+ task_input = x.view(x.size(0), -1)
263
+ else:
264
+ embeddings, attn_maps = self.model(x) # Get embeddings from the base model
265
+ if input_type == "cls_emb":
266
+ task_input = embeddings[:, 0, :] # CLS token
267
+ elif input_type == "channel_emb":  # match the input-type name used in finetune and downstream.py
268
+ chs_emb = embeddings[:, 1:, :]
269
+ task_input = chs_emb.view(chs_emb.size(0), -1)  # flatten the channel-token embeddings (mean pooling over tokens is an alternative)
270
+
271
+ return self.task_head(task_input), 0 if input_type=="raw" else attn_maps
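As an illustration of the wrapper, the snippet below freezes everything except the last two encoder blocks (layer names follow the "layers.N" convention used in downstream.py) and attaches a simple linear head; lwm_backbone and tokens are assumed variables:

    head = ClassificationHead(input_dim=128, num_classes=16)
    wrapper = FineTuningWrapper(lwm_backbone, head,
                                fine_tune_layers=["layers.10", "layers.11"])
    logits, attn_maps = wrapper(tokens, input_type="cls_emb")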
272
+ #%%
273
+ # Fine-tuning function
274
+ from sklearn.metrics import f1_score
275
+ def finetune(
276
+ base_model,
277
+ train_loader,
278
+ val_loader=None,
279
+ task_type="classification",
280
+ input_type="cls_emb",
281
+ num_classes=None,
282
+ output_dim=None,
283
+ use_custom_head=False,
284
+ fine_tune_layers=None,
285
+ optimizer_config=None,
286
+ criterion=None,
287
+ epochs=10,
288
+ device="cuda",
289
+ task="Beam Prediction"
290
+ ):
291
+ """
292
+ Configures and fine-tunes the base model with user-defined settings, saving results and models.
293
+ """
294
+ # Create results folder
295
+ time_now = f"{time.time():.0f}"
296
+ results_folder = f"results/{task}/{time_now}"
297
+ os.makedirs(results_folder, exist_ok=True)
298
+ log_file = os.path.join(results_folder, "training_log.csv")
299
+
300
+ # Initialize the CSV log
301
+ with open(log_file, mode='w', newline='') as file:
302
+ writer = csv.writer(file)
303
+ writer.writerow(["Task", "Input", "Epoch", "Train Loss", "Validation Loss", "F1-Score (Classification)", "Learning Rate", "Time"])
304
+
305
+ for batch in val_loader:
306
+ input_data, targets = batch[0].to(device), batch[1].to(device)
307
+ break
308
+
309
+ if input_type == "cls_emb":
310
+ n_patches = 1
311
+ patch_size = 128
312
+ elif input_type == "channel_emb":
313
+ n_patches = input_data.shape[1]-1
314
+ patch_size = 128
315
+ elif input_type == "raw":
316
+ n_patches = input_data.shape[1]
317
+ patch_size = 32
318
+ # patch_size = 1
319
+
320
+ if use_custom_head:
321
+ custom_head = custom_heads(input_dim=n_patches*patch_size,
322
+ num_classes=num_classes,
323
+ output_dim=output_dim,
324
+ task_type=task_type)
325
+
326
+ # Handle DataParallel models
327
+ if isinstance(base_model, nn.DataParallel):
328
+ base_model = base_model.module
329
+
330
+ # Set up the task-specific head
331
+ if use_custom_head:
332
+ task_head = custom_head
333
+ elif task_type == "classification":
334
+ if num_classes is None:
335
+ raise ValueError("num_classes must be specified for classification tasks.")
336
+ task_head = ClassificationHead(input_dim=n_patches*patch_size, num_classes=num_classes) # input_dim=base_model.embedding.d_model
337
+ elif task_type == "regression":
338
+ task_head = RegressionHead(input_dim=n_patches*patch_size) # input_dim=base_model.embedding.d_model
339
+ else:
340
+ raise ValueError("Invalid task_type. Choose 'classification' or 'regression'.")
341
+
342
+ # Wrap the model with the fine-tuning head
343
+ wrapper = FineTuningWrapper(base_model, task_head, fine_tune_layers=fine_tune_layers)
344
+ wrapper = wrapper.to(device)
345
+
346
+ print(f'Number of trainable parameters: {count_parameters(wrapper)}')
347
+
348
+ # Set default optimizer config if not provided
349
+ if optimizer_config is None:
350
+ optimizer_config = {"lr": 1e-4}
351
+ # Set up the optimizer
352
+ optimizer = torch.optim.Adam(wrapper.parameters(), **optimizer_config)
353
+ # Set up the scheduler for learning rate decay
354
+ scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)  # Reduce LR by 5x every 10 epochs
355
+
356
+ # Set up the loss criterion
357
+ if criterion is None:
358
+ criterion = nn.CrossEntropyLoss() if task_type == "classification" else nn.MSELoss()
359
+
360
+ scaler = GradScaler()
361
+ train_losses, val_losses, f1_scores = [], [], []
362
+ best_val_loss = float("inf")
363
+ best_model_path = None
364
+
365
+ for epoch in range(epochs):
366
+ # Training loop
367
+ wrapper.train()
368
+ epoch_loss = 0.0
369
+
370
+ with tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}") as progress_bar:
371
+ for batch in progress_bar:
372
+ input_data, targets = batch[0].to(device), batch[1].to(device)
373
+ optimizer.zero_grad()
374
+
375
+ with autocast():
376
+ outputs, attn_maps = wrapper(input_data, input_type=input_type)
377
+ loss = criterion(outputs, targets)
378
+
379
+ scaler.scale(loss).backward()
380
+ scaler.step(optimizer)
381
+ scaler.update()
382
+
383
+ epoch_loss += loss.item()
384
+ progress_bar.set_postfix({"Loss": loss.item()})
385
+
386
+ avg_train_loss = epoch_loss / len(train_loader)
387
+ train_losses.append(avg_train_loss)
388
+
389
+ # Validation loop
390
+ if val_loader:
391
+ wrapper.eval()
392
+ val_loss = 0.0
393
+ all_preds, all_targets = [], []
394
+
395
+ with torch.no_grad():
396
+ for batch in val_loader:
397
+ input_data, targets = batch[0].to(device), batch[1].to(device)
398
+ with autocast():
399
+ outputs, _ = wrapper(input_data, input_type=input_type)
400
+ loss = criterion(outputs, targets)
401
+
402
+ val_loss += loss.item()
403
+
404
+ if task_type == "classification":
405
+ preds = torch.argmax(outputs, dim=1).cpu().numpy()
406
+ all_preds.extend(preds)
407
+ all_targets.extend(targets.cpu().numpy())
408
+
409
+ avg_val_loss = val_loss / len(val_loader)
410
+ val_losses.append(avg_val_loss)
411
+
412
+ time_now = f"{time.time():.0f}"
413
+ # Save the best model
414
+ if avg_val_loss < best_val_loss:
415
+ best_val_loss = avg_val_loss
416
+ best_model_path = os.path.join(results_folder, f"{input_type}_epoch{epoch+1}_valLoss{avg_val_loss:.4f}_{time_now}.pth")
417
+ torch.save(wrapper.state_dict(), best_model_path)
418
+ print(f"Model saved at {best_model_path} with validation loss: {best_val_loss:.4f}")
419
+
420
+ # Compute F1-score for classification tasks
421
+ f1 = None
422
+ if task_type == "classification":
423
+ f1 = f1_score(all_targets, all_preds, average="macro")
424
+ print(f"Epoch {epoch + 1}, Validation F1-Score: {f1:.4f}")
425
+ f1_scores.append(f1)
426
+
427
+ scheduler.step()
428
+
429
+ # Log results
430
+ with open(log_file, mode='a', newline='') as file:
431
+ writer = csv.writer(file)
432
+ writer.writerow([task, input_type, epoch + 1, avg_train_loss, avg_val_loss, f1 if f1 is not None else "-", scheduler.get_last_lr()[0], f"{time_now}"])
433
+
434
+ # Plot training and validation losses
435
+ plt.figure(figsize=(10, 6))
436
+ plt.plot(range(1, epochs + 1), train_losses, label="Training Loss")
437
+ plt.plot(range(1, epochs + 1), val_losses, label="Validation Loss", linestyle="--")
438
+ plt.xlabel("Epochs")
439
+ plt.ylabel("Loss")
440
+ plt.title("Training and Validation Loss")
441
+ plt.legend()
442
+ plt.grid(True)
443
+ # plt.savefig(os.path.join(results_folder, "loss_curve.png"))
444
+ plt.show()
445
+
446
+ return wrapper, best_model_path, train_losses, val_losses, f1_scores if task_type == "classification" else 0, attn_maps
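A representative call, mirroring the downstream loop; train_loader and val_loader would come from prepare_loaders in utils.py, and the hyperparameters shown are placeholders:

    wrapper, best_ckpt, train_losses, val_losses, f1_scores, attn_maps = finetune(
        base_model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        task_type="classification",
        input_type="cls_emb",
        num_classes=16,
        fine_tune_layers=None,   # None = train the head only; "full" unfreezes LWM
        epochs=10,
        device="cuda:0",
        task="Beam Prediction")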
utils.py ADDED
@@ -0,0 +1,247 @@
1
+ from torch.utils.data import DataLoader, Dataset, random_split, TensorDataset
2
+ import torch
3
+ import numpy as np
4
+ #%%
5
+ def create_dataloader(grouped_data, batch_size, shuffle):
6
+
7
+ dataloaders = {}
8
+
9
+ for seq_length, group in grouped_data.items():
10
+
11
+ print(f"dataloader in progress ...\nkey: {seq_length}")
12
+
13
+ ## Uncomment the following line if you run out of memory during pre-training
14
+ # batch_size = batch_size // 8 if seq_length >= 5 else batch_size
15
+
16
+ # Unpack samples for the current group
17
+ input_ids, masked_tokens, masked_pos = zip(*group)
18
+
19
+ # Convert to tensors
20
+ input_ids_tensor = torch.tensor(input_ids, dtype=torch.float32)
21
+ masked_tokens_tensor = torch.tensor(masked_tokens, dtype=torch.float32)
22
+ masked_pos_tensor = torch.tensor(masked_pos, dtype=torch.long)
23
+
24
+ # Create TensorDataset and DataLoader
25
+ dataset = TensorDataset(input_ids_tensor, masked_tokens_tensor, masked_pos_tensor)
26
+ dataloaders[seq_length] = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, pin_memory=True)
27
+
28
+ return dataloaders
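As a usage sketch (variable names assumed), grouped_data maps each sequence length to a list of (input_ids, masked_tokens, masked_pos) samples, and the returned dict is iterated bucket by bucket during pre-training:

    loaders = create_dataloader(grouped_data, batch_size=128, shuffle=True)
    for seq_len, loader in loaders.items():
        for input_ids, masked_tokens, masked_pos in loader:
            pass  # forward/backward pass per batch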
29
+ #%%
30
+ def count_parameters(model):
31
+ return sum(p.numel() for p in model.parameters() if p.requires_grad)
32
+ #%%
33
+ import matplotlib.pyplot as plt
34
+ from sklearn.decomposition import PCA
35
+ from sklearn.manifold import TSNE
36
+ import umap
37
+
38
+ def visualize_embeddings(embeddings, labels, method="pca", label=None):
39
+ """
40
+ Visualize embeddings using PCA, UMAP, or t-SNE with color-coded labels.
41
+
42
+ Args:
43
+ embeddings (torch.Tensor or np.ndarray): Embeddings to visualize, shape (n_samples, n_features).
44
+ labels (torch.Tensor or np.ndarray): Class labels corresponding to embeddings, shape (n_samples,).
45
+ method (str): Dimensionality reduction method ('pca', 'umap', or 'tsne').
46
+ label (str): Text used in the plot title, combined with the method name.
47
+ """
48
+ # Convert to numpy if input is a torch.Tensor
49
+ if isinstance(embeddings, torch.Tensor):
50
+ embeddings = embeddings.cpu().numpy()
51
+ if isinstance(labels, torch.Tensor):
52
+ labels = labels.cpu().numpy()
53
+
54
+ # Apply the selected dimensionality reduction method
55
+ if method.lower() == "pca":
56
+ reducer = PCA(n_components=2)
57
+ elif method.lower() == "umap":
58
+ reducer = umap.UMAP(n_components=2, n_neighbors=16, random_state=42)
59
+ elif method.lower() == "tsne":
60
+ reducer = TSNE(n_components=2, random_state=42, init="random")
61
+ else:
62
+ raise ValueError("Invalid method. Choose from 'pca', 'umap', or 'tsne'.")
63
+
64
+ reduced_embeddings = reducer.fit_transform(embeddings)
65
+
66
+ # Create a scatter plot with color-coding based on labels
67
+ plt.figure(figsize=(10, 8))
68
+ num_classes = len(np.unique(labels))
69
+ colors = plt.cm.get_cmap("tab10", num_classes)
70
+
71
+ for class_idx in range(num_classes):
72
+ class_points = reduced_embeddings[labels == class_idx]
73
+ plt.scatter(
74
+ class_points[:, 0], class_points[:, 1],
75
+ label=f"Class {class_idx}",
76
+ alpha=0.6
77
+ )
78
+
79
+ # Customize the plot
80
+ plt.title(f"{label} ({method.upper()})")
81
+ plt.xlabel("Component 1")
82
+ plt.ylabel("Component 2")
83
+ plt.legend()
84
+ plt.show()
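A typical call, with cls_embeddings and labels as assumed variable names:

    visualize_embeddings(cls_embeddings, labels, method="tsne",
                         label="LWM CLS embeddings")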
85
+ #%%
86
+ def generate_gaussian_noise(data, snr_db):
87
+ """
88
+ Generate Gaussian noise given an SNR and apply it to the data.
89
+
90
+ Args:
91
+ data (torch.Tensor): Input data tensor of shape (n_samples, seq_len, feature_dim).
92
+ snr_db (float): Signal-to-Noise Ratio in decibels (dB).
93
+
94
+ Returns:
95
+ torch.Tensor: Data with Gaussian noise applied.
96
+ """
97
+ # Separate the input data to exclude the first channel
98
+ a = data[:, 1:, :] # Shape: (n_samples, seq_len-1, feature_dim)
99
+ flat_data = a.view(a.size(0), -1) # Flatten data to calculate power
100
+ signal_power = torch.mean(flat_data**2, dim=1, keepdim=True) # Shape: (n_samples, 1)
101
+ snr_linear = 10 ** (snr_db / 10)
102
+ noise_power = signal_power / snr_linear
103
+ noise = torch.randn_like(flat_data) * torch.sqrt(noise_power)
104
+ noise = noise.view_as(a)
105
+ noise = torch.cat((torch.zeros_like(data[:, :1, :]), noise), dim=1) # Add zero noise for the first channel
106
+
107
+ return noise
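To apply the noise, add the returned tensor to the input; for example, assuming preprocessed_data is a float tensor of shape (n_samples, seq_len, feature_dim):

    noise = generate_gaussian_noise(preprocessed_data, snr_db=10)   # 10 dB SNR
    noisy_data = preprocessed_data + noise                          # first (CLS) position stays clean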
108
+ #%%
109
+ def plot_coverage(rxs, cov_map, dpi=200, figsize=(6,4), cbar_title=None, title=False,
110
+ scat_sz=.5, tx_pos=None, tx_ori=None, legend=False, lims=None,
111
+ proj_3D=False, equal_aspect=False, tight=True, cmap='tab20'):
112
+
113
+ plt_params = {'cmap': cmap}
114
+ if lims:
115
+ plt_params['vmin'], plt_params['vmax'] = lims[0], lims[1]
116
+
117
+ n = 3 if proj_3D else 2 # n coordinates to consider 2 = xy | 3 = xyz
118
+
119
+ xyz = {'x': rxs[:,0], 'y': rxs[:,1]}
120
+ if proj_3D:
121
+ xyz['zs'] = rxs[:,2]
122
+
123
+ fig, ax = plt.subplots(dpi=dpi, figsize=figsize,
124
+ subplot_kw={'projection': '3d'} if proj_3D else {})
125
+
126
+ im = plt.scatter(**xyz, c=cov_map, s=scat_sz, marker='s', **plt_params)
127
+
128
+ cbar = plt.colorbar(im, label='' if not cbar_title else cbar_title)
129
+
130
+ plt.xlabel('x (m)')
131
+ plt.ylabel('y (m)')
132
+
133
+ # TX position
134
+ if tx_pos is not None:
135
+ ax.scatter(*tx_pos[:n], marker='P', c='r', label='TX')
136
+
137
+ # TX orientation
138
+ if tx_ori is not None and tx_pos is not None: # ori = [azi, el]
139
+ # positive azimuths point left (like positive angles in a unit circle)
140
+ # positive elevations point up
141
+ r = 30 # ref size of pointing direction
142
+ tx_lookat = np.copy(tx_pos)
143
+ tx_lookat[:2] += r * np.array([np.cos(tx_ori[2]), np.sin(tx_ori[2])]) # azimuth
144
+ tx_lookat[2] += r * np.sin(tx_ori[1]) # elevation
145
+
146
+ line_components = [[tx_pos[i], tx_lookat[i]] for i in range(n)]
147
+ line = {key:val for key,val in zip(['xs', 'ys', 'zs'], line_components)}
148
+ if n == 2:
149
+ ax.plot(line_components[0], line_components[1], c='k', alpha=.5, zorder=3)
150
+ else:
151
+ ax.plot(**line, c='k', alpha=.5, zorder=3)
152
+
153
+ if title:
154
+ ax.set_title(title)
155
+
156
+ if legend:
157
+ plt.legend(loc='upper center', ncols=10, framealpha=.5)
158
+
159
+ if tight:
160
+ s = 1
161
+ mins, maxs = np.min(rxs, axis=0)-s, np.max(rxs, axis=0)+s
162
+ if not proj_3D:
163
+ plt.xlim([mins[0], maxs[0]])
164
+ plt.ylim([mins[1], maxs[1]])
165
+ else:
166
+ ax.axes.set_xlim3d([mins[0], maxs[0]])
167
+ ax.axes.set_ylim3d([mins[1], maxs[1]])
168
+ if tx_pos is None:
169
+ ax.axes.set_zlim3d([mins[2], maxs[2]])
170
+ else:
171
+ ax.axes.set_zlim3d([np.min([mins[2], tx_pos[2]]),
172
+ np.max([mins[2], tx_pos[2]])])
173
+
174
+ if equal_aspect and not proj_3D:  # equal aspect disrupts 3D plots
175
+ plt.axis('scaled')
176
+
177
+ return fig, ax, cbar
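An illustrative call, with rx_positions (n_users x 3), beam_indices, and bs_position as assumed variables:

    fig, ax, cbar = plot_coverage(rx_positions, beam_indices,
                                  tx_pos=bs_position, tx_ori=None,
                                  cbar_title="Beam index", legend=True)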
178
+ #%%
179
+ def prepare_loaders(
180
+ preprocessed_data,
181
+ labels=None,
182
+ selected_patches_idxs=None,
183
+ input_type="raw",
184
+ task_type="classification",
185
+ feature_selection=False,
186
+ train_ratio=0.8,
187
+ batch_size=64,
188
+ seed=42 # Default seed for reproducibility
189
+ ):
190
+ """
191
+ Prepares datasets and data loaders for training and validation.
192
+
193
+ Args:
194
+ preprocessed_data (torch.Tensor): The input data, either raw or preprocessed.
195
+ labels (torch.Tensor, optional): The labels for classification tasks.
196
+ selected_patches_idxs (torch.Tensor, optional): Indices of selected patches for feature selection.
197
+ input_type (str): "raw" or "processed" to specify input data type.
198
+ task_type (str): "classification" or "regression".
199
+ feature_selection (bool): Whether to perform feature selection based on selected_patches_idxs.
200
+ train_ratio (float): Proportion of data to use for training (remaining for validation).
201
+ batch_size (int): Batch size for data loaders.
202
+ seed (int): Random seed for reproducibility.
203
+
204
+ Returns:
205
+ tuple: (train_loader, val_loader)
206
+ """
207
+ # Set random seed for reproducibility
208
+ torch.manual_seed(seed)
209
+
210
+ # Prepare samples
211
+ if input_type == "raw":
212
+ if feature_selection and selected_patches_idxs is not None:
213
+ batch_indices = torch.arange(preprocessed_data.size(0)).unsqueeze(1) # Shape: [batch_size, 1]
214
+ samples = torch.tensor(preprocessed_data[batch_indices, selected_patches_idxs], dtype=torch.float32)
215
+ else:
216
+ samples = torch.tensor(preprocessed_data[:, 1:], dtype=torch.float32) # raw_chs
217
+ else:
218
+ samples = torch.tensor(preprocessed_data, dtype=torch.float32)
219
+
220
+ # Prepare dataset
221
+ if task_type == "classification":
222
+ if labels is None:
223
+ raise ValueError("Labels are required for classification tasks.")
224
+ labels = torch.tensor(labels, dtype=torch.long)
225
+ dataset = TensorDataset(samples, labels)
226
+ target = 0 # REVISE if needed
227
+ elif task_type == "regression":
228
+ target = samples[:, 1:, :].view(samples.size(0), -1) # Reshape for regression targets
229
+ dataset = TensorDataset(samples, target)
230
+ else:
231
+ raise ValueError("Invalid task_type. Choose 'classification' or 'regression'.")
232
+
233
+ # Set random seed for reproducibility
234
+ generator = torch.Generator().manual_seed(seed)
235
+
236
+ # Split dataset into training and validation
237
+ n_samples = len(dataset)
238
+ train_size = int(train_ratio * n_samples)
239
+ val_size = n_samples - train_size
240
+ train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=generator)
241
+
242
+ # Create DataLoaders
243
+ train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, generator=generator)
244
+ val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
245
+
246
+ print(f"Train size: {len(train_dataset)}, Validation size: {len(val_dataset)}")
247
+ return train_loader, val_loader, samples, target
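For instance, the downstream script could split CLS embeddings for a 16-beam classification task as follows (chs and labels as produced earlier in downstream.py; the ratio and batch size are placeholders):

    train_loader, val_loader, samples, target = prepare_loaders(
        preprocessed_data=chs,
        labels=labels,
        input_type="cls_emb",
        task_type="classification",
        train_ratio=0.1,
        batch_size=64)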