Upload PAL_B_RM_opt

- config.json +4 -0
- configuration_pal_b_rm.py +35 -0
- connector.py +112 -0
- custom_sfx.py +61 -0
- itemLearner.py +53 -0
- learner.py +138 -0
- modeling_pal_b_rm.py +28 -0
- projector.py +81 -0
- pytorch_model.bin +1 -1
- tensor_initializer.py +32 -0
- tensor_merger.py +16 -0
- userLearner.py +151 -0
config.json
CHANGED

@@ -2,6 +2,10 @@
   "architectures": [
     "PAL_B_RM_opt"
   ],
+  "auto_map": {
+    "AutoConfig": "configuration_pal_b_rm.PAL_B_Config",
+    "AutoModel": "modeling_pal_b_rm.PAL_B_RM_opt"
+  },
   "d_hid": 512,
   "d_pref": 512,
   "initializer_type": "gaussian",
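The added auto_map entries are what let the custom config and model classes be resolved directly from this repository's Python files when loading with trust_remote_code=True. A minimal loading sketch; the repository id below is a placeholder, not taken from this commit:

from transformers import AutoConfig, AutoModel

repo_id = "your-namespace/PAL_B_RM_opt"  # placeholder: replace with the actual Hub repo path

# auto_map routes these calls to configuration_pal_b_rm.PAL_B_Config and
# modeling_pal_b_rm.PAL_B_RM_opt inside the repository.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)
print(type(config).__name__, type(model).__name__)  # PAL_B_Config PAL_B_RM_opt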
configuration_pal_b_rm.py
ADDED (+35 lines)

from transformers import PretrainedConfig

class PAL_B_Config(PretrainedConfig):
    model_type = "facebook/opt"  # opt family model aligned PAL reward model

    def __init__(
        self,
        d_hid: int = 512,
        d_pref: int = 512,
        k: int = 2,
        llm_name: str = "facebook/opt-350m",
        pref_learner_type: str = "angle",
        proj_arch: str = "mlp2-gelu-dropout0",
        initializer_type: str = "gaussian",
        is_expectation_norm_init: bool = False,
        sfx_type: str = "softmax",
        sfx_temperature: float = 1.0,
        is_temperature_learnable: bool = False,
        is_gumbel_hard: bool = None,
        **kwargs,
    ):
        self.d_hid = d_hid
        self.d_pref = d_pref
        self.k = k
        self.llm_name = llm_name
        self.pref_learner_type = pref_learner_type
        self.proj_arch = proj_arch
        self.initializer_type = initializer_type
        self.is_expectation_norm_init = is_expectation_norm_init
        self.sfx_type = sfx_type
        self.sfx_temperature = sfx_temperature
        self.is_temperature_learnable = is_temperature_learnable
        self.is_gumbel_hard = is_gumbel_hard
        super().__init__(**kwargs)
connector.py
ADDED (+112 lines)

from .projector import Projector
import torch.nn as nn
import re


# class Connector(nn.Module):
#     def __init__(self, cnct_arch:str, in_dims:int, out_dims:int):
#         super().__init__()
#         # projector_type structure ["mlp?-relu-dropout?-residual","identity"]
#         self.cnct_arch = cnct_arch
#
#         if cnct_arch == 'identity':
#             self.m = nn.Identity()
#
#         pattern = r"mlp(\d+)-(relu|gelu|linear)-dropout(\d+)?(-residual-batchnorm|-batchnorm-residual|-residual|-batchnorm|-nobias)?"
#         match = re.match(pattern, cnct_arch)
#
#         if match:
#             layers = int(match.group(1))
#             act = match.group(2)
#             dropout_p = int(match.group(3))
#             num_digit = len(match.group(3))
#             dropout_p = dropout_p / 10**num_digit
#             # print("match.group(4): ", match.group(4))
#             nobias = False
#             if match.group(4) != None:
#                 residual = True if ("-residual" in match.group(4)) else False
#                 batchnorm = True if ("-batchnorm" in match.group(4)) else False
#                 nobias = True if ("-nobias" in match.group(4)) else False
#             else:
#                 residual = False
#                 batchnorm = False
#             latent_dims = [out_dims] * layers
#             self.m = Projector(
#                 in_dims=in_dims,
#                 out_dims=out_dims,
#                 latent_dims=latent_dims,
#                 bias=not nobias,
#                 dropout_p=dropout_p,
#                 activation=act,
#                 identity_map=residual,
#                 use_batchnorm=batchnorm,
#             )
#
#     def forward(self, x):
#         return self.m(x)


class Connector(nn.Module):
    def __init__(self, in_dims: int, out_dims: int, cnct_arch: str):
        super().__init__()
        pattern = r"mlp(\d+)-(relu|gelu|linear)-dropout(\d+)?(-residual-batchnorm|-batchnorm-residual|-residual|-batchnorm|-nobias)?"
        match = re.match(pattern, cnct_arch)
        if match:
            layers = int(match.group(1))
            act = match.group(2)
            dropout_p = int(match.group(3))
            num_digit = len(match.group(3))
            dropout_p = dropout_p / 10**num_digit
            if match.group(4) != None:
                residual = True if ("-residual" in match.group(4)) else False
                batchnorm = True if ("-batchnorm" in match.group(4)) else False
                nobias = True if ("-nobias" in match.group(4)) else False
            else:
                residual = False
                batchnorm = False
                nobias = False
            latent_dims = [out_dims] * layers
            self.mlp = Projector(
                in_dims=in_dims,
                out_dims=out_dims,
                latent_dims=latent_dims,
                bias=not nobias,
                dropout_p=dropout_p,
                activation=act,
                identity_map=residual,
                use_batchnorm=batchnorm,
            )
        elif cnct_arch == 'identity':
            self.mlp = nn.Identity()
        else:
            raise ValueError(f'no such connection architecture {cnct_arch}')

    def __call__(self, x):
        ret = self.mlp(x)
        return ret

if __name__ == "__main__":
    m = Connector(cnct_arch='identity', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp1-relu-dropout2-residual', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp1-relu-dropout2-batchnorm', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp1-relu-dropout2-residual-batchnorm', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp3-gelu-dropout2', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp16-relu-dropout75', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp0-linear-dropout0', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp0-linear-dropout0-nobias', in_dims=4096, out_dims=768)
    print(m)
    m = Connector(cnct_arch='mlp2-linear-dropout0-nobias', in_dims=4096, out_dims=768)
    print(m)

    m = Connector(cnct_arch='mlp2-gelu-dropout0', in_dims=512, out_dims=512)
    count = 0
    for p in m.parameters():
        count += p.numel()
    print(count)
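For reference, the cnct_arch string is decomposed by the regex above: mlp<N> sets the number of hidden layers, the second group picks the activation, and dropout<D> is read digit-wise, so dropout2 means p=0.2 and dropout75 means p=0.75. A small standalone sketch of that parsing, using the same pattern independently of the class:

import re

pattern = r"mlp(\d+)-(relu|gelu|linear)-dropout(\d+)?(-residual-batchnorm|-batchnorm-residual|-residual|-batchnorm|-nobias)?"

for arch in ["mlp2-gelu-dropout0", "mlp1-relu-dropout2-residual", "mlp16-relu-dropout75"]:
    m = re.match(pattern, arch)
    layers = int(m.group(1))
    act = m.group(2)
    # "0" -> 0.0, "2" -> 0.2, "75" -> 0.75, same arithmetic as in Connector.__init__
    dropout_p = int(m.group(3)) / 10 ** len(m.group(3))
    print(arch, "->", layers, "hidden layer(s),", act, ", dropout", dropout_p, ", flags:", m.group(4))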
custom_sfx.py
ADDED (+61 lines)

import torch
import torch.nn as nn
import torch.nn.functional as F

from typing import Literal, Optional

import logging
logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class CustomSoftMax(nn.Module):
    def __init__(
        self,
        sfx_type: Literal['gumbel_softmax', 'softmax'],
        temperature: float,
        is_temperature_learnable: bool,
        is_gumbel_hard: Optional[bool] = None,  # [True/False]
        *args,
        **kwargs,
    ) -> None:

        super().__init__()
        self.sfx_type = sfx_type
        assert not is_temperature_learnable, 'is_temperature_learnable is prohibited in this version, will go to negative'
        self.temperature = nn.Parameter(torch.tensor([float(temperature)]), requires_grad=is_temperature_learnable)
        self.is_gumbel_hard = is_gumbel_hard
        self.args = args
        self.kwargs = kwargs

    def forward(self, x):
        # x: (bs, dims)
        if self.sfx_type == 'gumbel_softmax':
            if self.is_gumbel_hard is not None:
                return F.gumbel_softmax(x, tau=self.temperature, hard=self.is_gumbel_hard, dim=1)
            else:
                raise ValueError('is_gumbel_hard is not passed')
        elif self.sfx_type == 'softmax':
            return F.softmax(x / self.temperature, dim=1)
        else:
            raise NotImplementedError(f'{self.sfx_type} is not implemented yet')

if __name__ == "__main__":

    sfx = CustomSoftMax(sfx_type='gumbel_softmax', temperature=1, is_temperature_learnable=False, is_gumbel_hard=True)
    x = torch.randn(10, 3)  # (bs, dims)
    print(x.shape)
    print(sfx(x))

    # NOTE: the two examples below pass is_temperature_learnable=True, which trips the
    # assertion in __init__ and raises AssertionError with the module as written above.
    sfx = CustomSoftMax(sfx_type='gumbel_softmax', temperature=1, is_temperature_learnable=True, is_gumbel_hard=True)
    x = torch.randn(10, 3)  # (bs, dims)
    print(x.shape)
    print(sfx(x))

    sfx = CustomSoftMax(sfx_type='softmax', temperature=1, is_temperature_learnable=False)
    x = torch.randn(10, 3)
    print(sfx(x))

    sfx = CustomSoftMax(sfx_type='softmax', temperature=0.01, is_temperature_learnable=True, is_gumbel_hard=None)
    x = torch.randn(10, 3)
    print(sfx(x))
itemLearner.py
ADDED (+53 lines)

import torch
import torch.nn as nn
import torch.nn.functional as F
from .connector import Connector
from .projector import Projector
from .tensor_merger import TensorMerger
import numpy as np

from typing import Literal, Optional, Tuple
import logging
logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class ItemLearner(nn.Module):

    llm: nn.Module
    projector: nn.Module

    def __init__(self, llm, projector):
        super().__init__()
        self.llm = llm
        self.projector = projector

    def forward(self, x, rm_cached=None):  # only pass the generated data
        '''
        x = {'input_ids': torch.tensor, 'attention_mask': torch.tensor}
        '''
        input_ids = x['input_ids']
        attention_mask = x['attention_mask']

        if rm_cached is None:
            llm_res = self.llm(
                input_ids=input_ids,
                attention_mask=attention_mask,
            )
        else:
            llm_res = self.llm(
                input_ids=input_ids[:, -1:],  # attention_mask=attention_mask,
                past_key_values=rm_cached["item_learner"],
                use_cache=False  # NOTE: unlike UserLearner.infer_gk (use_cache=True); with use_cache=False the returned past_key_values may be None
            )
            rm_cached["item_learner"] = llm_res.past_key_values

        embeds = llm_res.last_hidden_state
        # embeds shape: (bs, seq_len, hidden_size)
        shape = embeds.shape
        embeds = embeds.view(-1, shape[-1])  # (bs*seq_len, hidden_size)
        projected_embeds = self.projector(embeds)

        if rm_cached is None:
            return projected_embeds.view(shape[0], shape[1], -1)
        else:
            return projected_embeds.view(shape[0], shape[1], -1), rm_cached
learner.py
ADDED (+138 lines)

#!/usr/bin/env python
# -*-coding:utf-8 -*-

'''
@Desc: This is the implementation of PAL-B
'''

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel, AutoConfig

from .connector import Connector
from .tensor_initializer import TensorInitializer
from .custom_sfx import CustomSoftMax
from .itemLearner import ItemLearner
from .userLearner import UserLearner

from collections import defaultdict
from typing import Literal, Optional, Tuple

import logging
logger = logging.getLogger(__name__)

class BasePrefLearner(nn.Module):
    def __init__(
        self,
        d_hid: int,
        d_pref: int,
        k: int,
        llm_name: str,
        pref_learner_type: Literal["dist", "dist_normalization", "angle", "norm", "dist_logistic", "angle_hinge"],
        proj_arch: str,
        initializer_type: Literal["gaussian"],
        is_expectation_norm_init: bool,  # the tensor initialization parameters
        sfx_type: Literal["gumbel_softmax", "softmax"],
        sfx_temperature: float,
        is_temperature_learnable: bool,
        is_gumbel_hard: Optional[bool] = None,
        is_partition: bool = False,
        seed: int = 42,
        **kwargs
    ):
        super().__init__()
        self.pref_learner_type = pref_learner_type
        self.is_temperature_learnable = is_temperature_learnable
        # init all necessary modules
        model_config = AutoConfig.from_pretrained(llm_name)
        self.llm = AutoModel.from_pretrained(llm_name, from_tf=bool(".ckpt" in llm_name), config=model_config)
        self.tensor_initializer = TensorInitializer(initializer_type, seed, is_expectation_norm_init=is_expectation_norm_init)
        self.projector_f = Connector(cnct_arch=proj_arch, in_dims=d_hid, out_dims=d_pref)
        self.projectors_gk = [Connector(cnct_arch=proj_arch, in_dims=d_hid, out_dims=d_pref) for _ in range(k)]
        self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
        self.softmax_w = CustomSoftMax(sfx_type=sfx_type,
                                       temperature=sfx_temperature,
                                       is_temperature_learnable=is_temperature_learnable,
                                       is_gumbel_hard=is_gumbel_hard)
        self.item_learner = ItemLearner(
            llm=self.llm,
            projector=self.projector_f,
        )
        self.is_partition = is_partition
        self.user_learner = UserLearner(k=k, llm=self.llm, projectors=self.projectors_gk, softmax=self.softmax_w, is_partition=is_partition)
        logger.critical('Remember to call update_trainable_params() after the model is initialized.')

    def update_trainable_params(self, fix_modules: Tuple[str, ...] = ()):
        # capture params
        self.trainable_params = defaultdict(list)
        if "llm" not in fix_modules:
            self.trainable_params["llm"] = self.llm.parameters()
        else:
            self.llm.eval()
        if "itemLearnerProjector" not in fix_modules:
            self.trainable_params["projector_f"].extend(self.item_learner.projector.parameters())
        if "userLearnerProjector" not in fix_modules:
            self.trainable_params["projectors_gk"].extend(list(self.user_learner.projectors.parameters()))
        if "W" not in fix_modules:
            self.trainable_params["W"] = self.user_learner.W.parameters()
        if self.pref_learner_type in ["angle", "dist_logistic"] and "logit_scale" not in fix_modules:
            self.trainable_params["logit_scale"] = self.logit_scale
        if self.is_temperature_learnable and "temperature" not in fix_modules:
            self.trainable_params["temperature"] = self.softmax_w.temperature

    def map_to_pref_embedding_space(self, x, rm_cached=None):
        # x = ({
        #          'input_ids': prompt_input_ids,
        #          'attention_mask': prompt_attention_mask,
        #      },
        #      {
        #          'input_ids': eval_input_ids,
        #          'attention_mask': eval_attention_mask,
        #      })
        prompt, items = x
        if rm_cached is None:
            items_prime = self.item_learner(items)
            prompt_prime = self.user_learner(prompt)
            return items_prime, prompt_prime
        else:
            items_prime, rm_cached = self.item_learner(items, rm_cached)
            prompt_prime, rm_cached = self.user_learner(prompt, rm_cached)
            return items_prime, prompt_prime, rm_cached


class PrefLearner(BasePrefLearner):  # <f(x),f(u)>

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, x, rm_cached=None):
        items, prompt = x
        if rm_cached is None:
            items_prime, prompt_prime = self.map_to_pref_embedding_space((prompt, items))
        else:
            items_prime, prompt_prime, rm_cached = self.map_to_pref_embedding_space((prompt, items), rm_cached)
        logger.info(f"{items_prime[0]=}")
        logger.info(f"{prompt_prime[0]=}")
        logger.info(f"{items_prime.shape=}")
        logger.info(f"{prompt_prime.shape=}")
        if self.pref_learner_type == 'angle':
            prompt_last_prime = prompt_prime[:, -1, :]
            prompt_last_prime = prompt_last_prime.unsqueeze(1)
            prompt_last_prime = prompt_last_prime / torch.norm(prompt_last_prime, dim=-1, keepdim=True)
            items_last_prime = items_prime[:, -1, :]
            items_last_prime = items_last_prime.unsqueeze(1)
            items_last_prime = items_last_prime / torch.norm(items_last_prime, dim=-1, keepdim=True)
            logit_scale = self.logit_scale.exp()
            clamped_logit_scale = torch.clamp(logit_scale, max=100)
            logger.info(f"{prompt_last_prime.shape=}")
            logger.info(f"{items_last_prime.shape=}")
            sim_score = (prompt_last_prime * items_last_prime).sum(dim=-1) * clamped_logit_scale  # (bs, max_token_length)
            if rm_cached is None:
                return sim_score
            else:
                return sim_score, rm_cached
        else:
            raise NotImplementedError
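In the 'angle' branch above, the score is a CLIP-style scaled cosine similarity between the last-token user embedding and the last-token item embedding. A small sketch of that same computation on toy tensors (shapes only, no LLM involved):

import torch

bs, seq_len, d_pref = 2, 5, 512
prompt_prime = torch.randn(bs, seq_len, d_pref)  # user/prompt embeddings after the g_k mixture
items_prime = torch.randn(bs, seq_len, d_pref)   # item embeddings after projector f
logit_scale = torch.tensor(1 / 0.07).log()       # same initialization as the nn.Parameter above

u = prompt_prime[:, -1:, :]
u = u / u.norm(dim=-1, keepdim=True)             # unit-normalize the last-token user vector
x = items_prime[:, -1:, :]
x = x / x.norm(dim=-1, keepdim=True)             # unit-normalize the last-token item vector

scale = torch.clamp(logit_scale.exp(), max=100)  # clamped temperature, as in forward()
sim_score = (u * x).sum(dim=-1) * scale          # (bs, 1): cosine similarity times the scale
print(sim_score.shape)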
modeling_pal_b_rm.py
ADDED (+28 lines)

from transformers import PreTrainedModel
from .learner import PrefLearner
from .configuration_pal_b_rm import PAL_B_Config

class PAL_B_RM_opt(PreTrainedModel):
    config_class = PAL_B_Config

    def __init__(self, config):
        super().__init__(config)
        self.model = PrefLearner(
            d_hid=config.d_hid,
            d_pref=config.d_pref,
            k=config.k,
            llm_name=config.llm_name,
            pref_learner_type=config.pref_learner_type,
            proj_arch=config.proj_arch,
            initializer_type=config.initializer_type,
            is_expectation_norm_init=config.is_expectation_norm_init,
            sfx_type=config.sfx_type,
            sfx_temperature=config.sfx_temperature,
            is_temperature_learnable=config.is_temperature_learnable,
            is_gumbel_hard=config.is_gumbel_hard,
        )
        # self.model.user_learner.init_weight(uids)

    def forward(self, x):
        logits = self.model(x)
        return {'logits': logits}
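A minimal construction sketch, assuming the repository files are importable as a local Python package (hypothetically named pal_b_rm) and that the facebook/opt-350m backbone can be downloaded. It only builds the wrapper and registers trainable parameter groups, following the reminder logged in learner.py; it does not run a forward pass:

from pal_b_rm.configuration_pal_b_rm import PAL_B_Config
from pal_b_rm.modeling_pal_b_rm import PAL_B_RM_opt

config = PAL_B_Config()        # defaults: d_hid=512, d_pref=512, k=2, llm_name="facebook/opt-350m"
model = PAL_B_RM_opt(config)   # wraps the OPT backbone inside PrefLearner

# Freeze the backbone and collect the remaining trainable groups.
model.model.update_trainable_params(fix_modules=("llm",))
print(list(model.model.trainable_params.keys()))  # e.g. ['projector_f', 'projectors_gk', 'W', 'logit_scale']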
projector.py
ADDED (+81 lines)

from typing import Sequence
import torch.nn as nn
import torch


class Projector(nn.Module):

    in_dims: int
    out_dims: int
    latent_dims: Sequence[int]
    bias: bool
    dropout_p: float
    activation: str
    identity_map: bool
    use_batchnorm: bool

    def __init__(
        self,
        in_dims: int,
        out_dims: int,
        latent_dims: Sequence[int] = tuple([]),
        bias: bool = True,
        dropout_p: float = 0.2,
        activation: str = 'relu',
        identity_map=False,
        use_batchnorm: bool = False,
    ):
        super().__init__()

        self.in_dims = in_dims
        self.out_dims = out_dims
        self.bias = bias
        self.dropout_p = dropout_p
        self.latent_dims = latent_dims
        self.act = None
        self.identity_map = identity_map
        self.use_batchnorm = use_batchnorm

        if activation == 'relu':
            self.act = nn.ReLU
        elif activation == 'gelu':
            self.act = nn.GELU
        elif activation == 'linear':
            self.act = nn.Identity
        else:
            raise ValueError(f'no such activation {activation}')

        if identity_map == True:
            self.identity = nn.Identity()
            # self.alpha = nn.Parameter(torch.tensor(0.5))

        layer_dims = [in_dims] + list(latent_dims)
        layers = []

        for i in range(len(layer_dims) - 1):
            layers.append(nn.Linear(layer_dims[i], layer_dims[i + 1], bias=self.bias))
            if self.use_batchnorm:  # Add batch normalization layer if enabled
                layers.append(nn.BatchNorm1d(layer_dims[i + 1]))
            layers.extend([
                nn.Dropout(p=self.dropout_p),
                self.act()
            ])

        layers.append(nn.Linear(layer_dims[-1], out_dims, bias=self.bias))
        self.layers = nn.Sequential(*layers)

    def forward(self, x) -> torch.Tensor:
        """Forward pass of the projector model.

        Args:
            x: The input features.

        Returns:
            torch.Tensor: The projected features.

        """
        if self.identity_map:
            x = self.identity(x) + self.layers(x)
        else:
            x = self.layers(x)
        return x
pytorch_model.bin
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:770877c170b8b51c6e6555de658213f0a6a1fca5c74370f1b8fed47cf6411bac
 size 1334487698
tensor_initializer.py
ADDED (+32 lines)

import numpy as np
import torch

class TensorInitializer:

    def __init__(self, type: str, seed: int, is_expectation_norm_init: bool = False):
        self.initializer_type = type
        self.rng = np.random.default_rng(seed)
        self.is_expectation_norm_init = is_expectation_norm_init

    def gaussian_initializer(
        self,
        dim: int,
        size: int,
    ) -> torch.Tensor:

        mean = np.zeros(dim)
        if self.is_expectation_norm_init:
            # expectation normalization
            cov = 1 / dim * np.eye(dim)
            return torch.tensor(self.rng.multivariate_normal(mean, cov, size), dtype=torch.float32)  # .float()
        else:
            # enforced normalization
            cov = np.eye(dim)
            unnorm_tensor = torch.tensor(self.rng.multivariate_normal(mean, cov, size), dtype=torch.float32)  # .float()
            return unnorm_tensor / torch.norm(unnorm_tensor, dim=1, keepdim=True)

    def __call__(self, *args, **kwargs):
        if self.initializer_type == 'gaussian':
            return self.gaussian_initializer(*args, **kwargs)
        else:
            raise ValueError(f'Unknown initializer type: {self.initializer_type}')
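The two branches differ only in how the row norms behave: with is_expectation_norm_init=True the covariance I/dim makes the expected squared norm of each row equal to 1, while the default branch samples from N(0, I) and then divides by the row norm, so every row has norm exactly 1. A quick check of that behavior, assuming tensor_initializer.py is on the Python path:

import torch
from tensor_initializer import TensorInitializer  # assumes the file is importable as-is

init = TensorInitializer('gaussian', seed=0, is_expectation_norm_init=True)
v = init(dim=512, size=4)          # (4, 512), rows drawn from N(0, I/512)
print(v.shape, v.norm(dim=1))      # row norms scatter around 1.0

init = TensorInitializer('gaussian', seed=0, is_expectation_norm_init=False)
v = init(dim=512, size=4)          # rows drawn from N(0, I), then unit-normalized
print(v.norm(dim=1))               # exactly 1.0 up to float precision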
tensor_merger.py
ADDED (+16 lines)

import torch


class TensorMerger:

    def __init__(self, merger_type) -> None:
        self.merger_type = merger_type

    def concat(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return torch.cat([x, y], dim=1)

    def __call__(self, x: torch.Tensor, y: torch.Tensor):
        if self.merger_type == 'concat':
            return self.concat(x, y)
        else:
            raise ValueError(f'Unknown merger type: {self.merger_type}')
userLearner.py
ADDED (+151 lines)

import torch
import torch.nn as nn
import torch.nn.functional as F
from .connector import Connector
from .projector import Projector
from .tensor_initializer import TensorInitializer
from .custom_sfx import CustomSoftMax
import numpy as np
import warnings

from typing import Literal

import logging
logger = logging.getLogger(__name__)

class UserLearner(nn.Module):

    k: int  # the number of groups
    llm: nn.Module
    projectors: list[Projector]
    u_id_set: set
    softmax: nn.Module
    is_partition: bool

    def __init__(
        self,
        k: int,
        llm: nn.Module,
        projectors: list[Projector],
        softmax: nn.Module,
        is_partition: bool = False,
    ):
        super().__init__()

        self.k = k
        self.llm = llm
        self.softmax = softmax
        # init user_id registration table and user weights dictionary
        self.u_id_set = set()
        self.W = nn.ParameterDict()
        self.tmp_store_user_ideal_points = None
        # register all k projectors in the moduledict
        assert len(projectors) == k, f"The num of projectors should match up with num of groups: {k} != {len(projectors)}"
        self.projectors = nn.ModuleDict()
        for i in range(k):
            self.projectors[str(i)] = projectors[i]
        self.is_partition = is_partition

    def init_weight(self, u_ids: list, reinit: bool = False):
        for u_id in u_ids:
            if u_id not in self.u_id_set or reinit:
                self.W[u_id] = nn.Parameter(
                    torch.randn((self.k), dtype=torch.float32),
                    requires_grad=True,
                ).to(next(self.projectors[str(0)].parameters()).device)
                self.u_id_set.add(u_id)
            else:
                logger.warning('wait? same user?')

    def get_sfx_w(self, u_ids: list):
        w = torch.stack([self.W[key] for key in u_ids], dim=0)  # (bs, k)
        w = self.softmax(w)
        return w

    def get_hardmax_w(self, u_ids: list):
        w = torch.stack([self.W[key] for key in u_ids], dim=0)
        w = F.one_hot(w.argmax(dim=1), num_classes=self.k).float()  # (bs, k)
        return w

    def infer_gk(self, prompt_tokens, rm_cached=None):
        '''
        prompt_tokens: {'input_ids': torch.tensor, 'attention_mask': torch.tensor}
        If you want to activate rm_cached, please pass in the rm_cached dict or an empty dict.
        '''
        input_ids = prompt_tokens['input_ids']
        attention_mask = prompt_tokens['attention_mask']

        if rm_cached is None:
            embeds = self.llm(
                input_ids=input_ids,
                attention_mask=attention_mask,
            ).last_hidden_state
        else:
            res = self.llm(
                input_ids=input_ids[:, -1:],
                # attention_mask=attention_mask,
                past_key_values=rm_cached["user_learner"],
                use_cache=True
            )
            rm_cached["user_learner"] = res.past_key_values
            embeds = res.last_hidden_state

        # embeds shape: (bs, seq_len, hid_dim)
        shape = embeds.shape
        embeds = embeds.view(-1, shape[-1])  # (bs*seq_len, hid_dim)
        # g(embeds) shape: (bs*seq_len, hid_dim) -> (bs*seq_len, pref_dim)
        logits = torch.stack([g(embeds).view(shape[0], shape[1], -1) for g in self.projectors.values()], dim=1)
        if rm_cached is None:
            return logits
        else:
            return logits, rm_cached  # (bs, k, seq_len, hidden_size)

    def return_user_ideal_points(self):
        if self.tmp_store_user_ideal_points is None:
            raise ValueError('No user ideal points stored')
        return self.tmp_store_user_ideal_points

    def forward(self, prompt_tokens, rm_cached=None):  # only pass the prompt tokens
        '''
        prompt_tokens: {'input_ids': torch.tensor, 'attention_mask': torch.tensor}
        '''
        if rm_cached is None:
            prompt_logits = self.infer_gk(prompt_tokens)
        else:
            prompt_logits, rm_cached = self.infer_gk(prompt_tokens, rm_cached)
        bs = prompt_tokens['input_ids'].size(0)
        # NOTE: `mix_weight` is not defined anywhere in this file; as uploaded, forward()
        # appears to expect it to be supplied externally (e.g. a (k,) mixture-weight tensor),
        # otherwise the next line raises NameError. Note also that forward() always returns a
        # (y_hat, rm_cached) tuple, even when rm_cached is None, unlike ItemLearner.forward().
        assert sum(mix_weight) == 1
        # w = self.softmax(mix_weight.repeat(bs, 1))
        w = mix_weight.repeat(bs, 1)
        logger.info(f"{w=}")
        logger.info(f"{w.shape=}")
        w = w.unsqueeze(-1).unsqueeze(-1)
        y_hat = (w * prompt_logits).sum(dim=1)
        self.tmp_store_user_ideal_points = y_hat
        return y_hat, rm_cached

    def eval(self):
        super().eval()
        if self.is_partition:
            warnings.warn("UserPromptLearner(Partition version) is in eval mode: argmax")
            self.is_argmax = True
        else:
            warnings.warn("UserPromptLearner(Mixture version) is in eval mode: sfx")
            self.is_argmax = False

    def train(self, mode: bool = True):
        super().train(mode)
        if mode:
            if self.is_partition:
                warnings.warn("UserPromptLearner(Partition version) is in train mode: sfx")
                self.is_argmax = False
            else:
                warnings.warn("UserPromptLearner(Mixture version) is in train mode: sfx")
                self.is_argmax = False
        else:
            if self.is_partition:
                warnings.warn("UserPromptLearner(Partition version) is in eval mode: argmax")
                self.is_argmax = True
            else:
                warnings.warn("UserPromptLearner(Mixture version) is in eval mode: sfx")
                self.is_argmax = False
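A small usage sketch for the per-user weight machinery, again assuming the repository files are importable as a package (hypothetically pal_b_rm). It registers two user ids and inspects the softmax mixture weights without touching the LLM path:

import torch.nn as nn
from pal_b_rm.connector import Connector
from pal_b_rm.custom_sfx import CustomSoftMax
from pal_b_rm.userLearner import UserLearner

k = 2
projectors = [Connector(cnct_arch="mlp2-gelu-dropout0", in_dims=512, out_dims=512) for _ in range(k)]
sfx = CustomSoftMax(sfx_type="softmax", temperature=1.0, is_temperature_learnable=False)

# The LLM is not used for weight registration, so a placeholder module is enough here.
learner = UserLearner(k=k, llm=nn.Identity(), projectors=projectors, softmax=sfx)
learner.init_weight(["user_0", "user_1"])           # one k-dim weight vector per user id
print(learner.get_sfx_w(["user_0", "user_1"]))      # (2, k) rows summing to 1
print(learner.get_hardmax_w(["user_0", "user_1"]))  # one-hot argmax version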