DongfuJiang committed on
Commit 275bf8b · verified · 1 Parent(s): 180714c

Update README.md

Files changed (1)
  1. README.md +74 -154
README.md CHANGED
@@ -39,178 +39,98 @@ We introduce AceCoder, the first work to propose a fully automated pipeline for
  - To use the RM to produce rewards, please apply the following example codes:

  ```python
- import torch
- import torch.nn as nn
- from transformers import Qwen2ForCausalLM, AutoTokenizer
- class ValueHead(nn.Module):
-     r"""
-     The ValueHead class implements a head for GPT2 that returns a scalar for each output token.
-     """
-
-     def __init__(self, config, **kwargs):
-         super().__init__()
-         if not hasattr(config, "summary_dropout_prob"):
-             summary_dropout_prob = kwargs.pop("summary_dropout_prob", 0.1)
-         else:
-             summary_dropout_prob = config.summary_dropout_prob
-
-         self.dropout = (
-             nn.Dropout(summary_dropout_prob) if summary_dropout_prob else nn.Identity()
-         )
-
-         # some models such as OPT have a projection layer before the word embeddings - e.g. OPT-350m
-         if hasattr(config, "hidden_size"):
-             hidden_size = config.hidden_size
-         if hasattr(config, "word_embed_proj_dim"):
-             hidden_size = config.word_embed_proj_dim
-         elif hasattr(config, "is_encoder_decoder"):
-             if config.is_encoder_decoder and hasattr(config, "decoder"):
-                 if hasattr(config.decoder, "hidden_size"):
-                     hidden_size = config.decoder.hidden_size
-
-         self.summary = nn.Linear(hidden_size, 1)
-
-         self.flatten = nn.Flatten()
-
-     def forward(self, hidden_states):
-         output = self.dropout(hidden_states)
-
-         # For now force upcast in fp32 if needed. Let's keep the
-         # output in fp32 for numerical stability.
-         if output.dtype != self.summary.weight.dtype:
-             output = output.to(self.summary.weight.dtype)
-
-         output = self.summary(output)
-         return output
-
-
- class Qwen2ForCausalRM(Qwen2ForCausalLM):
-     def __init__(self, config):
-         super().__init__(config)
-         self.v_head = ValueHead(config)
-
-     def forward(
-         self,
-         input_ids=None,
-         past_key_values=None,
-         attention_mask=None,
-         return_past_key_values=False,
-         **kwargs,
-     ):
-         r"""
-         Applies a forward pass to the wrapped model and returns the logits of the value head.
-
-         Args:
-             input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
-                 Indices of input sequence tokens in the vocabulary.
-             past_key_values (`tuple(tuple(torch.FloatTensor))`, `optional`):
-                 Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model
-                 (see `past_key_values` input) to speed up sequential decoding.
-             attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
-                 Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
-                 - 1 for tokens that are **not masked**,
-                 - 0 for tokens that are **masked**.
-             return_past_key_values (bool): A flag indicating if the computed hidden-states should be returned.
-             kwargs (`dict`, `optional`):
-                 Additional keyword arguments, that are passed to the wrapped model.
-         """
-         kwargs["output_hidden_states"] = (
-             True  # this had already been set in the LORA / PEFT examples
-         )
-         kwargs["past_key_values"] = past_key_values
-
-         # if (
-         #     self.is_peft_model
-         #     and
-         #     self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING"
-         # ):
-         #     kwargs.pop("past_key_values")
-
-         base_model_output = super().forward(
-             input_ids=input_ids,
-             attention_mask=attention_mask,
-             **kwargs,
-         )
-
-         last_hidden_state = base_model_output.hidden_states[-1]
-         lm_logits = base_model_output.logits
-         loss = base_model_output.loss
-
-         if last_hidden_state.device != self.v_head.summary.weight.device:
-             last_hidden_state = last_hidden_state.to(self.v_head.summary.weight.device)
-
-         value = self.v_head(last_hidden_state).squeeze(-1)
-
-         # force upcast in fp32 if logits are in half-precision
-         if lm_logits.dtype != torch.float32:
-             lm_logits = lm_logits.float()
-
-         if return_past_key_values:
-             return (lm_logits, loss, value, base_model_output.past_key_values)
-         else:
-             return (lm_logits, loss, value)

  model_path = "TIGER-Lab/AceCodeRM-7B"
  model = Qwen2ForCausalRM.from_pretrained(model_path, device_map="auto")
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
- program_correct = """def runningSum(nums):
      result = []
      current_sum = 0
      for num in nums:
          current_sum += num
          result.append(current_sum)
-     return result"""
- program_incorrect = """def runningSum(nums):
-     result = []
-     for i in range(len(nums)):
-         if i == 0:
-             result.append(nums[i])
-         else:
-             result.append(nums[i] + nums[i-1])
-     return result"""
- input_chat = [
-     [
-         [
-             {
-                 "content": question,
-                 "role": "user",
-             },
-             {
-                 "role": "assistant",
-                 "content": program_correct,
-             },
-         ],
-         [
-             {
-                 "content": question,
-                 "role": "user",
-             },
-             {
-                 "role": "assistant",
-                 "content": program_incorrect,
-             },
-         ],
-     ]
- ]
  input_tokens = tokenizer.apply_chat_template(
-     input_chat,
-     tokenize=True,
-     return_dict=True,
-     padding=True,
-     return_tensors="pt",
- ).to(model.device)
  _, _, values = model(
      **input_tokens,
      output_hidden_states=True,
      return_dict=True,
-     use_cache=False,
  )
  masks = input_tokens["attention_mask"]
- chosen_scores = values.gather(
      dim=-1, index=(masks.sum(dim=-1, keepdim=True) - 1)
  )  # find the last token (eos) in each sequence, a
- chosen_scores = chosen_scores.squeeze()
- print(chosen_scores)
  ```

  - To use the RM to produce rewards, please apply the following example codes:

  ```python
+ """pip install git+https://github.com/TIGER-AI-Lab/AceCoder"""
+ from acecoder import Qwen2ForCausalRM
+ from transformers import AutoTokenizer

  model_path = "TIGER-Lab/AceCodeRM-7B"
  model = Qwen2ForCausalRM.from_pretrained(model_path, device_map="auto")
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+ question = """\
+ Given an array of numbers, write a function runningSum that returns an array where each element at index i is the sum of all elements from index 0 to i (inclusive).
+ For example:
+ Input: nums = [1,2,3,4]
+ Output: [1,3,6,10]
+ """
+
+ program_with_3_errors = """\
+ def runningSum(nums):
+     result = []
+     current_sum = 0
+     for i in range(1, len(nums)):
+         result.append(nums[i])
+         current_sum += nums[i]
+     return result
+ """
+
+ program_with_2_errors = """\
+ def runningSum(nums):
+     result = []
+     current_sum = 0
+     for i in range(0, len(nums)):
+         result.append(nums[i])
+         current_sum += nums[i]
+     return result
+ """
+
+ program_with_1_errors = """\
+ def runningSum(nums):
+     result = []
+     current_sum = 0
+     for i in range(0, len(nums)):
+         result.append(current_sum)
+         current_sum += nums[i]
+     return result
+ """
+ program_correct = """\
+ def runningSum(nums):
      result = []
      current_sum = 0
      for num in nums:
          current_sum += num
          result.append(current_sum)
+     return result
+ """
+
+ program_chats = [
+     [
+         {
+             "content": question,
+             "role": "user",
+         },
+         {
+             "role": "assistant",
+             "content": program
+         }
+     ] for program in [program_with_3_errors, program_with_2_errors, program_with_1_errors, program_correct]
+ ]
+
  input_tokens = tokenizer.apply_chat_template(
+     program_chats,
+     tokenize=True,
+     return_dict=True,
+     padding=True,
+     return_tensors="pt",
+ ).to(model.device)
+
  _, _, values = model(
      **input_tokens,
      output_hidden_states=True,
      return_dict=True,
+     use_cache=False,
  )
  masks = input_tokens["attention_mask"]
+ rm_scores = values.gather(
      dim=-1, index=(masks.sum(dim=-1, keepdim=True) - 1)
  )  # find the last token (eos) in each sequence, a
+ rm_scores = rm_scores.squeeze()
+
+ print("RM Scores:", rm_scores)
+ print("Score of program with 3 errors:", rm_scores[0].item())
+ print("Score of program with 2 errors:", rm_scores[1].item())
+ print("Score of program with 1 errors:", rm_scores[2].item())
+ print("Score of correct program:", rm_scores[3].item())
  ```

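A minimal sketch of how the scores from the updated example could be consumed downstream, assuming the `rm_scores` tensor and the four candidate programs defined above; the best-of-n selection shown here is illustrative rather than part of the README:

```python
# Illustrative only: rank candidate programs by their reward-model scores.
# Assumes `rm_scores` (one scalar per candidate, taken from the value head at
# the last non-padding/eos token) and the candidate programs from the example above.
import torch

candidates = [
    program_with_3_errors,
    program_with_2_errors,
    program_with_1_errors,
    program_correct,
]

ranking = torch.argsort(rm_scores, descending=True)  # highest-reward candidate first
best_idx = ranking[0].item()
print(f"Best candidate index: {best_idx}, score: {rm_scores[best_idx].item():.4f}")
print(candidates[best_idx])
```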