DongfuJiang committed on
Commit 275bf8b · verified · 1 Parent(s): 180714c

Update README.md

Files changed (1)
  1. README.md +74 -154
README.md CHANGED
@@ -39,178 +39,98 @@ We introduce AceCoder, the first work to propose a fully automated pipeline for
  - To use the RM to produce rewards, please apply the following example codes:

  ```python
- import torch
- import torch.nn as nn
- from transformers import Qwen2ForCausalLM, AutoTokenizer
- class ValueHead(nn.Module):
-     r"""
-     The ValueHead class implements a head for GPT2 that returns a scalar for each output token.
-     """
-
-     def __init__(self, config, **kwargs):
-         super().__init__()
-         if not hasattr(config, "summary_dropout_prob"):
-             summary_dropout_prob = kwargs.pop("summary_dropout_prob", 0.1)
-         else:
-             summary_dropout_prob = config.summary_dropout_prob
-
-         self.dropout = (
-             nn.Dropout(summary_dropout_prob) if summary_dropout_prob else nn.Identity()
-         )
-
-         # some models such as OPT have a projection layer before the word embeddings - e.g. OPT-350m
-         if hasattr(config, "hidden_size"):
-             hidden_size = config.hidden_size
-         if hasattr(config, "word_embed_proj_dim"):
-             hidden_size = config.word_embed_proj_dim
-         elif hasattr(config, "is_encoder_decoder"):
-             if config.is_encoder_decoder and hasattr(config, "decoder"):
-                 if hasattr(config.decoder, "hidden_size"):
-                     hidden_size = config.decoder.hidden_size
-
-         self.summary = nn.Linear(hidden_size, 1)
-
-         self.flatten = nn.Flatten()
-
-     def forward(self, hidden_states):
-         output = self.dropout(hidden_states)
-
-         # For now force upcast in fp32 if needed. Let's keep the
-         # output in fp32 for numerical stability.
-         if output.dtype != self.summary.weight.dtype:
-             output = output.to(self.summary.weight.dtype)
-
-         output = self.summary(output)
-         return output
-
-
- class Qwen2ForCausalRM(Qwen2ForCausalLM):
-     def __init__(self, config):
-         super().__init__(config)
-         self.v_head = ValueHead(config)
-
-     def forward(
-         self,
-         input_ids=None,
-         past_key_values=None,
-         attention_mask=None,
-         return_past_key_values=False,
-         **kwargs,
-     ):
-         r"""
-         Applies a forward pass to the wrapped model and returns the logits of the value head.
-
-         Args:
-             input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
-                 Indices of input sequence tokens in the vocabulary.
-             past_key_values (`tuple(tuple(torch.FloatTensor))`, `optional`):
-                 Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model
-                 (see `past_key_values` input) to speed up sequential decoding.
-             attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
-                 Mask to avoid performing attention on padding token indices. Mask values selected in ``[0, 1]``:
-                 - 1 for tokens that are **not masked**,
-                 - 0 for tokens that are **masked**.
-             return_past_key_values (bool): A flag indicating if the computed hidden-states should be returned.
-             kwargs (`dict`, `optional`):
-                 Additional keyword arguments, that are passed to the wrapped model.
-         """
-         kwargs["output_hidden_states"] = (
-             True  # this had already been set in the LORA / PEFT examples
-         )
-         kwargs["past_key_values"] = past_key_values
-
-         # if (
-         #     self.is_peft_model
-         #     and
-         #     self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING"
-         # ):
-         #     kwargs.pop("past_key_values")
-
-         base_model_output = super().forward(
-             input_ids=input_ids,
-             attention_mask=attention_mask,
-             **kwargs,
-         )
-
-         last_hidden_state = base_model_output.hidden_states[-1]
-         lm_logits = base_model_output.logits
-         loss = base_model_output.loss
-
-         if last_hidden_state.device != self.v_head.summary.weight.device:
-             last_hidden_state = last_hidden_state.to(self.v_head.summary.weight.device)
-
-         value = self.v_head(last_hidden_state).squeeze(-1)
-
-         # force upcast in fp32 if logits are in half-precision
-         if lm_logits.dtype != torch.float32:
-             lm_logits = lm_logits.float()
-
-         if return_past_key_values:
-             return (lm_logits, loss, value, base_model_output.past_key_values)
-         else:
-             return (lm_logits, loss, value)

  model_path = "TIGER-Lab/AceCodeRM-7B"
  model = Qwen2ForCausalRM.from_pretrained(model_path, device_map="auto")
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
- program_correct = """def runningSum(nums):
      result = []
      current_sum = 0
      for num in nums:
          current_sum += num
          result.append(current_sum)
-     return result"""
- program_incorrect = """def runningSum(nums):
-     result = []
-     for i in range(len(nums)):
-         if i == 0:
-             result.append(nums[i])
-         else:
-             result.append(nums[i] + nums[i-1])
-     return result"""
- input_chat = [
-     [
-         [
-             {
-                 "content": question,
-                 "role": "user",
-             },
-             {
-                 "role": "assistant",
-                 "content": program_correct,
-             },
-         ],
-         [
-             {
-                 "content": question,
-                 "role": "user",
-             },
-             {
-                 "role": "assistant",
-                 "content": program_incorrect,
-             },
-         ],
-     ]
- ]
  input_tokens = tokenizer.apply_chat_template(
-     input_chat,
-     tokenize=True,
-     return_dict=True,
-     padding=True,
-     return_tensors="pt",
- ).to(model.device)
  _, _, values = model(
      **input_tokens,
      output_hidden_states=True,
      return_dict=True,
-     use_cache=False,
  )
  masks = input_tokens["attention_mask"]
- chosen_scores = values.gather(
      dim=-1, index=(masks.sum(dim=-1, keepdim=True) - 1)
  )  # find the last token (eos) in each sequence, a
- chosen_scores = chosen_scores.squeeze()
- print(chosen_scores)
  ```

  - To use the RM to produce rewards, please apply the following example codes:

  ```python
+ """pip install git+https://github.com/TIGER-AI-Lab/AceCoder"""
+ from acecoder import Qwen2ForCausalRM
+ from transformers import AutoTokenizer

  model_path = "TIGER-Lab/AceCodeRM-7B"
  model = Qwen2ForCausalRM.from_pretrained(model_path, device_map="auto")
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+ question = """\
+ Given an array of numbers, write a function runningSum that returns an array where each element at index i is the sum of all elements from index 0 to i (inclusive).
+ For example:
+ Input: nums = [1,2,3,4]
+ Output: [1,3,6,10]
+ """
+
+ program_with_3_errors = """\
+ def runningSum(nums):
+     result = []
+     current_sum = 0
+     for i in range(1, len(nums)):
+         result.append(nums[i])
+         current_sum += nums[i]
+     return result
+ """
+
+ program_with_2_errors = """\
+ def runningSum(nums):
+     result = []
+     current_sum = 0
+     for i in range(0, len(nums)):
+         result.append(nums[i])
+         current_sum += nums[i]
+     return result
+ """
+
+ program_with_1_errors = """\
+ def runningSum(nums):
+     result = []
+     current_sum = 0
+     for i in range(0, len(nums)):
+         result.append(current_sum)
+         current_sum += nums[i]
+     return result
+ """
+ program_correct = """\
+ def runningSum(nums):
      result = []
      current_sum = 0
      for num in nums:
          current_sum += num
          result.append(current_sum)
+     return result
+ """
+
+ program_chats = [
+     [
+         {
+             "content": question,
+             "role": "user",
+         },
+         {
+             "role": "assistant",
+             "content": program
+         }
+     ] for program in [program_with_3_errors, program_with_2_errors, program_with_1_errors, program_correct]
+ ]
+
  input_tokens = tokenizer.apply_chat_template(
+     program_chats,
+     tokenize=True,
+     return_dict=True,
+     padding=True,
+     return_tensors="pt",
+ ).to(model.device)
+
  _, _, values = model(
      **input_tokens,
      output_hidden_states=True,
      return_dict=True,
+     use_cache=False,
  )
  masks = input_tokens["attention_mask"]
+ rm_scores = values.gather(
      dim=-1, index=(masks.sum(dim=-1, keepdim=True) - 1)
  )  # find the last token (eos) in each sequence, a
+ rm_scores = rm_scores.squeeze()
+
+ print("RM Scores:", rm_scores)
+ print("Score of program with 3 errors:", rm_scores[0].item())
+ print("Score of program with 2 errors:", rm_scores[1].item())
+ print("Score of program with 1 errors:", rm_scores[2].item())
+ print("Score of correct program:", rm_scores[3].item())
  ```

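A minimal sketch of how the scores from the updated example could be consumed downstream, assuming the `rm_scores` tensor and the four candidate programs defined above; the best-of-n selection shown here is illustrative rather than part of the README:

```python
# Illustrative only: rank candidate programs by their reward-model scores.
# Assumes `rm_scores` (one scalar per candidate, taken from the value head at
# the last non-padding/eos token) and the candidate programs from the example above.
import torch

candidates = [
    program_with_3_errors,
    program_with_2_errors,
    program_with_1_errors,
    program_correct,
]

ranking = torch.argsort(rm_scores, descending=True)  # highest-reward candidate first
best_idx = ranking[0].item()
print(f"Best candidate index: {best_idx}, score: {rm_scores[best_idx].item():.4f}")
print(candidates[best_idx])
```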