autoprogrammer committed on
Commit
a71d5b3
·
verified ·
1 Parent(s): 81658fb

Update modeling_densebackward_olmoe0125.py

Browse files
Files changed (1) hide show
  1. modeling_densebackward_olmoe0125.py +31 -4
modeling_densebackward_olmoe0125.py CHANGED
@@ -152,21 +152,48 @@ class DenseBackwardOLMoEForCausalLM(OlmoeForCausalLM):
152
  base_model_prefix = "olmoe"
153
 
154
  def __init__(self, config):
 
155
  super().__init__(config)
 
 
 
 
 
 
 
 
 
 
 
 
156
  # 遍历模型中所有 decoder 层,替换每个 OlmoeSparseMoeBlock 为 DenseBackward 版本
157
  # 此处假设官方模型在 self.model.layers 中组织 decoder 层,
158
  # 且每层中 mlp 模块包含属性 sparse_moe_block。
159
  for layer in self.model.layers:
160
- if hasattr(layer.mlp, "sparse_moe_block"):
161
- orig_block = layer.mlp.sparse_moe_block
 
162
  # 通过直接复制原版属性创建新的块
163
  new_block = DenseBackwardOlmoeSparseMoeBlock(config) # 或其他适当参数
164
  # 然后手动复制需要共享的属性:
165
  new_block.gate = orig_block.gate
166
  new_block.experts = orig_block.experts
167
- new_block.router = orig_block.router
168
  new_block.num_experts = orig_block.num_experts
169
  new_block.top_k = orig_block.top_k
170
  new_block.norm_topk_prob = orig_block.norm_topk_prob
171
- layer.mlp.sparse_moe_block = new_block
 
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  base_model_prefix = "olmoe"
153
 
154
  def __init__(self, config):
155
+ # 首先调用父类初始化方法
156
  super().__init__(config)
157
+
158
+ # 不要尝试重新赋值self,而是从预训练模型加载并更新当前模型
159
+ pretrained_model = OlmoeForCausalLM.from_pretrained("allenai/OLMoE-1B-7B-0125")
160
+
161
+ # 复制预训练模型的状态到当前模型
162
+ self.config = pretrained_model.config
163
+ self.model = pretrained_model.model
164
+ self.vocab_size = pretrained_model.vocab_size
165
+ self.router_aux_loss_coef = pretrained_model.router_aux_loss_coef
166
+ self.num_experts = pretrained_model.num_experts
167
+ self.lm_head = pretrained_model.lm_head
168
+
169
  # 遍历模型中所有 decoder 层,替换每个 OlmoeSparseMoeBlock 为 DenseBackward 版本
170
  # 此处假设官方模型在 self.model.layers 中组织 decoder 层,
171
  # 且每层中 mlp 模块包含属性 sparse_moe_block。
172
  for layer in self.model.layers:
173
+ if hasattr(layer.mlp, "gate"):
174
+ print("111")
175
+ orig_block = layer.mlp
176
  # 通过直接复制原版属性创建新的块
177
  new_block = DenseBackwardOlmoeSparseMoeBlock(config) # 或其他适当参数
178
  # 然后手动复制需要共享的属性:
179
  new_block.gate = orig_block.gate
180
  new_block.experts = orig_block.experts
 
181
  new_block.num_experts = orig_block.num_experts
182
  new_block.top_k = orig_block.top_k
183
  new_block.norm_topk_prob = orig_block.norm_topk_prob
184
+ layer.mlp = new_block
185
+ print(type(layer.mlp))
186
 
187
+ def main():
188
+ config = DenseBackwardOLMoEConfig( # 官方模型参数
189
+ model_marker="DenseBackward_olmoe_marker",
190
+ )
191
+ # 创建自定义模型实例
192
+ model = DenseBackwardOLMoEForCausalLM(config)
193
+ print(type(model))
194
+ print(type(model.model))
195
+ print(type(model.model.layers[0]))
196
+ print(type(model.model.layers[0].mlp))
197
+ print(type(model.model.layers[0].mlp.experts))
198
+ if __name__ == "__main__":
199
+ main()