airevo2 committed on
Commit
59e22b7
·
1 Parent(s): 5867a45

自定义文件

Browse files
__init__.py CHANGED
@@ -1,6 +1,6 @@
1
  # 导出自定义配置和模型类
2
- from .configuration_densebackward_olmoe0125 import DenseBackwardOLMoEConfig
3
- from .modeling_densebackward_olmoe0125 import DenseBackwardOLMoEForCausalLM, DenseBackwardOlmoeSparseMoeBlock
4
 
5
  # 显式注册模型类型
6
  from transformers.models.auto.configuration_auto import CONFIG_MAPPING
 
1
  # 导出自定义配置和模型类
2
+ from .configuration_densebackward_olmoe0125_v1 import DenseBackwardOLMoEConfig
3
+ from .modeling_densebackward_olmoe0125_v1 import DenseBackwardOLMoEForCausalLM, DenseBackwardOlmoeSparseMoeBlock
4
 
5
  # 显式注册模型类型
6
  from transformers.models.auto.configuration_auto import CONFIG_MAPPING
config.json CHANGED
@@ -4,9 +4,9 @@
4
  "DenseBackwardOLMoEForCausalLM"
5
  ],
6
  "auto_map": {
7
- "AutoConfig": "configuration_densebackward_olmoe0125.DenseBackwardOLMoEConfig",
8
- "AutoModel": "modeling_densebackward_olmoe0125.DenseBackwardOLMoEForCausalLM",
9
- "AutoModelForCausalLM": "modeling_densebackward_olmoe0125.DenseBackwardOLMoEForCausalLM"
10
  },
11
  "attention_bias": false,
12
  "attention_dropout": 0.0,
 
4
  "DenseBackwardOLMoEForCausalLM"
5
  ],
6
  "auto_map": {
7
+ "AutoConfig": "configuration_densebackward_olmoe0125_v1.DenseBackwardOLMoEConfig",
8
+ "AutoModel": "modeling_densebackward_olmoe0125_v1.DenseBackwardOLMoEForCausalLM",
9
+ "AutoModelForCausalLM": "modeling_densebackward_olmoe0125_v1.DenseBackwardOLMoEForCausalLM"
10
  },
11
  "attention_bias": false,
12
  "attention_dropout": 0.0,
configuration_densebackward_olmoe0125.py → configuration_densebackward_olmoe0125_v1.py RENAMED
File without changes
modeling_densebackward_olmoe0125.py → modeling_densebackward_olmoe0125_v1.py RENAMED
@@ -6,7 +6,7 @@ import torch.nn.functional as F
6
 
7
  # 导入官方实现(注意根据你的 transformers 版本调整导入路径)
8
  from transformers.models.olmoe.modeling_olmoe import OlmoeForCausalLM, OlmoeSparseMoeBlock, OlmoeMLP
9
- from .configuration_densebackward_olmoe0125 import DenseBackwardOLMoEConfig
10
 
11
 
12
  class DenseBackwardOlmoeSparseMoeBlock(OlmoeSparseMoeBlock):
@@ -77,6 +77,10 @@ class DenseBackwardOlmoeSparseMoeBlock(OlmoeSparseMoeBlock):
77
  # 使用所有专家的输出和路由权重计算密集输出
78
  routing_weights_expanded = routing_weights.unsqueeze(-1) # (N_tokens, num_experts, 1)
79
  routing_weights_expanded = routing_weights_expanded.to(dtype=dtype)
 
 
 
 
80
  dense_outputs = (all_expert_outputs * routing_weights_expanded).sum(dim=1) # (N_tokens, hidden_dim)
81
 
82
  # ---------- 组合稀疏前向和密集反向 ----------
 
6
 
7
  # 导入官方实现(注意根据你的 transformers 版本调整导入路径)
8
  from transformers.models.olmoe.modeling_olmoe import OlmoeForCausalLM, OlmoeSparseMoeBlock, OlmoeMLP
9
+ from .configuration_densebackward_olmoe0125_v1 import DenseBackwardOLMoEConfig
10
 
11
 
12
  class DenseBackwardOlmoeSparseMoeBlock(OlmoeSparseMoeBlock):
 
77
  # 使用所有专家的输出和路由权重计算密集输出
78
  routing_weights_expanded = routing_weights.unsqueeze(-1) # (N_tokens, num_experts, 1)
79
  routing_weights_expanded = routing_weights_expanded.to(dtype=dtype)
80
+ print(expanded_weights.shape)
81
+ print("sparse",expanded_weights)
82
+ print(routing_weights_expanded.shape)
83
+ print("dense",routing_weights_expanded)
84
  dense_outputs = (all_expert_outputs * routing_weights_expanded).sum(dim=1) # (N_tokens, hidden_dim)
85
 
86
  # ---------- 组合稀疏前向和密集反向 ----------