airevo2
committed on
Commit
·
59e22b7
1
Parent(s):
5867a45
自定义文件
Browse files
__init__.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
# 导出自定义配置和模型类
|
2 |
-
from .
|
3 |
-
from .
|
4 |
|
5 |
# 显式注册模型类型
|
6 |
from transformers.models.auto.configuration_auto import CONFIG_MAPPING
|
|
|
1 |
# 导出自定义配置和模型类
|
2 |
+
from .configuration_densebackward_olmoe0125_v1 import DenseBackwardOLMoEConfig
|
3 |
+
from .modeling_densebackward_olmoe0125_v1 import DenseBackwardOLMoEForCausalLM, DenseBackwardOlmoeSparseMoeBlock
|
4 |
|
5 |
# 显式注册模型类型
|
6 |
from transformers.models.auto.configuration_auto import CONFIG_MAPPING
|
config.json
CHANGED
@@ -4,9 +4,9 @@
|
|
4 |
"DenseBackwardOLMoEForCausalLM"
|
5 |
],
|
6 |
"auto_map": {
|
7 |
-
"AutoConfig": "
|
8 |
-
"AutoModel": "
|
9 |
-
"AutoModelForCausalLM": "
|
10 |
},
|
11 |
"attention_bias": false,
|
12 |
"attention_dropout": 0.0,
|
|
|
4 |
"DenseBackwardOLMoEForCausalLM"
|
5 |
],
|
6 |
"auto_map": {
|
7 |
+
"AutoConfig": "configuration_densebackward_olmoe0125_v1.DenseBackwardOLMoEConfig",
|
8 |
+
"AutoModel": "modeling_densebackward_olmoe0125_v1.DenseBackwardOLMoEForCausalLM",
|
9 |
+
"AutoModelForCausalLM": "modeling_densebackward_olmoe0125_v1.DenseBackwardOLMoEForCausalLM"
|
10 |
},
|
11 |
"attention_bias": false,
|
12 |
"attention_dropout": 0.0,
|
configuration_densebackward_olmoe0125.py → configuration_densebackward_olmoe0125_v1.py
RENAMED
File without changes
|
modeling_densebackward_olmoe0125.py → modeling_densebackward_olmoe0125_v1.py
RENAMED
@@ -6,7 +6,7 @@ import torch.nn.functional as F
|
|
6 |
|
7 |
# 导入官方实现(注意根据你的 transformers 版本调整导入路径)
|
8 |
from transformers.models.olmoe.modeling_olmoe import OlmoeForCausalLM, OlmoeSparseMoeBlock, OlmoeMLP
|
9 |
-
from .
|
10 |
|
11 |
|
12 |
class DenseBackwardOlmoeSparseMoeBlock(OlmoeSparseMoeBlock):
|
@@ -77,6 +77,10 @@ class DenseBackwardOlmoeSparseMoeBlock(OlmoeSparseMoeBlock):
|
|
77 |
# 使用所有专家的输出和路由权重计算密集输出
|
78 |
routing_weights_expanded = routing_weights.unsqueeze(-1) # (N_tokens, num_experts, 1)
|
79 |
routing_weights_expanded = routing_weights_expanded.to(dtype=dtype)
|
|
|
|
|
|
|
|
|
80 |
dense_outputs = (all_expert_outputs * routing_weights_expanded).sum(dim=1) # (N_tokens, hidden_dim)
|
81 |
|
82 |
# ---------- 组合稀疏前向和密集反向 ----------
|
|
|
6 |
|
7 |
# 导入官方实现(注意根据你的 transformers 版本调整导入路径)
|
8 |
from transformers.models.olmoe.modeling_olmoe import OlmoeForCausalLM, OlmoeSparseMoeBlock, OlmoeMLP
|
9 |
+
from .configuration_densebackward_olmoe0125_v1 import DenseBackwardOLMoEConfig
|
10 |
|
11 |
|
12 |
class DenseBackwardOlmoeSparseMoeBlock(OlmoeSparseMoeBlock):
|
|
|
77 |
# 使用所有专家的输出和路由权重计算密集输出
|
78 |
routing_weights_expanded = routing_weights.unsqueeze(-1) # (N_tokens, num_experts, 1)
|
79 |
routing_weights_expanded = routing_weights_expanded.to(dtype=dtype)
|
80 |
+
print(expanded_weights.shape)
|
81 |
+
print("sparse",expanded_weights)
|
82 |
+
print(routing_weights_expanded.shape)
|
83 |
+
print("dense",routing_weights_expanded)
|
84 |
dense_outputs = (all_expert_outputs * routing_weights_expanded).sum(dim=1) # (N_tokens, hidden_dim)
|
85 |
|
86 |
# ---------- 组合稀疏前向和密集反向 ----------
|