QscQ committed
Commit 3a77662 · 1 parent: b620f7e

change files

Files changed (2):
  1. config.json (+8 -11)
  2. model.safetensors.index.json (+0 -0)
config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "architectures": [
-    "MiniMaxText01ForCausalLM"
+    "AbabForCausalLM"
   ],
   "attention_dropout": 0.0,
   "attn_type_list": [
@@ -86,24 +86,21 @@
     1
   ],
   "auto_map": {
-    "AutoConfig": "configuration_minimax_text_01.MiniMaxText01Config",
-    "AutoModelForCausalLM": "modeling_minimax_text_01.MiniMaxText01ForCausalLM"
+    "AutoConfig": "configuration_abab.AbabConfig",
+    "AutoModelForCausalLM": "modeling_abab.AbabForCausalLM"
   },
-  "bos_token_id": null,
-  "eos_token_id": null,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 6144,
   "initializer_range": 0.02,
   "intermediate_size": 9216,
   "layernorm_full_attention_alpha": 3.5565588200778455,
-  "layernorm_full_attention_beta": 1.0,
   "layernorm_linear_attention_alpha": 3.5565588200778455,
-  "layernorm_linear_attention_beta": 1.0,
   "layernorm_mlp_alpha": 3.5565588200778455,
-  "layernorm_mlp_beta": 1.0,
-  "max_position_embeddings": 10240000,
-  "model_type": "minimax_text_01",
+  "max_position_embeddings": 131072,
+  "model_type": "mixtral",
   "num_attention_heads": 64,
   "num_experts_per_tok": 2,
   "num_hidden_layers": 80,
@@ -120,7 +117,7 @@
   "shared_moe_mode": "sigmoid",
   "sliding_window": null,
   "tie_word_embeddings": false,
-  "transformers_version": "4.45.2",
+  "transformers_version": "4.49.0",
   "use_cache": true,
   "vocab_size": 200064
 }
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
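For context (not part of the commit): because the updated "auto_map" points at custom configuration_abab / modeling_abab modules shipped in the repository, a checkpoint with this config.json is normally loaded through transformers with trust_remote_code=True. The sketch below assumes such a repo exists; the repository id "org/model-repo" is a placeholder, not a name taken from this commit.

# Minimal loading sketch, assuming configuration_abab.py and modeling_abab.py
# are shipped alongside this config.json. "org/model-repo" is a placeholder id.
from transformers import AutoConfig, AutoModelForCausalLM

# trust_remote_code=True lets transformers import the custom AbabConfig /
# AbabForCausalLM classes referenced in the "auto_map" section of config.json.
config = AutoConfig.from_pretrained("org/model-repo", trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
    "org/model-repo",
    trust_remote_code=True,
    torch_dtype="auto",   # use the dtype stored in the checkpoint
    device_map="auto",    # requires accelerate; shards the 80-layer model across devices
)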