Bo1015 committed
Commit 0b4a4a3 (verified) · Parent: 4d6d2ec

Upload 2 files

Files changed (2):
  1. config.json (+13 -13)
  2. configuration_proteinglm.py (+7 -7)
config.json CHANGED
@@ -1,21 +1,21 @@
 {
-  "_name_or_path": "proteinglm-3b-mlm",
+  "_name_or_path": "BioMap/xtrimopglm-3b-mlm",
   "add_bias_linear": true,
   "add_qkv_bias": true,
   "apply_query_key_layer_scaling": true,
   "apply_residual_connection_post_layernorm": true,
   "architectures": [
-    "ProteinGLMModel"
+    "xTrimoPGLMModel"
   ],
   "attention_dropout": 0.0,
   "attention_softmax_in_fp32": true,
   "auto_map": {
-    "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
-    "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
-    "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
-    "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
-    "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
-    "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
+    "AutoConfig": "configuration_xtrimopglm.xTrimoPGLMConfig",
+    "AutoModel": "modeling_xtrimopglm.xTrimoPGLMForMaskedLM",
+    "AutoModelForCausalLM": "modeling_xtrimopglm.xTrimoPGLMForCasualLM",
+    "AutoModelForMaskedLM": "modeling_xtrimopglm.xTrimoPGLMForMaskedLM",
+    "AutoModelForSequenceClassification": "modeling_xtrimopglm.xTrimoPGLMForSequenceClassification",
+    "AutoModelForTokenClassification": "modeling_xtrimopglm.xTrimoPGLMForTokenClassification"
   },
   "bias_dropout_fusion": true,
   "deepnorm": true,
@@ -23,14 +23,15 @@
   "ffn_hidden_size": 6832,
   "fp32_residual_connection": false,
   "glu_activation": "geglu",
+  "initializer_range": 0.02,
   "head_num": 1,
   "hidden_dropout": 0.0,
   "hidden_size": 2560,
-  "initializer_range": 0.02,
-  "is_causal": true,
+  "is_causal": false,
+  "use_cache": true,
   "kv_channels": 64,
   "layernorm_epsilon": 1e-05,
-  "model_type": "ProteinGLM",
+  "model_type": "xTrimoPGLM",
   "moe": false,
   "multi_query_attention": false,
   "multi_query_group_num": 1,
@@ -42,11 +43,10 @@
   "quantization_bit": 0,
   "rmsnorm": false,
   "rotary_embedding_2d": false,
-  "seq_length": 1024,
+  "seq_length": 2048,
   "torch_dtype": "float32",
   "transformers_version": "4.41.2",
   "untie_head": false,
-  "use_cache": true,
   "use_pytorch_sdpa": true,
   "vocab_size": 128
 }
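
With the auto_map renamed, the checkpoint resolves to the xTrimoPGLM remote classes when loaded through the transformers auto classes. A minimal loading sketch, assuming the repo id given in the updated "_name_or_path" (BioMap/xtrimopglm-3b-mlm) and that the repo ships the configuration/modeling files referenced in auto_map:

# Sketch only: assumes the repo id from "_name_or_path" and that the repo's
# remote code provides the classes listed under "auto_map".
from transformers import AutoConfig, AutoModelForMaskedLM

repo_id = "BioMap/xtrimopglm-3b-mlm"

# trust_remote_code=True is required: the config/model classes referenced in
# auto_map live in the repo itself, not inside transformers.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForMaskedLM.from_pretrained(repo_id, trust_remote_code=True)

print(config.seq_length, config.is_causal)  # per this commit's config.json: 2048, False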
configuration_proteinglm.py CHANGED
@@ -5,16 +5,17 @@ class ProteinGLMConfig(PretrainedConfig):
     model_type = "ProteinGLM"
     def __init__(
         self,
-        num_layers=36,
+        num_layers=28,
         padded_vocab_size=128,
-        hidden_size=2560,
+        hidden_size=4096,
         ffn_hidden_size=6832,
         kv_channels=64,
         num_attention_heads=40,
-        seq_length=1024,
+        seq_length=2048,
         hidden_dropout=0.0,
         attention_dropout=0.0,
         layernorm_epsilon=1e-5,
+        initializer_range=0.02,
         glu_activation='geglu',
         rmsnorm=False,
         deepnorm=True,
@@ -31,9 +32,8 @@
         quantization_bit=0,
         rotary_embedding_2d=False,
         use_pytorch_sdpa=True,
-        is_causal=True,
+        is_causal=False,
         use_cache=True,
-        initializer_range=0.02,
         moe=False,
         num_experts=0,
         experts_per_token=0,
@@ -60,6 +60,7 @@
         self.attention_dropout = attention_dropout
         self.layernorm_epsilon = layernorm_epsilon
         self.glu_activation = glu_activation
+        self.initializer_range = initializer_range
         self.rmsnorm = rmsnorm
         self.deepnorm = deepnorm
         self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
@@ -75,8 +76,7 @@
         self.quantization_bit = quantization_bit
         self.rotary_embedding_2d = rotary_embedding_2d
         self.is_causal = is_causal
-        self.use_cache = use_cache
-        self.initializer_range = initializer_range
+        self.use_cache=use_cache
         self.use_pytorch_sdpa = use_pytorch_sdpa
         self.moe = moe
         self.num_experts = num_experts
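
As a quick sanity check of the changed defaults, a minimal sketch, assuming configuration_proteinglm.py is importable from the working directory and that the elided parts of __init__ assign each remaining argument (e.g. seq_length) to a same-named attribute, as the visible lines do:

# Sketch only: checks the defaults touched by this commit against a local copy
# of configuration_proteinglm.py (assumed to be on sys.path).
from configuration_proteinglm import ProteinGLMConfig

cfg = ProteinGLMConfig()

assert cfg.seq_length == 2048          # was 1024
assert cfg.is_causal is False          # was True; this is an MLM checkpoint, so attention is not causal
assert cfg.initializer_range == 0.02   # stored on the config again
assert cfg.use_cache is True

# The num_layers / hidden_size defaults moved to 28 / 4096, but per-checkpoint
# values (e.g. hidden_size 2560 in this repo's config.json) still override the
# defaults when the config is loaded with from_pretrained.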