biomap-research
/

proteinglm-3b-mlm

@@ -1,21 +1,21 @@
 {
-    "_name_or_path": "proteinglm-3b-mlm",
     "add_bias_linear": true,
     "add_qkv_bias": true,
     "apply_query_key_layer_scaling": true,
     "apply_residual_connection_post_layernorm": true,
     "architectures": [
-      "ProteinGLMModel"
     ],
     "attention_dropout": 0.0,
     "attention_softmax_in_fp32": true,
     "auto_map": {
-      "AutoConfig": "configuration_proteinglm.ProteinGLMConfig",
-      "AutoModel": "modeling_proteinglm.ProteinGLMForMaskedLM",
-      "AutoModelForCausalLM": "modeling_proteinglm.ProteinGLMForCasualLM",
-      "AutoModelForMaskedLM": "modeling_proteinglm.ProteinGLMForMaskedLM",
-      "AutoModelForSequenceClassification": "modeling_proteinglm.ProteinGLMForSequenceClassification",
-      "AutoModelForTokenClassification": "modeling_proteinglm.ProteinGLMForTokenClassification"
     },
     "bias_dropout_fusion": true,
     "deepnorm": true,
@@ -23,14 +23,15 @@
     "ffn_hidden_size": 6832,
     "fp32_residual_connection": false,
     "glu_activation": "geglu",
     "head_num": 1,
     "hidden_dropout": 0.0,
     "hidden_size": 2560,
-    "initializer_range": 0.02,
-    "is_causal": true,
     "kv_channels": 64,
     "layernorm_epsilon": 1e-05,
-    "model_type": "ProteinGLM",
     "moe": false,
     "multi_query_attention": false,
     "multi_query_group_num": 1,
@@ -42,11 +43,10 @@
     "quantization_bit": 0,
     "rmsnorm": false,
     "rotary_embedding_2d": false,
-    "seq_length": 1024,
     "torch_dtype": "float32",
     "transformers_version": "4.41.2",
     "untie_head": false,
-    "use_cache": true,
     "use_pytorch_sdpa": true,
     "vocab_size": 128
   }

 {
+    "_name_or_path": "BioMap/xtrimopglm-3b-mlm",
     "add_bias_linear": true,
     "add_qkv_bias": true,
     "apply_query_key_layer_scaling": true,
     "apply_residual_connection_post_layernorm": true,
     "architectures": [
+      "xTrimoPGLMModel"
     ],
     "attention_dropout": 0.0,
     "attention_softmax_in_fp32": true,
     "auto_map": {
+      "AutoConfig": "configuration_xtrimopglm.xTrimoPGLMConfig",
+      "AutoModel": "modeling_xtrimopglm.xTrimoPGLMForMaskedLM",
+      "AutoModelForCausalLM": "modeling_xtrimopglm.xTrimoPGLMForCasualLM",
+      "AutoModelForMaskedLM": "modeling_xtrimopglm.xTrimoPGLMForMaskedLM",
+      "AutoModelForSequenceClassification": "modeling_xtrimopglm.xTrimoPGLMForSequenceClassification",
+      "AutoModelForTokenClassification": "modeling_xtrimopglm.xTrimoPGLMForTokenClassification"
     },
     "bias_dropout_fusion": true,
     "deepnorm": true,
     "ffn_hidden_size": 6832,
     "fp32_residual_connection": false,
     "glu_activation": "geglu",
+    "initializer_range": 0.02,
     "head_num": 1,
     "hidden_dropout": 0.0,
     "hidden_size": 2560,
+    "is_causal": false,
+    "use_cache": true,
     "kv_channels": 64,
     "layernorm_epsilon": 1e-05,
+    "model_type": "xTrimoPGLM",
     "moe": false,
     "multi_query_attention": false,
     "multi_query_group_num": 1,
     "quantization_bit": 0,
     "rmsnorm": false,
     "rotary_embedding_2d": false,
+    "seq_length": 2048,
     "torch_dtype": "float32",
     "transformers_version": "4.41.2",
     "untie_head": false,
     "use_pytorch_sdpa": true,
     "vocab_size": 128
   }

configuration_proteinglm.py CHANGED Viewed

@@ -5,16 +5,17 @@ class ProteinGLMConfig(PretrainedConfig):
     model_type = "ProteinGLM"
     def __init__(
         self,
-        num_layers=36,
         padded_vocab_size=128,
-        hidden_size=2560,
         ffn_hidden_size=6832,
         kv_channels=64,
         num_attention_heads=40,
-        seq_length=1024,
         hidden_dropout=0.0,
         attention_dropout=0.0,
         layernorm_epsilon=1e-5,
         glu_activation='geglu',
         rmsnorm=False,
         deepnorm=True,
@@ -31,9 +32,8 @@ class ProteinGLMConfig(PretrainedConfig):
         quantization_bit=0,
         rotary_embedding_2d=False,
         use_pytorch_sdpa=True,
-        is_causal=True,
         use_cache=True,
-        initializer_range=0.02,
         moe=False,
         num_experts=0,
         experts_per_token=0,
@@ -60,6 +60,7 @@ class ProteinGLMConfig(PretrainedConfig):
         self.attention_dropout = attention_dropout
         self.layernorm_epsilon = layernorm_epsilon
         self.glu_activation = glu_activation
         self.rmsnorm = rmsnorm
         self.deepnorm = deepnorm
         self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
@@ -75,8 +76,7 @@ class ProteinGLMConfig(PretrainedConfig):
         self.quantization_bit = quantization_bit
         self.rotary_embedding_2d = rotary_embedding_2d
         self.is_causal = is_causal
-        self.use_cache = use_cache
-        self.initializer_range = initializer_range
         self.use_pytorch_sdpa = use_pytorch_sdpa
         self.moe = moe
         self.num_experts = num_experts

     model_type = "ProteinGLM"
     def __init__(
         self,
+        num_layers=28,
         padded_vocab_size=128,
+        hidden_size=4096,
         ffn_hidden_size=6832,
         kv_channels=64,
         num_attention_heads=40,
+        seq_length=2048,
         hidden_dropout=0.0,
         attention_dropout=0.0,
         layernorm_epsilon=1e-5,
+        initializer_range=0.02,
         glu_activation='geglu',
         rmsnorm=False,
         deepnorm=True,
         quantization_bit=0,
         rotary_embedding_2d=False,
         use_pytorch_sdpa=True,
+        is_causal=False,
         use_cache=True,
         moe=False,
         num_experts=0,
         experts_per_token=0,
         self.attention_dropout = attention_dropout
         self.layernorm_epsilon = layernorm_epsilon
         self.glu_activation = glu_activation
+        self.initializer_range = initializer_range
         self.rmsnorm = rmsnorm
         self.deepnorm = deepnorm
         self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
         self.quantization_bit = quantization_bit
         self.rotary_embedding_2d = rotary_embedding_2d
         self.is_causal = is_causal
+        self.use_cache=use_cache
         self.use_pytorch_sdpa = use_pytorch_sdpa
         self.moe = moe
         self.num_experts = num_experts