Safetensors · transformers_zamba2 · zamba2
pglo committed · commit d922acf (verified) · 1 parent: 1f81a1f

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,63 +1,66 @@
 {
+  "adapter_rank": 128,
   "add_bias_linear": false,
-  "architectures": [
-    "Zamba2ForCausalLM"
-  ],
   "attention_dropout": 0.0,
+  "attention_head_dim": 128,
+  "attention_hidden_size": 4096,
   "bos_token_id": 1,
-  "conv_dimension": 4,
+  "chunk_size": 256,
   "eos_token_id": 2,
-  "expansion_factor": 2,
   "ffn_hidden_size": 8192,
-  "ft_lora": false,
-  "gated_linear_unit": true,
+  "hidden_act": "gelu",
   "hidden_size": 2048,
   "initializer_range": 0.02,
+  "intermediate_size": 8192,
   "kv_channels": 64,
   "layers_block_type": [
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m"
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba"
   ],
-  "lora_rank": 128,
+  "mamba_d_conv": 4,
+  "mamba_d_state": 128,
+  "mamba_expand": 2,
   "mamba_headdim": 64,
+  "mamba_ngroups": 1,
   "max_position_embeddings": 4096,
   "model_type": "zamba2",
+  "n_mamba_heads": 64,
   "num_attention_heads": 32,
   "num_hidden_layers": 38,
   "num_key_value_heads": 32,
@@ -67,14 +70,16 @@
   "pad_token_id": 0,
   "rms_norm_eps": 1e-05,
   "rope_theta": 10000,
-  "sliding_window": null,
-  "state_size": 128,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.43.0.dev0",
+  "time_step_floor": 0.0001,
+  "time_step_limit": null,
+  "time_step_max": 0.1,
+  "time_step_min": 0.001,
+  "transformers_version": "4.49.0.dev0",
   "use_cache": true,
-  "use_mamba_kernels": true,
+  "use_conv_bias": true,
+  "use_long_context": false,
   "use_mem_rope": true,
-  "use_shared_attention_lora": true,
-  "use_shared_block_lora": true,
+  "use_shared_attention_adapter": true,
+  "use_shared_mlp_adapter": true,
   "vocab_size": 32000
 }
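The config rewrite maps the standalone Zamba2 field names onto the ones the upstream transformers integration expects (for example lora_rank becomes adapter_rank, conv_dimension becomes mamba_d_conv, state_size becomes mamba_d_state, expansion_factor becomes mamba_expand, and the layers_block_type codes "m"/"g" are spelled out as "mamba"/"hybrid"). A minimal sketch (not part of this repo) of inspecting the updated config, assuming a transformers build that ships the Zamba2 integration (the file records "transformers_version": "4.49.0.dev0") and a hypothetical local checkout path:

from transformers import AutoConfig

# Hypothetical path to a local clone of this repository.
config = AutoConfig.from_pretrained("./zamba2-checkout")

print(config.model_type)             # "zamba2"
print(config.adapter_rank)           # 128 (formerly "lora_rank")
print(config.mamba_d_conv)           # 4   (formerly "conv_dimension")
print(config.layers_block_type[:6])  # ["mamba", "mamba", "mamba", "mamba", "mamba", "hybrid"]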
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:335f73273bfdd3a00287b0976624eccf4e48d9e35e79162faef5f478b61128dc
-size 2430175920
+oid sha256:2f3e6a10dbb0874cc0f4fc7f00e3ae5f03ab3eeecb250869b5250c472282eb32
+size 4860300264
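The LFS pointer now references a checkpoint roughly twice the old size (2,430,175,920 bytes to 4,860,300,264 bytes), and the new config no longer pins "torch_dtype": "bfloat16", which is consistent with the weights having been re-saved in a 32-bit dtype. A minimal sketch (not part of this repo) of casting back to bf16 on load to keep the memory footprint at the old level, again assuming a hypothetical local path:

import torch
from transformers import AutoModelForCausalLM

# Hypothetical path to a local clone of this repository.
model = AutoModelForCausalLM.from_pretrained(
    "./zamba2-checkout",
    torch_dtype=torch.bfloat16,  # cast the larger safetensors shards down while loading
)
print(model.dtype)  # torch.bfloat16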
special_tokens_map.json CHANGED
@@ -14,7 +14,7 @@
     "single_word": false
   },
   "unk_token": {
-    "content": "<unk>",
+    "content": "[PAD]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -5,7 +5,7 @@
   "added_tokens": [
     {
       "id": 0,
-      "content": "<unk>",
+      "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -129,14 +129,14 @@
   "model": {
     "type": "BPE",
     "dropout": null,
-    "unk_token": "<unk>",
+    "unk_token": null,
     "continuing_subword_prefix": null,
     "end_of_word_suffix": null,
     "fuse_unk": true,
     "byte_fallback": true,
     "ignore_merges": false,
     "vocab": {
-      "<unk>": 0,
+      "[PAD]": 0,
       "<s>": 1,
       "</s>": 2,
       "<0x00>": 3,
tokenizer_config.json CHANGED
@@ -4,7 +4,7 @@
   "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
-      "content": "<unk>",
+      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -34,10 +34,10 @@
   "eos_token": "</s>",
   "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
+  "pad_token": "[PAD]",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
+  "unk_token": null,
   "use_default_system_prompt": false
 }
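Together with the changes above, tokenizer_config.json and special_tokens_map.json now declare [PAD] as the pad token and drop the unk token, so padded batches can be built without registering a new special token. A minimal usage sketch (not part of the repo), assuming a hypothetical local checkout:

from transformers import AutoTokenizer

# Hypothetical path to a local clone of this repository.
tokenizer = AutoTokenizer.from_pretrained("./zamba2-checkout")

print(tokenizer.pad_token, tokenizer.pad_token_id)  # "[PAD]" 0
print(tokenizer.unk_token)                          # None

batch = tokenizer(["Hello", "A longer input sentence"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)  # both rows padded to the same length with id 0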