Safetensors · transformers_zamba2 · zamba2
pglo committed · commit d922acf (verified) · 1 parent: 1f81a1f

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,63 +1,66 @@
 {
+  "adapter_rank": 128,
   "add_bias_linear": false,
-  "architectures": [
-    "Zamba2ForCausalLM"
-  ],
   "attention_dropout": 0.0,
+  "attention_head_dim": 128,
+  "attention_hidden_size": 4096,
   "bos_token_id": 1,
-  "conv_dimension": 4,
+  "chunk_size": 256,
   "eos_token_id": 2,
-  "expansion_factor": 2,
   "ffn_hidden_size": 8192,
-  "ft_lora": false,
-  "gated_linear_unit": true,
+  "hidden_act": "gelu",
   "hidden_size": 2048,
   "initializer_range": 0.02,
+  "intermediate_size": 8192,
   "kv_channels": 64,
   "layers_block_type": [
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m"
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba"
   ],
-  "lora_rank": 128,
+  "mamba_d_conv": 4,
+  "mamba_d_state": 128,
+  "mamba_expand": 2,
   "mamba_headdim": 64,
+  "mamba_ngroups": 1,
   "max_position_embeddings": 4096,
   "model_type": "zamba2",
+  "n_mamba_heads": 64,
   "num_attention_heads": 32,
   "num_hidden_layers": 38,
   "num_key_value_heads": 32,
@@ -67,14 +70,16 @@
   "pad_token_id": 0,
   "rms_norm_eps": 1e-05,
   "rope_theta": 10000,
-  "sliding_window": null,
-  "state_size": 128,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.43.0.dev0",
+  "time_step_floor": 0.0001,
+  "time_step_limit": null,
+  "time_step_max": 0.1,
+  "time_step_min": 0.001,
+  "transformers_version": "4.49.0.dev0",
   "use_cache": true,
-  "use_mamba_kernels": true,
+  "use_conv_bias": true,
+  "use_long_context": false,
   "use_mem_rope": true,
-  "use_shared_attention_lora": true,
-  "use_shared_block_lora": true,
+  "use_shared_attention_adapter": true,
+  "use_shared_mlp_adapter": true,
   "vocab_size": 32000
 }
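The config rewrite maps the standalone Zamba2 field names onto the ones the upstream transformers integration expects (for example lora_rank becomes adapter_rank, conv_dimension becomes mamba_d_conv, state_size becomes mamba_d_state, expansion_factor becomes mamba_expand, and the layers_block_type codes "m"/"g" are spelled out as "mamba"/"hybrid"). A minimal sketch (not part of this repo) of inspecting the updated config, assuming a transformers build that ships the Zamba2 integration (the file records "transformers_version": "4.49.0.dev0") and a hypothetical local checkout path:

from transformers import AutoConfig

# Hypothetical path to a local clone of this repository.
config = AutoConfig.from_pretrained("./zamba2-checkout")

print(config.model_type)             # "zamba2"
print(config.adapter_rank)           # 128 (formerly "lora_rank")
print(config.mamba_d_conv)           # 4   (formerly "conv_dimension")
print(config.layers_block_type[:6])  # ["mamba", "mamba", "mamba", "mamba", "mamba", "hybrid"]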
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:335f73273bfdd3a00287b0976624eccf4e48d9e35e79162faef5f478b61128dc
-size 2430175920
+oid sha256:2f3e6a10dbb0874cc0f4fc7f00e3ae5f03ab3eeecb250869b5250c472282eb32
+size 4860300264
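The LFS pointer now references a checkpoint roughly twice the old size (2,430,175,920 bytes to 4,860,300,264 bytes), and the new config no longer pins "torch_dtype": "bfloat16", which is consistent with the weights having been re-saved in a 32-bit dtype. A minimal sketch (not part of this repo) of casting back to bf16 on load to keep the memory footprint at the old level, again assuming a hypothetical local path:

import torch
from transformers import AutoModelForCausalLM

# Hypothetical path to a local clone of this repository.
model = AutoModelForCausalLM.from_pretrained(
    "./zamba2-checkout",
    torch_dtype=torch.bfloat16,  # cast the larger safetensors shards down while loading
)
print(model.dtype)  # torch.bfloat16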
special_tokens_map.json CHANGED
@@ -14,7 +14,7 @@
     "single_word": false
   },
   "unk_token": {
-    "content": "<unk>",
+    "content": "[PAD]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -5,7 +5,7 @@
   "added_tokens": [
     {
       "id": 0,
-      "content": "<unk>",
+      "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
@@ -129,14 +129,14 @@
   "model": {
     "type": "BPE",
     "dropout": null,
-    "unk_token": "<unk>",
+    "unk_token": null,
     "continuing_subword_prefix": null,
     "end_of_word_suffix": null,
     "fuse_unk": true,
     "byte_fallback": true,
     "ignore_merges": false,
     "vocab": {
-      "<unk>": 0,
+      "[PAD]": 0,
       "<s>": 1,
       "</s>": 2,
       "<0x00>": 3,
tokenizer_config.json CHANGED
@@ -4,7 +4,7 @@
   "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
-      "content": "<unk>",
+      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -34,10 +34,10 @@
   "eos_token": "</s>",
   "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
+  "pad_token": "[PAD]",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",
-  "unk_token": "<unk>",
+  "unk_token": null,
   "use_default_system_prompt": false
 }
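Together with the changes above, tokenizer_config.json and special_tokens_map.json now declare [PAD] as the pad token and drop the unk token, so padded batches can be built without registering a new special token. A minimal usage sketch (not part of the repo), assuming a hypothetical local checkout:

from transformers import AutoTokenizer

# Hypothetical path to a local clone of this repository.
tokenizer = AutoTokenizer.from_pretrained("./zamba2-checkout")

print(tokenizer.pad_token, tokenizer.pad_token_id)  # "[PAD]" 0
print(tokenizer.unk_token)                          # None

batch = tokenizer(["Hello", "A longer input sentence"], padding=True, return_tensors="pt")
print(batch["input_ids"].shape)  # both rows padded to the same length with id 0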