qgallouedec HF Staff committed on
Commit
ca18b3a
·
verified ·
1 Parent(s): 505d3a7

Upload Gemma3ForConditionalGeneration

Browse files
Files changed (3) hide show
  1. config.json +50 -10
  2. generation_config.json +4 -1
  3. model.safetensors +2 -2
config.json CHANGED
@@ -4,6 +4,10 @@
4
  ],
5
  "boi_token_index": 255999,
6
  "eoi_token_index": 256000,
 
 
 
 
7
  "image_token_index": 262144,
8
  "initializer_range": 0.02,
9
  "mm_tokens_per_image": 256,
@@ -18,24 +22,59 @@
18
  "hidden_activation": "gelu_pytorch_tanh",
19
  "hidden_size": 16,
20
  "initializer_range": 0.02,
21
- "intermediate_size": 32,
22
  "layer_types": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "sliding_attention",
24
  "sliding_attention"
25
  ],
26
  "max_position_embeddings": 131072,
27
  "model_type": "gemma3_text",
28
- "num_attention_heads": 4,
29
  "num_hidden_layers": 2,
30
- "num_key_value_heads": 2,
31
  "query_pre_attn_scalar": 256,
32
  "rms_norm_eps": 1e-06,
33
  "rope_local_base_freq": 10000.0,
34
- "rope_scaling": null,
 
 
 
35
  "rope_theta": 1000000.0,
36
- "sliding_window": 4096,
37
  "use_cache": true,
38
- "vocab_size": 268559
39
  },
40
  "torch_dtype": "float32",
41
  "transformers_version": "4.56.0.dev0",
@@ -43,13 +82,14 @@
43
  "attention_dropout": 0.0,
44
  "hidden_act": "gelu_pytorch_tanh",
45
  "hidden_size": 16,
46
- "image_size": 336,
47
- "intermediate_size": 32,
48
  "layer_norm_eps": 1e-06,
49
  "model_type": "siglip_vision_model",
50
- "num_attention_heads": 4,
51
  "num_channels": 3,
52
  "num_hidden_layers": 2,
53
- "patch_size": 20
 
54
  }
55
  }
 
4
  ],
5
  "boi_token_index": 255999,
6
  "eoi_token_index": 256000,
7
+ "eos_token_id": [
8
+ 1,
9
+ 106
10
+ ],
11
  "image_token_index": 262144,
12
  "initializer_range": 0.02,
13
  "mm_tokens_per_image": 256,
 
22
  "hidden_activation": "gelu_pytorch_tanh",
23
  "hidden_size": 16,
24
  "initializer_range": 0.02,
25
+ "intermediate_size": 10240,
26
  "layer_types": [
27
+ "sliding_attention",
28
+ "sliding_attention",
29
+ "sliding_attention",
30
+ "sliding_attention",
31
+ "sliding_attention",
32
+ "full_attention",
33
+ "sliding_attention",
34
+ "sliding_attention",
35
+ "sliding_attention",
36
+ "sliding_attention",
37
+ "sliding_attention",
38
+ "full_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "sliding_attention",
42
+ "sliding_attention",
43
+ "sliding_attention",
44
+ "full_attention",
45
+ "sliding_attention",
46
+ "sliding_attention",
47
+ "sliding_attention",
48
+ "sliding_attention",
49
+ "sliding_attention",
50
+ "full_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "sliding_attention",
54
+ "sliding_attention",
55
+ "sliding_attention",
56
+ "full_attention",
57
+ "sliding_attention",
58
+ "sliding_attention",
59
  "sliding_attention",
60
  "sliding_attention"
61
  ],
62
  "max_position_embeddings": 131072,
63
  "model_type": "gemma3_text",
64
+ "num_attention_heads": 8,
65
  "num_hidden_layers": 2,
66
+ "num_key_value_heads": 4,
67
  "query_pre_attn_scalar": 256,
68
  "rms_norm_eps": 1e-06,
69
  "rope_local_base_freq": 10000.0,
70
+ "rope_scaling": {
71
+ "factor": 8.0,
72
+ "rope_type": "linear"
73
+ },
74
  "rope_theta": 1000000.0,
75
+ "sliding_window": 1024,
76
  "use_cache": true,
77
+ "vocab_size": 262208
78
  },
79
  "torch_dtype": "float32",
80
  "transformers_version": "4.56.0.dev0",
 
82
  "attention_dropout": 0.0,
83
  "hidden_act": "gelu_pytorch_tanh",
84
  "hidden_size": 16,
85
+ "image_size": 896,
86
+ "intermediate_size": 4304,
87
  "layer_norm_eps": 1e-06,
88
  "model_type": "siglip_vision_model",
89
+ "num_attention_heads": 16,
90
  "num_channels": 3,
91
  "num_hidden_layers": 2,
92
+ "patch_size": 14,
93
+ "vision_use_head": false
94
  }
95
  }
generation_config.json CHANGED
@@ -1,7 +1,10 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 2,
4
- "eos_token_id": 1,
 
 
 
5
  "pad_token_id": 0,
6
  "transformers_version": "4.56.0.dev0"
7
  }
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 2,
4
+ "eos_token_id": [
5
+ 1,
6
+ 106
7
+ ],
8
  "pad_token_id": 0,
9
  "transformers_version": "4.56.0.dev0"
10
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70e7e358822675eabb72cdeca6dc61b23d44706cdd77c86ff221249cae28c17c
3
- size 17728776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db80e8136185f45ba7732fd1f88b0fc7446ff8169fafdded5cdf9f57a7303b70
3
+ size 22959728