hans00 committed on
Commit
f14df53
·
verified ·
1 Parent(s): 92bf992

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
37
  onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
37
  onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
38
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json CHANGED
@@ -10,6 +10,7 @@
10
  "<quad>": 151668,
11
  "<ref>": 151670,
12
  "<tool_call>": 151657,
 
13
  "<|box_end|>": 151649,
14
  "<|box_start|>": 151648,
15
  "<|endoftext|>": 151643,
 
10
  "<quad>": 151668,
11
  "<ref>": 151670,
12
  "<tool_call>": 151657,
13
+ "<video>": 151674,
14
  "<|box_end|>": 151649,
15
  "<|box_start|>": 151648,
16
  "<|endoftext|>": 151643,
config.json CHANGED
@@ -1,76 +1,30 @@
1
  {
2
- "_commit_hash": null,
3
- "_name_or_path": "OpenGVLab/InternVL3-2B",
4
  "architectures": [
5
- "InternVLChatModel"
6
  ],
7
  "downsample_ratio": 0.5,
8
- "dynamic_image_size": true,
9
- "force_image_size": 448,
10
- "hidden_size": 1536,
11
- "image_fold": null,
12
- "llm_config": {
13
- "_attn_implementation_autoset": true,
14
- "_name_or_path": "./pretrained/Qwen2.5-32B-Instruct",
15
- "add_cross_attention": false,
16
  "architectures": [
17
  "Qwen2ForCausalLM"
18
  ],
19
  "attention_dropout": 0.0,
20
- "bad_words_ids": null,
21
- "begin_suppress_tokens": null,
22
  "bos_token_id": 151643,
23
- "chunk_size_feed_forward": 0,
24
- "cross_attention_hidden_size": null,
25
- "decoder_start_token_id": null,
26
- "diversity_penalty": 0.0,
27
- "do_sample": false,
28
- "early_stopping": false,
29
- "encoder_no_repeat_ngram_size": 0,
30
- "eos_token_id": 151643,
31
- "exponential_decay_length_penalty": null,
32
- "finetuning_task": null,
33
- "forced_bos_token_id": null,
34
- "forced_eos_token_id": null,
35
  "hidden_act": "silu",
36
  "hidden_size": 1536,
37
- "id2label": {
38
- "0": "LABEL_0",
39
- "1": "LABEL_1"
40
- },
41
  "initializer_range": 0.02,
42
  "intermediate_size": 8960,
43
- "is_decoder": false,
44
- "is_encoder_decoder": false,
45
- "label2id": {
46
- "LABEL_0": 0,
47
- "LABEL_1": 1
48
- },
49
- "length_penalty": 1.0,
50
- "max_length": 20,
51
  "max_position_embeddings": 32768,
52
  "max_window_layers": 70,
53
- "min_length": 0,
54
  "model_type": "qwen2",
55
- "moe_config": null,
56
- "no_repeat_ngram_size": 0,
57
  "num_attention_heads": 12,
58
- "num_beam_groups": 1,
59
- "num_beams": 1,
60
  "num_hidden_layers": 28,
61
  "num_key_value_heads": 2,
62
- "num_return_sequences": 1,
63
- "output_attentions": false,
64
- "output_hidden_states": false,
65
- "output_scores": false,
66
- "pad_token_id": null,
67
- "prefix": null,
68
- "problem_type": null,
69
- "pruned_heads": {},
70
- "remove_invalid_values": false,
71
- "repetition_penalty": 1.0,
72
- "return_dict": true,
73
- "return_dict_in_generate": false,
74
  "rms_norm_eps": 1e-06,
75
  "rope_scaling": {
76
  "factor": 2.0,
@@ -78,142 +32,46 @@
78
  "type": "dynamic"
79
  },
80
  "rope_theta": 1000000.0,
81
- "sep_token_id": null,
82
  "sliding_window": null,
83
- "suppress_tokens": null,
84
- "task_specific_params": null,
85
- "temperature": 1.0,
86
- "tf_legacy_loss": false,
87
- "tie_encoder_decoder": false,
88
- "tie_word_embeddings": false,
89
- "tokenizer_class": null,
90
- "top_k": 50,
91
- "top_p": 1.0,
92
  "torch_dtype": "bfloat16",
93
- "torchscript": false,
94
- "transformers_version": "4.48.3",
95
- "typical_p": 1.0,
96
- "use_bfloat16": true,
97
- "use_cache": false,
98
  "use_sliding_window": false,
99
  "vocab_size": 151674
100
  },
101
- "max_dynamic_patch": 12,
102
- "min_dynamic_patch": 1,
103
- "model_type": "internvl_chat",
104
- "pad2square": false,
105
- "ps_version": "v2",
106
- "select_layer": -1,
107
- "system_message": null,
108
- "template": "internvl2_5",
109
- "tie_word_embeddings": false,
110
  "torch_dtype": "bfloat16",
111
- "transformers_version": null,
112
- "use_backbone_lora": 0,
113
- "use_llm_lora": 0,
114
- "use_thumbnail": true,
115
  "vision_config": {
116
- "_attn_implementation_autoset": true,
117
- "_name_or_path": "OpenGVLab/InternViT-6B-448px-V1-5",
118
- "add_cross_attention": false,
119
  "architectures": [
120
  "InternVisionModel"
121
  ],
 
122
  "attention_dropout": 0.0,
123
- "auto_map": {
124
- "AutoConfig": "configuration_intern_vit.InternVisionConfig",
125
- "AutoModel": "modeling_intern_vit.InternVisionModel"
126
- },
127
- "bad_words_ids": null,
128
- "begin_suppress_tokens": null,
129
- "bos_token_id": null,
130
- "capacity_factor": 1.2,
131
- "chunk_size_feed_forward": 0,
132
- "cross_attention_hidden_size": null,
133
- "decoder_start_token_id": null,
134
- "diversity_penalty": 0.0,
135
- "do_sample": false,
136
- "drop_path_rate": 0.1,
137
  "dropout": 0.0,
138
- "early_stopping": false,
139
- "encoder_no_repeat_ngram_size": 0,
140
- "eos_token_id": null,
141
- "eval_capacity_factor": 1.4,
142
- "exponential_decay_length_penalty": null,
143
- "finetuning_task": null,
144
- "forced_bos_token_id": null,
145
- "forced_eos_token_id": null,
146
  "hidden_act": "gelu",
 
147
  "hidden_size": 1024,
148
- "id2label": {
149
- "0": "LABEL_0",
150
- "1": "LABEL_1"
151
- },
152
- "image_size": 448,
153
  "initializer_factor": 0.1,
154
  "initializer_range": 1e-10,
155
  "intermediate_size": 4096,
156
- "is_decoder": false,
157
- "is_encoder_decoder": false,
158
- "label2id": {
159
- "LABEL_0": 0,
160
- "LABEL_1": 1
161
- },
162
- "laux_allreduce": "all_nodes",
163
  "layer_norm_eps": 1e-06,
164
- "length_penalty": 1.0,
165
- "max_length": 20,
166
- "min_length": 0,
167
- "model_type": "intern_vit_6b",
168
- "moe_coeff_ratio": 0.5,
169
- "moe_intermediate_size": 768,
170
- "moe_output_scale": 4.0,
171
- "no_repeat_ngram_size": 0,
172
- "noisy_gate_policy": "RSample_before",
173
  "norm_type": "layer_norm",
174
  "num_attention_heads": 16,
175
- "num_beam_groups": 1,
176
- "num_beams": 1,
177
  "num_channels": 3,
178
- "num_experts": 8,
179
  "num_hidden_layers": 24,
180
- "num_return_sequences": 1,
181
- "num_routed_experts": 4,
182
- "num_shared_experts": 4,
183
- "output_attentions": false,
184
- "output_hidden_states": false,
185
- "output_scores": false,
186
- "pad_token_id": null,
187
  "patch_size": 14,
188
- "prefix": null,
189
- "problem_type": null,
190
- "pruned_heads": {},
191
- "qk_normalization": false,
192
- "qkv_bias": true,
193
- "remove_invalid_values": false,
194
- "repetition_penalty": 1.0,
195
- "return_dict": true,
196
- "return_dict_in_generate": false,
197
- "sep_token_id": null,
198
- "shared_expert_intermediate_size": 3072,
199
- "suppress_tokens": null,
200
- "task_specific_params": null,
201
- "temperature": 1.0,
202
- "tf_legacy_loss": false,
203
- "tie_encoder_decoder": false,
204
- "tie_word_embeddings": true,
205
- "tokenizer_class": null,
206
- "top_k": 50,
207
- "top_p": 1.0,
208
  "torch_dtype": "bfloat16",
209
- "torchscript": false,
210
- "transformers_version": "4.48.3",
211
- "typical_p": 1.0,
212
- "use_bfloat16": true,
213
- "use_flash_attn": true,
214
- "use_moe": false,
215
- "use_residual": true,
216
- "use_rts": false,
217
- "use_weighted_residual": false
218
- }
219
  }
 
1
  {
2
+ "_name_or_path": "OpenGVLab/InternVL3-2B-hf",
 
3
  "architectures": [
4
+ "InternVLForConditionalGeneration"
5
  ],
6
  "downsample_ratio": 0.5,
7
+ "image_seq_length": 256,
8
+ "image_token_id": 151667,
9
+ "model_type": "internvl",
10
+ "projector_hidden_act": "gelu",
11
+ "text_config": {
 
 
 
12
  "architectures": [
13
  "Qwen2ForCausalLM"
14
  ],
15
  "attention_dropout": 0.0,
 
 
16
  "bos_token_id": 151643,
17
+ "eos_token_id": 151645,
 
 
 
 
 
 
 
 
 
 
 
18
  "hidden_act": "silu",
19
  "hidden_size": 1536,
 
 
 
 
20
  "initializer_range": 0.02,
21
  "intermediate_size": 8960,
 
 
 
 
 
 
 
 
22
  "max_position_embeddings": 32768,
23
  "max_window_layers": 70,
 
24
  "model_type": "qwen2",
 
 
25
  "num_attention_heads": 12,
 
 
26
  "num_hidden_layers": 28,
27
  "num_key_value_heads": 2,
 
 
 
 
 
 
 
 
 
 
 
 
28
  "rms_norm_eps": 1e-06,
29
  "rope_scaling": {
30
  "factor": 2.0,
 
32
  "type": "dynamic"
33
  },
34
  "rope_theta": 1000000.0,
 
35
  "sliding_window": null,
 
 
 
 
 
 
 
 
 
36
  "torch_dtype": "bfloat16",
37
+ "use_cache": true,
 
 
 
 
38
  "use_sliding_window": false,
39
  "vocab_size": 151674
40
  },
 
 
 
 
 
 
 
 
 
41
  "torch_dtype": "bfloat16",
42
+ "transformers_version": "4.52.0.dev0",
 
 
 
43
  "vision_config": {
 
 
 
44
  "architectures": [
45
  "InternVisionModel"
46
  ],
47
+ "attention_bias": true,
48
  "attention_dropout": 0.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  "dropout": 0.0,
 
 
 
 
 
 
 
 
50
  "hidden_act": "gelu",
51
+ "hidden_dropout_prob": 0.0,
52
  "hidden_size": 1024,
53
+ "image_size": [
54
+ 448,
55
+ 448
56
+ ],
 
57
  "initializer_factor": 0.1,
58
  "initializer_range": 1e-10,
59
  "intermediate_size": 4096,
 
 
 
 
 
 
 
60
  "layer_norm_eps": 1e-06,
61
+ "layer_scale_init_value": 0.1,
62
+ "model_type": "internvl_vision",
 
 
 
 
 
 
 
63
  "norm_type": "layer_norm",
64
  "num_attention_heads": 16,
 
 
65
  "num_channels": 3,
 
66
  "num_hidden_layers": 24,
 
 
 
 
 
 
 
67
  "patch_size": 14,
68
+ "projection_dropout": 0.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  "torch_dtype": "bfloat16",
70
+ "use_absolute_position_embeddings": true,
71
+ "use_mask_token": false,
72
+ "use_mean_pooling": true,
73
+ "use_qk_norm": false
74
+ },
75
+ "vision_feature_layer": -1,
76
+ "vision_feature_select_strategy": "default"
 
 
 
77
  }
generation_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
2
  "_from_model_config": true,
3
- "transformers_version": "4.48.3"
 
 
4
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.52.0.dev0"
6
  }
onnx/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b86d0f6387aff4168f5f8ea8ba138ab1f157f98f96725c884b32596ef681da2
3
- size 812416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f020efc01cdfbe3c959d371dae9353794e3ec74122ef3a4446a9c20123dc1582
3
+ size 1008073
onnx/decoder_model_merged_bnb4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73fbf069a7de5ef7f3f06933349729d7939fa87bd1abfa13d0024d6a482996b6
3
- size 869427722
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:effc3b5767679ed682e9ecb3669ee91d654dc42a7ab40768469b720e2f928f64
3
+ size 869623343
onnx/decoder_model_merged_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b853540502c6a8172e2bd518601ba5d1404c82ab276c4ecf8b9e3aa66ead275c
3
- size 851104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1498a0cfb4872c182079214a0dbce59a16821135dcb8ffb407b2890e63bdc2b8
3
+ size 1046033
onnx/decoder_model_merged_int8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4096dd92a63e1303f26fd20cea5ffbdc9ac79aa9bb806181178cd506569d3d6f
3
- size 1548732008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c56a19d84d85782454046cf56a1be882a21ae575ee25d341a886f490e226bd18
3
+ size 1548968535
onnx/decoder_model_merged_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87433eb12033da23e7800b6e544bdcc6125d7d038fb819e981876cb66711a485
3
- size 965874026
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71ffb4bdfdb8a62278acb979d7edc2a1633bebc54337791888104393c4780417
3
+ size 966069647
onnx/decoder_model_merged_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:067af8e730d05fc1591c6c5ba992261b7229052c020160388df2bd391b938b3a
3
- size 869149396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a16af4208f7acea05586a68fd8647bd020c03978abbefdd414f5098719300659
3
+ size 869344323
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4096dd92a63e1303f26fd20cea5ffbdc9ac79aa9bb806181178cd506569d3d6f
3
- size 1548732008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c56a19d84d85782454046cf56a1be882a21ae575ee25d341a886f490e226bd18
3
+ size 1548968535
onnx/decoder_model_merged_uint8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7944385d44e1691acec4b522d23491ff97daf54f287d48f2466f7d61ee4ceb1e
3
- size 1548732008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dc0eeb12a3d5a29d86818b8fc998a5a30e5aa49b452c9c0867b176aa2b8dd19
3
+ size 1548968535
onnx/embed_tokens.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdf81e17f9bec5199802212ca7e7667e14b1aa839727315bf40d41587b360c73
3
  size 931885394
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe1785d6723c5a6d174d6329d44760f36f43b68a7014bfe8702b592d4802b8be
3
  size 931885394
onnx/embed_tokens_bnb4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba45088c4dec51d451460f9751c3635bd06ce80083b07d7e2a015116800605bb
3
  size 931885413
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e362b55fc08561241f7591fb2983ac6bd8ba14fd5eed8b5c4d4318e84c4e1e5
3
  size 931885413
onnx/embed_tokens_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05512597438cbe877479d1707397f08345beec9f3bda56233ecfb83f52a3803b
3
  size 465942894
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a025fdc951e69075017852821ed5e9d8771e759482384d6bdb09a4e7af3ae76
3
  size 465942894
onnx/embed_tokens_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba45088c4dec51d451460f9751c3635bd06ce80083b07d7e2a015116800605bb
3
  size 931885413
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e362b55fc08561241f7591fb2983ac6bd8ba14fd5eed8b5c4d4318e84c4e1e5
3
  size 931885413
onnx/embed_tokens_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4074de851bd10407a308c3769baa2d5a3c977ac8a3b0ec353c791c5e2b7b024
3
  size 465942913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deba2fabe018575a7b9b2f3119655cc5ce4c3655113442064ee18b0160e81058
3
  size 465942913
onnx/image_embeds.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91b7fdf672cb666540e58c78155793bcd85d6d656d6a9468c8a6540f692219a0
3
- size 1251170210
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b309189b0470bc37a5b171a4552be6322ba93ae03d98a331c3416763d0835ef3
3
+ size 1251245175
onnx/image_embeds_bnb4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:398c754150154676300c795d18fda1c7f079f7688dc0fbe38e2b0d6c311e5c31
3
- size 183358465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74af480b5403ce4dbec7b5671d743669eec4b3b3bf98bcbc56a11c545402b480
3
+ size 183440916
onnx/image_embeds_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac118eb4074905960e10a77dd6f3d0fdc5b795a12932728c329e826754fc270e
3
- size 627919100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:772e9f3d27390cff1c113e1997d1f55a2986c9ce73e9f1fea91c5b7ec2fca15f
3
+ size 627994070
onnx/image_embeds_int8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c16361e8c9ec130d0226569dfd2d016b74a3301dd8f9af6bf807d80b3f32a17b
3
- size 318695126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74dfec899834faa47e9f8d035e0613aa72bb9e28712b874eed8a2bfe6638b301
3
+ size 318835232
onnx/image_embeds_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62115536ece81778f9388277b3ca19eb54de01c62578e972ff979fb0298f6a55
3
- size 202772697
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8225bd15503f8f9c69a25443c249d295b4b3a0f1868f0493606b96d49cc9515c
3
+ size 202854764
onnx/image_embeds_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2f75f24d5669e061be4fcb472a639249fa2d1890e6599b71490a6993c5ee397
3
- size 181387827
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a59391d8aeb7f20f6f22cae0a30ea26488a52efa526c33e4d69c242a365a290c
3
+ size 181469899
onnx/image_embeds_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:155423eafc5f92ddd1b03c5e077f07622410c4e4f91bfcd921fef3052267ab78
3
- size 318695126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066d908399a1ffc4f26831b51590313faa8f304600996646efc8c1023137ec54
3
+ size 318835232
onnx/image_embeds_uint8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:155423eafc5f92ddd1b03c5e077f07622410c4e4f91bfcd921fef3052267ab78
3
- size 318695126
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066d908399a1ffc4f26831b51590313faa8f304600996646efc8c1023137ec54
3
+ size 318835232
special_tokens_map.json CHANGED
@@ -12,8 +12,19 @@
12
  "<|vision_end|>",
13
  "<|vision_pad|>",
14
  "<|image_pad|>",
15
- "<|video_pad|>"
 
 
 
 
 
 
 
 
 
16
  ],
 
 
17
  "eos_token": {
18
  "content": "<|im_end|>",
19
  "lstrip": false,
@@ -27,5 +38,7 @@
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
- }
 
 
31
  }
 
12
  "<|vision_end|>",
13
  "<|vision_pad|>",
14
  "<|image_pad|>",
15
+ "<|video_pad|>",
16
+ "<img>",
17
+ "</img>",
18
+ "<IMG_CONTEXT>",
19
+ "<quad>",
20
+ "</quad>",
21
+ "<ref>",
22
+ "</ref>",
23
+ "<box>",
24
+ "</box>"
25
  ],
26
+ "context_image_token": "<IMG_CONTEXT>",
27
+ "end_image_token": "</img>",
28
  "eos_token": {
29
  "content": "<|im_end|>",
30
  "lstrip": false,
 
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false
41
+ },
42
+ "start_image_token": "<img>",
43
+ "video_token": "<video>"
44
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,6 +1,5 @@
1
  {
2
  "add_bos_token": false,
3
- "add_eos_token": false,
4
  "add_prefix_space": false,
5
  "added_tokens_decoder": {
6
  "151643": {
@@ -250,6 +249,14 @@
250
  "rstrip": false,
251
  "single_word": false,
252
  "special": true
 
 
 
 
 
 
 
 
253
  }
254
  },
255
  "additional_special_tokens": [
@@ -265,17 +272,35 @@
265
  "<|vision_end|>",
266
  "<|vision_pad|>",
267
  "<|image_pad|>",
268
- "<|video_pad|>"
 
 
 
 
 
 
 
 
 
269
  ],
270
  "bos_token": null,
271
- "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
272
  "clean_up_tokenization_spaces": false,
 
 
273
  "eos_token": "<|im_end|>",
274
  "errors": "replace",
275
- "extra_special_tokens": {},
276
- "model_max_length": 12288,
 
 
 
 
 
277
  "pad_token": "<|endoftext|>",
 
278
  "split_special_tokens": false,
 
279
  "tokenizer_class": "Qwen2Tokenizer",
280
- "unk_token": null
 
281
  }
 
1
  {
2
  "add_bos_token": false,
 
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
  "151643": {
 
249
  "rstrip": false,
250
  "single_word": false,
251
  "special": true
252
+ },
253
+ "151674": {
254
+ "content": "<video>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
  }
261
  },
262
  "additional_special_tokens": [
 
272
  "<|vision_end|>",
273
  "<|vision_pad|>",
274
  "<|image_pad|>",
275
+ "<|video_pad|>",
276
+ "<img>",
277
+ "</img>",
278
+ "<IMG_CONTEXT>",
279
+ "<quad>",
280
+ "</quad>",
281
+ "<ref>",
282
+ "</ref>",
283
+ "<box>",
284
+ "</box>"
285
  ],
286
  "bos_token": null,
 
287
  "clean_up_tokenization_spaces": false,
288
+ "context_image_token": "<IMG_CONTEXT>",
289
+ "end_image_token": "</img>",
290
  "eos_token": "<|im_end|>",
291
  "errors": "replace",
292
+ "extra_special_tokens": {
293
+ "context_image_token": "<IMG_CONTEXT>",
294
+ "end_image_token": "</img>",
295
+ "start_image_token": "<img>",
296
+ "video_token": "<video>"
297
+ },
298
+ "model_max_length": 8192,
299
  "pad_token": "<|endoftext|>",
300
+ "return_token_type_ids": false,
301
  "split_special_tokens": false,
302
+ "start_image_token": "<img>",
303
  "tokenizer_class": "Qwen2Tokenizer",
304
+ "unk_token": null,
305
+ "video_token": "<video>"
306
  }
vocab.json CHANGED
The diff for this file is too large to render. See raw diff