danielhanchen commited on
Commit
951d1ec
·
verified ·
1 Parent(s): cf32fdb

Add files using upload-large-folder tool

Browse files
config.json CHANGED
@@ -1,10 +1,9 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2-VL-72B-Instruct",
3
  "architectures": [
4
  "Qwen2VLForConditionalGeneration"
5
  ],
6
  "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
  "eos_token_id": 151645,
9
  "hidden_act": "silu",
10
  "hidden_size": 8192,
@@ -27,7 +26,12 @@
27
  "bnb_4bit_use_double_quant": true,
28
  "llm_int8_enable_fp32_cpu_offload": false,
29
  "llm_int8_has_fp16_weight": false,
30
- "llm_int8_skip_modules": null,
 
 
 
 
 
31
  "llm_int8_threshold": 6.0,
32
  "load_in_4bit": true,
33
  "load_in_8bit": false,
@@ -47,7 +51,7 @@
47
  "sliding_window": 32768,
48
  "tie_word_embeddings": false,
49
  "torch_dtype": "bfloat16",
50
- "transformers_version": "4.46.3",
51
  "unsloth_fixed": true,
52
  "use_cache": true,
53
  "use_sliding_window": false,
@@ -56,7 +60,8 @@
56
  "hidden_size": 8192,
57
  "in_chans": 3,
58
  "model_type": "qwen2_vl",
59
- "spatial_patch_size": 14
 
60
  },
61
  "vision_end_token_id": 151653,
62
  "vision_start_token_id": 151652,
 
1
  {
2
+ "_name_or_path": "unsloth/Qwen2-VL-72B-Instruct",
3
  "architectures": [
4
  "Qwen2VLForConditionalGeneration"
5
  ],
6
  "attention_dropout": 0.0,
 
7
  "eos_token_id": 151645,
8
  "hidden_act": "silu",
9
  "hidden_size": 8192,
 
26
  "bnb_4bit_use_double_quant": true,
27
  "llm_int8_enable_fp32_cpu_offload": false,
28
  "llm_int8_has_fp16_weight": false,
29
+ "llm_int8_skip_modules": [
30
+ "lm_head",
31
+ "multi_modal_projector",
32
+ "merger",
33
+ "modality_projection"
34
+ ],
35
  "llm_int8_threshold": 6.0,
36
  "load_in_4bit": true,
37
  "load_in_8bit": false,
 
51
  "sliding_window": 32768,
52
  "tie_word_embeddings": false,
53
  "torch_dtype": "bfloat16",
54
+ "transformers_version": "4.49.0",
55
  "unsloth_fixed": true,
56
  "use_cache": true,
57
  "use_sliding_window": false,
 
60
  "hidden_size": 8192,
61
  "in_chans": 3,
62
  "model_type": "qwen2_vl",
63
+ "spatial_patch_size": 14,
64
+ "torch_dtype": "bfloat16"
65
  },
66
  "vision_end_token_id": 151653,
67
  "vision_start_token_id": 151652,
generation_config.json CHANGED
@@ -10,5 +10,5 @@
10
  "repetition_penalty": 1.05,
11
  "top_k": 1,
12
  "top_p": 0.001,
13
- "transformers_version": "4.46.3"
14
  }
 
10
  "repetition_penalty": 1.05,
11
  "top_k": 1,
12
  "top_p": 0.001,
13
+ "transformers_version": "4.49.0"
14
  }
model-00001-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb657cd6fd71c36f9e32dccc9fbf63ce4edcdc91ce6c59b905b388866c2a81f4
3
- size 4994653119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e13715a3a74d8069d3ce48f2707a015b4d2961ad5e72e99fdd2631de1d97153
3
+ size 4970847674
model-00002-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a3e1ad376a674af43cea78e6bef3ca47ec18ff094e4a0899a160a2204cbf75e
3
- size 4981068110
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c549de5db4f49f14ca490d8eb55dc5d6cdbbd18401ad5534a042079e20e04b62
3
+ size 4981068103
model-00003-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:099a221caefd20edde061677478df495db15bfd1d105b40c90053cbb261765f3
3
- size 4981068356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa82dbbd5a0da35c3d52f55574be811561859f861ec59d9457231a15eecf0803
3
+ size 4981068373
model-00004-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2c7918bb36f45562fa3661bbbc9b27fc9ade4b075bd0910b6bb2cac1de9a088
3
- size 4981068368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65bdd4c5d14745baacb47035550b14e5b91bc8e916d8f24f892796abc2e63db6
3
+ size 4981068384
model-00005-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fee0a6a61d2069170a6d05e9af28db5d8aedda19d2c1a80eaa4ab0fb1e134a0
3
- size 4981068366
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d25758df29ad0229f4c511766fcbfa0a1c4fd61c8363483b808aa82e76a53314
3
+ size 4981068383
model-00006-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53df35a4aaecd9229b86e2d1bc264c134de541099a7f7a3c73a0bf6c4d6c8cbb
3
- size 4981068358
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9a9a17004df3311144e7b8f739cf43d20c4ed784c2698735d42eab662885ef4
3
+ size 4981068372
model-00007-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:007083b98c30c543981785b022d4ca8a2f0a7f75c690925059c8f2155e29059a
3
- size 4981068353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3f93c26d32b06d3de4cfb76d48924b88c8c29204a7bd6e73ccdac99e486f3b8
3
+ size 4981068371
model-00008-of-00009.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3c51503da3bd8d9b09d38fbeef232de7179aec7ee86b20b5d891c3a33e5ece2
3
- size 4200425336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1779cff4999f4c72280f47ef1c40f421913d8b6f6b9dd1821c5efbf5f90e5f8f
3
+ size 4325381691
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 41572342422
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00009-of-00009.safetensors",
@@ -347,12 +347,12 @@
347
  "model.layers.15.mlp.gate_proj.weight.nested_quant_map": "model-00002-of-00009.safetensors",
348
  "model.layers.15.mlp.gate_proj.weight.quant_map": "model-00002-of-00009.safetensors",
349
  "model.layers.15.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00009.safetensors",
350
- "model.layers.15.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
351
- "model.layers.15.mlp.up_proj.weight.absmax": "model-00002-of-00009.safetensors",
352
- "model.layers.15.mlp.up_proj.weight.nested_absmax": "model-00002-of-00009.safetensors",
353
- "model.layers.15.mlp.up_proj.weight.nested_quant_map": "model-00002-of-00009.safetensors",
354
- "model.layers.15.mlp.up_proj.weight.quant_map": "model-00002-of-00009.safetensors",
355
- "model.layers.15.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00009.safetensors",
356
  "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
357
  "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00009.safetensors",
358
  "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
@@ -911,12 +911,12 @@
911
  "model.layers.26.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00009.safetensors",
912
  "model.layers.26.mlp.gate_proj.weight.quant_map": "model-00003-of-00009.safetensors",
913
  "model.layers.26.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00009.safetensors",
914
- "model.layers.26.mlp.up_proj.weight": "model-00003-of-00009.safetensors",
915
- "model.layers.26.mlp.up_proj.weight.absmax": "model-00003-of-00009.safetensors",
916
- "model.layers.26.mlp.up_proj.weight.nested_absmax": "model-00003-of-00009.safetensors",
917
- "model.layers.26.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00009.safetensors",
918
- "model.layers.26.mlp.up_proj.weight.quant_map": "model-00003-of-00009.safetensors",
919
- "model.layers.26.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00009.safetensors",
920
  "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
921
  "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00009.safetensors",
922
  "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
@@ -1475,12 +1475,12 @@
1475
  "model.layers.37.mlp.gate_proj.weight.nested_quant_map": "model-00004-of-00009.safetensors",
1476
  "model.layers.37.mlp.gate_proj.weight.quant_map": "model-00004-of-00009.safetensors",
1477
  "model.layers.37.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00004-of-00009.safetensors",
1478
- "model.layers.37.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
1479
- "model.layers.37.mlp.up_proj.weight.absmax": "model-00004-of-00009.safetensors",
1480
- "model.layers.37.mlp.up_proj.weight.nested_absmax": "model-00004-of-00009.safetensors",
1481
- "model.layers.37.mlp.up_proj.weight.nested_quant_map": "model-00004-of-00009.safetensors",
1482
- "model.layers.37.mlp.up_proj.weight.quant_map": "model-00004-of-00009.safetensors",
1483
- "model.layers.37.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00004-of-00009.safetensors",
1484
  "model.layers.37.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
1485
  "model.layers.37.self_attn.k_proj.bias": "model-00004-of-00009.safetensors",
1486
  "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
@@ -1616,12 +1616,12 @@
1616
  "model.layers.4.mlp.gate_proj.weight.nested_quant_map": "model-00001-of-00009.safetensors",
1617
  "model.layers.4.mlp.gate_proj.weight.quant_map": "model-00001-of-00009.safetensors",
1618
  "model.layers.4.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00009.safetensors",
1619
- "model.layers.4.mlp.up_proj.weight": "model-00001-of-00009.safetensors",
1620
- "model.layers.4.mlp.up_proj.weight.absmax": "model-00001-of-00009.safetensors",
1621
- "model.layers.4.mlp.up_proj.weight.nested_absmax": "model-00001-of-00009.safetensors",
1622
- "model.layers.4.mlp.up_proj.weight.nested_quant_map": "model-00001-of-00009.safetensors",
1623
- "model.layers.4.mlp.up_proj.weight.quant_map": "model-00001-of-00009.safetensors",
1624
- "model.layers.4.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00009.safetensors",
1625
  "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
1626
  "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00009.safetensors",
1627
  "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
@@ -2039,12 +2039,12 @@
2039
  "model.layers.48.mlp.gate_proj.weight.nested_quant_map": "model-00005-of-00009.safetensors",
2040
  "model.layers.48.mlp.gate_proj.weight.quant_map": "model-00005-of-00009.safetensors",
2041
  "model.layers.48.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00005-of-00009.safetensors",
2042
- "model.layers.48.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
2043
- "model.layers.48.mlp.up_proj.weight.absmax": "model-00005-of-00009.safetensors",
2044
- "model.layers.48.mlp.up_proj.weight.nested_absmax": "model-00005-of-00009.safetensors",
2045
- "model.layers.48.mlp.up_proj.weight.nested_quant_map": "model-00005-of-00009.safetensors",
2046
- "model.layers.48.mlp.up_proj.weight.quant_map": "model-00005-of-00009.safetensors",
2047
- "model.layers.48.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00005-of-00009.safetensors",
2048
  "model.layers.48.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
2049
  "model.layers.48.self_attn.k_proj.bias": "model-00005-of-00009.safetensors",
2050
  "model.layers.48.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
@@ -2603,12 +2603,12 @@
2603
  "model.layers.59.mlp.gate_proj.weight.nested_quant_map": "model-00006-of-00009.safetensors",
2604
  "model.layers.59.mlp.gate_proj.weight.quant_map": "model-00006-of-00009.safetensors",
2605
  "model.layers.59.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00006-of-00009.safetensors",
2606
- "model.layers.59.mlp.up_proj.weight": "model-00006-of-00009.safetensors",
2607
- "model.layers.59.mlp.up_proj.weight.absmax": "model-00006-of-00009.safetensors",
2608
- "model.layers.59.mlp.up_proj.weight.nested_absmax": "model-00006-of-00009.safetensors",
2609
- "model.layers.59.mlp.up_proj.weight.nested_quant_map": "model-00006-of-00009.safetensors",
2610
- "model.layers.59.mlp.up_proj.weight.quant_map": "model-00006-of-00009.safetensors",
2611
- "model.layers.59.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00006-of-00009.safetensors",
2612
  "model.layers.59.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
2613
  "model.layers.59.self_attn.k_proj.bias": "model-00006-of-00009.safetensors",
2614
  "model.layers.59.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
@@ -3214,12 +3214,12 @@
3214
  "model.layers.70.mlp.gate_proj.weight.nested_quant_map": "model-00007-of-00009.safetensors",
3215
  "model.layers.70.mlp.gate_proj.weight.quant_map": "model-00007-of-00009.safetensors",
3216
  "model.layers.70.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00007-of-00009.safetensors",
3217
- "model.layers.70.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
3218
- "model.layers.70.mlp.up_proj.weight.absmax": "model-00007-of-00009.safetensors",
3219
- "model.layers.70.mlp.up_proj.weight.nested_absmax": "model-00007-of-00009.safetensors",
3220
- "model.layers.70.mlp.up_proj.weight.nested_quant_map": "model-00007-of-00009.safetensors",
3221
- "model.layers.70.mlp.up_proj.weight.quant_map": "model-00007-of-00009.safetensors",
3222
- "model.layers.70.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00007-of-00009.safetensors",
3223
  "model.layers.70.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
3224
  "model.layers.70.self_attn.k_proj.bias": "model-00007-of-00009.safetensors",
3225
  "model.layers.70.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
@@ -4794,18 +4794,8 @@
4794
  "visual.merger.ln_q.weight": "model-00001-of-00009.safetensors",
4795
  "visual.merger.mlp.0.bias": "model-00001-of-00009.safetensors",
4796
  "visual.merger.mlp.0.weight": "model-00001-of-00009.safetensors",
4797
- "visual.merger.mlp.0.weight.absmax": "model-00001-of-00009.safetensors",
4798
- "visual.merger.mlp.0.weight.nested_absmax": "model-00001-of-00009.safetensors",
4799
- "visual.merger.mlp.0.weight.nested_quant_map": "model-00001-of-00009.safetensors",
4800
- "visual.merger.mlp.0.weight.quant_map": "model-00001-of-00009.safetensors",
4801
- "visual.merger.mlp.0.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00009.safetensors",
4802
  "visual.merger.mlp.2.bias": "model-00001-of-00009.safetensors",
4803
  "visual.merger.mlp.2.weight": "model-00001-of-00009.safetensors",
4804
- "visual.merger.mlp.2.weight.absmax": "model-00001-of-00009.safetensors",
4805
- "visual.merger.mlp.2.weight.nested_absmax": "model-00001-of-00009.safetensors",
4806
- "visual.merger.mlp.2.weight.nested_quant_map": "model-00001-of-00009.safetensors",
4807
- "visual.merger.mlp.2.weight.quant_map": "model-00001-of-00009.safetensors",
4808
- "visual.merger.mlp.2.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00009.safetensors",
4809
  "visual.patch_embed.proj.weight": "model-00001-of-00009.safetensors"
4810
  }
4811
  }
 
1
  {
2
  "metadata": {
3
+ "total_size": 41673494463
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00009-of-00009.safetensors",
 
347
  "model.layers.15.mlp.gate_proj.weight.nested_quant_map": "model-00002-of-00009.safetensors",
348
  "model.layers.15.mlp.gate_proj.weight.quant_map": "model-00002-of-00009.safetensors",
349
  "model.layers.15.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00009.safetensors",
350
+ "model.layers.15.mlp.up_proj.weight": "model-00003-of-00009.safetensors",
351
+ "model.layers.15.mlp.up_proj.weight.absmax": "model-00003-of-00009.safetensors",
352
+ "model.layers.15.mlp.up_proj.weight.nested_absmax": "model-00003-of-00009.safetensors",
353
+ "model.layers.15.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00009.safetensors",
354
+ "model.layers.15.mlp.up_proj.weight.quant_map": "model-00003-of-00009.safetensors",
355
+ "model.layers.15.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00009.safetensors",
356
  "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00009.safetensors",
357
  "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00009.safetensors",
358
  "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00009.safetensors",
 
911
  "model.layers.26.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00009.safetensors",
912
  "model.layers.26.mlp.gate_proj.weight.quant_map": "model-00003-of-00009.safetensors",
913
  "model.layers.26.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00009.safetensors",
914
+ "model.layers.26.mlp.up_proj.weight": "model-00004-of-00009.safetensors",
915
+ "model.layers.26.mlp.up_proj.weight.absmax": "model-00004-of-00009.safetensors",
916
+ "model.layers.26.mlp.up_proj.weight.nested_absmax": "model-00004-of-00009.safetensors",
917
+ "model.layers.26.mlp.up_proj.weight.nested_quant_map": "model-00004-of-00009.safetensors",
918
+ "model.layers.26.mlp.up_proj.weight.quant_map": "model-00004-of-00009.safetensors",
919
+ "model.layers.26.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00004-of-00009.safetensors",
920
  "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00009.safetensors",
921
  "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00009.safetensors",
922
  "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00009.safetensors",
 
1475
  "model.layers.37.mlp.gate_proj.weight.nested_quant_map": "model-00004-of-00009.safetensors",
1476
  "model.layers.37.mlp.gate_proj.weight.quant_map": "model-00004-of-00009.safetensors",
1477
  "model.layers.37.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00004-of-00009.safetensors",
1478
+ "model.layers.37.mlp.up_proj.weight": "model-00005-of-00009.safetensors",
1479
+ "model.layers.37.mlp.up_proj.weight.absmax": "model-00005-of-00009.safetensors",
1480
+ "model.layers.37.mlp.up_proj.weight.nested_absmax": "model-00005-of-00009.safetensors",
1481
+ "model.layers.37.mlp.up_proj.weight.nested_quant_map": "model-00005-of-00009.safetensors",
1482
+ "model.layers.37.mlp.up_proj.weight.quant_map": "model-00005-of-00009.safetensors",
1483
+ "model.layers.37.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00005-of-00009.safetensors",
1484
  "model.layers.37.post_attention_layernorm.weight": "model-00005-of-00009.safetensors",
1485
  "model.layers.37.self_attn.k_proj.bias": "model-00004-of-00009.safetensors",
1486
  "model.layers.37.self_attn.k_proj.weight": "model-00004-of-00009.safetensors",
 
1616
  "model.layers.4.mlp.gate_proj.weight.nested_quant_map": "model-00001-of-00009.safetensors",
1617
  "model.layers.4.mlp.gate_proj.weight.quant_map": "model-00001-of-00009.safetensors",
1618
  "model.layers.4.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00009.safetensors",
1619
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00009.safetensors",
1620
+ "model.layers.4.mlp.up_proj.weight.absmax": "model-00002-of-00009.safetensors",
1621
+ "model.layers.4.mlp.up_proj.weight.nested_absmax": "model-00002-of-00009.safetensors",
1622
+ "model.layers.4.mlp.up_proj.weight.nested_quant_map": "model-00002-of-00009.safetensors",
1623
+ "model.layers.4.mlp.up_proj.weight.quant_map": "model-00002-of-00009.safetensors",
1624
+ "model.layers.4.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00002-of-00009.safetensors",
1625
  "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00009.safetensors",
1626
  "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00009.safetensors",
1627
  "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00009.safetensors",
 
2039
  "model.layers.48.mlp.gate_proj.weight.nested_quant_map": "model-00005-of-00009.safetensors",
2040
  "model.layers.48.mlp.gate_proj.weight.quant_map": "model-00005-of-00009.safetensors",
2041
  "model.layers.48.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00005-of-00009.safetensors",
2042
+ "model.layers.48.mlp.up_proj.weight": "model-00006-of-00009.safetensors",
2043
+ "model.layers.48.mlp.up_proj.weight.absmax": "model-00006-of-00009.safetensors",
2044
+ "model.layers.48.mlp.up_proj.weight.nested_absmax": "model-00006-of-00009.safetensors",
2045
+ "model.layers.48.mlp.up_proj.weight.nested_quant_map": "model-00006-of-00009.safetensors",
2046
+ "model.layers.48.mlp.up_proj.weight.quant_map": "model-00006-of-00009.safetensors",
2047
+ "model.layers.48.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00006-of-00009.safetensors",
2048
  "model.layers.48.post_attention_layernorm.weight": "model-00006-of-00009.safetensors",
2049
  "model.layers.48.self_attn.k_proj.bias": "model-00005-of-00009.safetensors",
2050
  "model.layers.48.self_attn.k_proj.weight": "model-00005-of-00009.safetensors",
 
2603
  "model.layers.59.mlp.gate_proj.weight.nested_quant_map": "model-00006-of-00009.safetensors",
2604
  "model.layers.59.mlp.gate_proj.weight.quant_map": "model-00006-of-00009.safetensors",
2605
  "model.layers.59.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00006-of-00009.safetensors",
2606
+ "model.layers.59.mlp.up_proj.weight": "model-00007-of-00009.safetensors",
2607
+ "model.layers.59.mlp.up_proj.weight.absmax": "model-00007-of-00009.safetensors",
2608
+ "model.layers.59.mlp.up_proj.weight.nested_absmax": "model-00007-of-00009.safetensors",
2609
+ "model.layers.59.mlp.up_proj.weight.nested_quant_map": "model-00007-of-00009.safetensors",
2610
+ "model.layers.59.mlp.up_proj.weight.quant_map": "model-00007-of-00009.safetensors",
2611
+ "model.layers.59.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00007-of-00009.safetensors",
2612
  "model.layers.59.post_attention_layernorm.weight": "model-00007-of-00009.safetensors",
2613
  "model.layers.59.self_attn.k_proj.bias": "model-00006-of-00009.safetensors",
2614
  "model.layers.59.self_attn.k_proj.weight": "model-00006-of-00009.safetensors",
 
3214
  "model.layers.70.mlp.gate_proj.weight.nested_quant_map": "model-00007-of-00009.safetensors",
3215
  "model.layers.70.mlp.gate_proj.weight.quant_map": "model-00007-of-00009.safetensors",
3216
  "model.layers.70.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00007-of-00009.safetensors",
3217
+ "model.layers.70.mlp.up_proj.weight": "model-00008-of-00009.safetensors",
3218
+ "model.layers.70.mlp.up_proj.weight.absmax": "model-00008-of-00009.safetensors",
3219
+ "model.layers.70.mlp.up_proj.weight.nested_absmax": "model-00008-of-00009.safetensors",
3220
+ "model.layers.70.mlp.up_proj.weight.nested_quant_map": "model-00008-of-00009.safetensors",
3221
+ "model.layers.70.mlp.up_proj.weight.quant_map": "model-00008-of-00009.safetensors",
3222
+ "model.layers.70.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00008-of-00009.safetensors",
3223
  "model.layers.70.post_attention_layernorm.weight": "model-00008-of-00009.safetensors",
3224
  "model.layers.70.self_attn.k_proj.bias": "model-00007-of-00009.safetensors",
3225
  "model.layers.70.self_attn.k_proj.weight": "model-00007-of-00009.safetensors",
 
4794
  "visual.merger.ln_q.weight": "model-00001-of-00009.safetensors",
4795
  "visual.merger.mlp.0.bias": "model-00001-of-00009.safetensors",
4796
  "visual.merger.mlp.0.weight": "model-00001-of-00009.safetensors",
 
 
 
 
 
4797
  "visual.merger.mlp.2.bias": "model-00001-of-00009.safetensors",
4798
  "visual.merger.mlp.2.weight": "model-00001-of-00009.safetensors",
 
 
 
 
 
4799
  "visual.patch_embed.proj.weight": "model-00001-of-00009.safetensors"
4800
  }
4801
  }
preprocessor_config.json CHANGED
@@ -22,8 +22,8 @@
22
  "resample": 3,
23
  "rescale_factor": 0.00392156862745098,
24
  "size": {
25
- "max_pixels": 12845056,
26
- "min_pixels": 3136
27
  },
28
  "temporal_patch_size": 2
29
  }
 
22
  "resample": 3,
23
  "rescale_factor": 0.00392156862745098,
24
  "size": {
25
+ "longest_edge": 12845056,
26
+ "shortest_edge": 3136
27
  },
28
  "temporal_patch_size": 2
29
  }
tokenizer_config.json CHANGED
@@ -134,6 +134,7 @@
134
  "clean_up_tokenization_spaces": false,
135
  "eos_token": "<|im_end|>",
136
  "errors": "replace",
 
137
  "model_max_length": 32768,
138
  "pad_token": "<|vision_pad|>",
139
  "padding_side": "left",
 
134
  "clean_up_tokenization_spaces": false,
135
  "eos_token": "<|im_end|>",
136
  "errors": "replace",
137
+ "extra_special_tokens": {},
138
  "model_max_length": 32768,
139
  "pad_token": "<|vision_pad|>",
140
  "padding_side": "left",