intfloat committed on
Commit
c713514
·
1 Parent(s): de9df37

Add preprocessor_config.json

Browse files
Files changed (2) hide show
  1. README.md +3 -3
  2. preprocessor_config.json +26 -0
README.md CHANGED
@@ -15,7 +15,7 @@ license: mit
15
  ---
16
  ## mmE5-mllama-11b-instruct
17
 
18
- [mmE5: Improving Multimodal Multilingual Embeddings via High-quality Synthetic Data](https://arxiv.org/abs/2502.08468.pdf). Haonan Chen, Liang Wang, Nan Yang, Yutao Zhu, Ziliang Zhao, Furu Wei, Zhicheng Dou, arXiv 2024
19
 
20
  This model is trained based on [Llama-3.2-11B-Vision](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision).
21
 
@@ -50,7 +50,6 @@ from PIL import Image
50
  import numpy as np
51
  model_args = ModelArguments(
52
  model_name='intfloat/mmE5-mllama-11b-instruct',
53
- processor_name='meta-llama/Llama-3.2-11B-Vision',
54
  pooling='last',
55
  normalize=True,
56
  model_backbone='mllama')
@@ -104,4 +103,5 @@ print(string, '=', model.compute_similarity(qry_output, tgt_output))
104
  journal={arXiv preprint arXiv:2502.08468},
105
  year={2025}
106
  }
107
- ```
 
 
15
  ---
16
  ## mmE5-mllama-11b-instruct
17
 
18
+ [mmE5: Improving Multimodal Multilingual Embeddings via High-quality Synthetic Data](https://arxiv.org/abs/2502.08468). Haonan Chen, Liang Wang, Nan Yang, Yutao Zhu, Ziliang Zhao, Furu Wei, Zhicheng Dou, arXiv 2025
19
 
20
  This model is trained based on [Llama-3.2-11B-Vision](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision).
21
 
 
50
  import numpy as np
51
  model_args = ModelArguments(
52
  model_name='intfloat/mmE5-mllama-11b-instruct',
 
53
  pooling='last',
54
  normalize=True,
55
  model_backbone='mllama')
 
103
  journal={arXiv preprint arXiv:2502.08468},
104
  year={2025}
105
  }
106
+ ```
107
+
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_pad": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.48145466,
9
+ 0.4578275,
10
+ 0.40821073
11
+ ],
12
+ "image_processor_type": "MllamaImageProcessor",
13
+ "image_std": [
14
+ 0.26862954,
15
+ 0.26130258,
16
+ 0.27577711
17
+ ],
18
+ "max_image_tiles": 4,
19
+ "processor_class": "MllamaProcessor",
20
+ "resample": 2,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 448,
24
+ "width": 448
25
+ }
26
+ }