zwgao committed on
Commit 5312ec6 · verified · 1 Parent(s): bf3e2cf

Update README.md

Files changed (1)
  1. README.md +28 -28
README.md CHANGED
@@ -55,43 +55,43 @@ InternVL 2.5 is a multimodal large language model series, featuring models of var
 
  | Benchmark | GPT-4V | GPT-4o-20240513 | Claude-3-Opus | Claude-3.5-Sonnet | Gemini-1.5-Pro | LLaVA-OneVision-72B | Qwen2-VL-72B | InternVL2.5-78B |
  |----------------------------|-------------|-----------------|---------------|-------------------|----------------|---------------------|--------------|-----------------|
- | MMMU (val) | 63.1 | 69.1 | - | 68.3 | 62.2 | 56.8 | 64.5 | 70.1 |
- | MMMU (test) | - | - | - | - | - | - | - | 61.8 |
- | MMMU-PRO (overall) | - | 51.9 | - | 51.5 | 46.9 | 31.0 | 46.2 | 48.6 |
- | MathVista (mini) | 58.1 | 63.8 | - | 67.7 | 63.9 | 67.5 | 70.5 | 72.3 |
- | MathVision (mini) | - | - | - | - | - | - | - | 34.9 |
- | MathVision (full) | 24.0 | 30.4 | - | - | 19.2 | - | 25.9 | 32.2 |
- | MathVerse (mini) | 32.8 | 50.2 | - | - | - | 39.1 | - | 51.7 |
+ | MMMU<sub>val</sub> | 63.1 | 69.1 | - | 68.3 | 62.2 | 56.8 | 64.5 | 70.1 |
+ | MMMU<sub>test</sub> | - | - | - | - | - | - | - | 61.8 |
+ | MMMU-PRO<sub>overall</sub> | - | 51.9 | - | 51.5 | 46.9 | 31.0 | 46.2 | 48.6 |
+ | MathVista<sub>mini</sub> | 58.1 | 63.8 | - | 67.7 | 63.9 | 67.5 | 70.5 | 72.3 |
+ | MathVision<sub>mini</sub> | - | - | - | - | - | - | - | 34.9 |
+ | MathVision<sub>full</sub> | 24.0 | 30.4 | - | - | 19.2 | - | 25.9 | 32.2 |
+ | MathVerse<sub>mini</sub> | 32.8 | 50.2 | - | - | - | 39.1 | - | 51.7 |
  | Olympiad Bench | 18.0 | 25.9 | - | - | - | - | - | 11.6 |
- | AI2D (w / wo M) | 78.2 / 89.4 | 84.6 / 94.2 | 70.6 / 88.1 | 81.2 / 94.7 | 79.1 / 94.4 | 85.6 / - | 88.1 / - | 89.1 / 95.7 |
- | ChartQA (test avg.) | 78.5 | 85.7 | 80.8 | 90.8 | 87.2 | 83.7 | 88.3 | 88.3 |
- | TextVQA (val) | 78.0 | 77.4 | 67.5 | 74.1 | 78.8 | 80.5 | 85.5 | 83.4 |
- | DocVQA (test) | 88.4 | 92.8 | 89.3 | 95.2 | 93.1 | 91.3 | 96.5 | 95.1 |
- | InfoVQA (test) | 75.1 | 79.2 | 55.6 | 74.3 | 81.0 | 74.9 | 84.5 | 84.1 |
+ | AI2D<sub>w / wo M</sub> | 78.2 / 89.4 | 84.6 / 94.2 | 70.6 / 88.1 | 81.2 / 94.7 | 79.1 / 94.4 | 85.6 / - | 88.1 / - | 89.1 / 95.7 |
+ | ChartQA<sub>test avg.</sub> | 78.5 | 85.7 | 80.8 | 90.8 | 87.2 | 83.7 | 88.3 | 88.3 |
+ | TextVQA<sub>val</sub> | 78.0 | 77.4 | 67.5 | 74.1 | 78.8 | 80.5 | 85.5 | 83.4 |
+ | DocVQA<sub>test</sub> | 88.4 | 92.8 | 89.3 | 95.2 | 93.1 | 91.3 | 96.5 | 95.1 |
+ | InfoVQA<sub>test</sub> | 75.1 | 79.2 | 55.6 | 74.3 | 81.0 | 74.9 | 84.5 | 84.1 |
  | OCR-Bench | 645 | 736 | 694 | 788 | 754 | 741 | 877 | 854 |
  | SEED-2 Plus | 53.8 | 72.0 | 44.2 | 71.7 | - | 69.7 | - | 71.3 |
- | CharXiv (RQ / DQ) | 37.1 / 79.9 | 47.1 / 84.5 | 30.2 / 71.6 | 60.2 / 84.3 | 43.3 / 72.0 | - | 91.3 / 94.6 | 42.4 / 82.3 |
- | VCR-EN-Easy (EM / Jaccard) | 52.0 / 65.4 | 91.6 / 96.4 | 62.0 / 77.7 | 63.9 / 74.7 | 62.7 / 77.7 | - | 94.6 | 95.7 / 94.5 |
- | BLINK (val) | 54.6 | 68.0 | - | - | - | 55.4 | - | 63.8 |
+ | CharXiv<sub>RQ / DQ</sub> | 37.1 / 79.9 | 47.1 / 84.5 | 30.2 / 71.6 | 60.2 / 84.3 | 43.3 / 72.0 | - | 91.3 / 94.6 | 42.4 / 82.3 |
+ | VCR-EN-Easy<sub>EM / Jaccard</sub> | 52.0 / 65.4 | 91.6 / 96.4 | 62.0 / 77.7 | 63.9 / 74.7 | 62.7 / 77.7 | - | 94.6 | 95.7 / 94.5 |
+ | BLINK<sub>val</sub> | 54.6 | 68.0 | - | - | - | 55.4 | - | 63.8 |
  | Mantis Eval | 62.7 | - | - | - | - | 77.6 | - | 77.0 |
  | MMIU | - | 55.7 | - | 53.4 | 53.4 | - | - | 55.8 |
  | Muir Bench | 62.3 | 68.0 | - | - | - | 54.8 | - | 63.5 |
- | MMT (val) | 64.3 | 65.4 | - | - | 64.5 | - | 71.8 | 70.8 |
- | MIRB (avg.) | 53.1 | - | - | - | - | - | - | 61.1 |
+ | MMT<sub>val</sub> | 64.3 | 65.4 | - | - | 64.5 | - | 71.8 | 70.8 |
+ | MIRB<sub>avg.</sub> | 53.1 | - | - | - | - | - | - | 61.1 |
  | RealWorld QA | 61.4 | 75.4 | - | 60.1 | 67.5 | 71.9 | 77.8 | 78.7 |
- | MME-RW (EN) | - | 45.2 | - | 51.6 | 38.2 | - | - | 62.9 |
- | WildVision (win rate) | 71.8 | 80.6 | - | - | - | - | - | 71.4 |
+ | MME-RW<sub>EN</sub> | - | 45.2 | - | 51.6 | 38.2 | - | - | 62.9 |
+ | WildVision<sub>win rate</sub> | 71.8 | 80.6 | - | - | - | - | - | 71.4 |
  | R-Bench | 65.6 | 77.7 | - | - | - | - | - | 77.2 |
- | MME (sum) | 1926.6 | -- | 1586.8 | -- | -- | 2261.0 | 2482.7 | 2494.5 |
- | MMB (EN / CN) | 81.0 / 80.2 | 83.4 / 82.1 | 63.3 / 59.2 | 82.6 / 83.5 | 73.9 / 73.8 | 85.8 / 85.3 | 86.5 / 86.6 | 88.3 / 88.5 |
- | MMBv1.1 (EN) | 80.0 | 83.1 | 60.1 | 80.9 | 74.6 | 85.0 | 85.9 | 87.4 |
- | MMVet (turbo) | 67.5 | 69.1 | 51.7 | 70.1 | 64.0 | 60.6 | 74.0 | 72.3 |
- | MMVetv2 (0613) | 66.3 | 71.0 | 55.8 | 71.8 | 66.9 | -- | 66.9 | 65.5 |
+ | MME<sub>sum</sub> | 1926.6 | -- | 1586.8 | -- | -- | 2261.0 | 2482.7 | 2494.5 |
+ | MMB<sub>EN / CN</sub> | 81.0 / 80.2 | 83.4 / 82.1 | 63.3 / 59.2 | 82.6 / 83.5 | 73.9 / 73.8 | 85.8 / 85.3 | 86.5 / 86.6 | 88.3 / 88.5 |
+ | MMBv1.1<sub>EN</sub> | 80.0 | 83.1 | 60.1 | 80.9 | 74.6 | 85.0 | 85.9 | 87.4 |
+ | MMVet<sub>turbo</sub> | 67.5 | 69.1 | 51.7 | 70.1 | 64.0 | 60.6 | 74.0 | 72.3 |
+ | MMVetv2<sub>0613</sub> | 66.3 | 71.0 | 55.8 | 71.8 | 66.9 | -- | 66.9 | 65.5 |
  | MMStar | 56.0 | 64.7 | 45.7 | 65.1 | 59.1 | 65.8 | 68.3 | 69.5 |
- | HallBench (avg.) | 46.5 | 55.0 | 37.8 | 55.5 | 45.6 | 49.0 | 58.1 | 57.4 |
- | MMHal (score) | -- | 4.00 | -- | -- | -- | -- | -- | 3.89 |
- | CRPE (relation) | -- | 76.6 | -- | -- | -- | -- | -- | 78.8 |
- | POPE (avg.) | -- | 86.9 | -- | -- | -- | -- | -- | 90.8 |
 
 
  ### Video Benchmarks