Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	ui updates (#50)
Browse files- big ui update (5ba04356379684b78ac2e957803ea23d5be95114)
- app.py +10 -7
- scores/LamRA-Ret-Qwen2.5VL-7b.json +4 -1
- scores/LamRA-Ret.json +4 -1
- scores/VLM2Vec-V1-Qwen2VL-2B.json +4 -1
- scores/VLM2Vec-V1-Qwen2VL-7B.json +4 -1
- scores/VLM2Vec-V2.0-Qwen2VL-2B.json +4 -1
- scores/colpali-v1.3.json +4 -1
- scores/gme-Qwen2-VL-2B-Instruct.json +4 -1
- scores/gme-Qwen2-VL-7B-Instruct.json +4 -1
- utils.py +21 -23
- utils_v2.py +35 -24
    	
        app.py
    CHANGED
    
    | @@ -23,7 +23,7 @@ with gr.Blocks() as block: | |
| 23 | 
             
                gr.Markdown(LEADERBOARD_INTRODUCTION)
         | 
| 24 |  | 
| 25 | 
             
                with gr.Tabs(elem_classes="tab-buttons") as tabs:
         | 
| 26 | 
            -
                    # Table 1
         | 
| 27 | 
             
                    with gr.TabItem("π MMEB (V2)", elem_id="qa-tab-table1", id=1):
         | 
| 28 | 
             
                        with gr.Row():
         | 
| 29 | 
             
                            with gr.Accordion("Citation", open=False):
         | 
| @@ -92,10 +92,11 @@ with gr.Blocks() as block: | |
| 92 | 
             
                        )
         | 
| 93 | 
             
                        refresh_button2.click(fn=v2.refresh_data, outputs=data_component2)
         | 
| 94 |  | 
| 95 | 
            -
                    # table 2
         | 
| 96 | 
             
                    with gr.TabItem("πΌοΈ Image", elem_id="qa-tab-table1", id=2):
         | 
|  | |
| 97 | 
             
                        data_component3 = gr.components.Dataframe(
         | 
| 98 | 
            -
                            value=df2[v2.COLUMN_NAMES_I],
         | 
| 99 | 
             
                            headers=v2.COLUMN_NAMES_I,
         | 
| 100 | 
             
                            type="pandas",
         | 
| 101 | 
             
                            datatype=v2.DATA_TITLE_TYPE_I,
         | 
| @@ -104,10 +105,11 @@ with gr.Blocks() as block: | |
| 104 | 
             
                            max_height=2400, 
         | 
| 105 | 
             
                        )
         | 
| 106 |  | 
| 107 | 
            -
                    # table 3
         | 
| 108 | 
             
                    with gr.TabItem("π½ Video", elem_id="qa-tab-table1", id=3):
         | 
|  | |
| 109 | 
             
                        data_component4 = gr.components.Dataframe(
         | 
| 110 | 
            -
                            value=df2[v2.COLUMN_NAMES_V],
         | 
| 111 | 
             
                            headers=v2.COLUMN_NAMES_V,
         | 
| 112 | 
             
                            type="pandas",
         | 
| 113 | 
             
                            datatype=v2.DATA_TITLE_TYPE_V,
         | 
| @@ -116,10 +118,11 @@ with gr.Blocks() as block: | |
| 116 | 
             
                            max_height=2400, 
         | 
| 117 | 
             
                        )
         | 
| 118 |  | 
| 119 | 
            -
                    # table 4
         | 
| 120 | 
             
                    with gr.TabItem("π Visual Doc", elem_id="qa-tab-table1", id=4):
         | 
|  | |
| 121 | 
             
                        data_component5 = gr.components.Dataframe(
         | 
| 122 | 
            -
                            value=df2[v2.COLUMN_NAMES_D],
         | 
| 123 | 
             
                            headers=v2.COLUMN_NAMES_D,
         | 
| 124 | 
             
                            type="pandas",
         | 
| 125 | 
             
                            datatype=v2.DATA_TITLE_TYPE_D,
         | 
|  | |
| 23 | 
             
                gr.Markdown(LEADERBOARD_INTRODUCTION)
         | 
| 24 |  | 
| 25 | 
             
                with gr.Tabs(elem_classes="tab-buttons") as tabs:
         | 
| 26 | 
            +
                    # Table 1, the main leaderboard of overall scores
         | 
| 27 | 
             
                    with gr.TabItem("π MMEB (V2)", elem_id="qa-tab-table1", id=1):
         | 
| 28 | 
             
                        with gr.Row():
         | 
| 29 | 
             
                            with gr.Accordion("Citation", open=False):
         | 
|  | |
| 92 | 
             
                        )
         | 
| 93 | 
             
                        refresh_button2.click(fn=v2.refresh_data, outputs=data_component2)
         | 
| 94 |  | 
| 95 | 
            +
                    # table 2, image scores only
         | 
| 96 | 
             
                    with gr.TabItem("πΌοΈ Image", elem_id="qa-tab-table1", id=2):
         | 
| 97 | 
            +
                        gr.Markdown(v2.TABLE_INTRODUCTION_I)
         | 
| 98 | 
             
                        data_component3 = gr.components.Dataframe(
         | 
| 99 | 
            +
                            value=v2.rank_models(df2[v2.COLUMN_NAMES_I], 'Image-Overall'),
         | 
| 100 | 
             
                            headers=v2.COLUMN_NAMES_I,
         | 
| 101 | 
             
                            type="pandas",
         | 
| 102 | 
             
                            datatype=v2.DATA_TITLE_TYPE_I,
         | 
|  | |
| 105 | 
             
                            max_height=2400, 
         | 
| 106 | 
             
                        )
         | 
| 107 |  | 
| 108 | 
            +
                    # table 3, video scores only
         | 
| 109 | 
             
                    with gr.TabItem("π½ Video", elem_id="qa-tab-table1", id=3):
         | 
| 110 | 
            +
                        gr.Markdown(v2.TABLE_INTRODUCTION_V)
         | 
| 111 | 
             
                        data_component4 = gr.components.Dataframe(
         | 
| 112 | 
            +
                            value=v2.rank_models(df2[v2.COLUMN_NAMES_V], 'Video-Overall'),
         | 
| 113 | 
             
                            headers=v2.COLUMN_NAMES_V,
         | 
| 114 | 
             
                            type="pandas",
         | 
| 115 | 
             
                            datatype=v2.DATA_TITLE_TYPE_V,
         | 
|  | |
| 118 | 
             
                            max_height=2400, 
         | 
| 119 | 
             
                        )
         | 
| 120 |  | 
| 121 | 
            +
                    # table 4, visual document scores only
         | 
| 122 | 
             
                    with gr.TabItem("π Visual Doc", elem_id="qa-tab-table1", id=4):
         | 
| 123 | 
            +
                        gr.Markdown(v2.TABLE_INTRODUCTION_D)
         | 
| 124 | 
             
                        data_component5 = gr.components.Dataframe(
         | 
| 125 | 
            +
                            value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'VisDoc'),
         | 
| 126 | 
             
                            headers=v2.COLUMN_NAMES_D,
         | 
| 127 | 
             
                            type="pandas",
         | 
| 128 | 
             
                            datatype=v2.DATA_TITLE_TYPE_D,
         | 
    	
        scores/LamRA-Ret-Qwen2.5VL-7b.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "LamRA-Ret-Qwen2.5VL-7b",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-09T07:00:24.383583"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "LamRA-Ret-Qwen2.5VL-7b",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-09T07:00:24.383583", 
         | 
| 5 | 
            +
                    "model_size": 8.29, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/code-kunkun/LamRA-Ret-Qwen2.5VL-7b", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        scores/LamRA-Ret.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "LamRA-Ret",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-09T07:03:51.413144"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "LamRA-Ret",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-09T07:03:51.413144", 
         | 
| 5 | 
            +
                    "model_size": 8.29, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/code-kunkun/LamRA-Ret", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        scores/VLM2Vec-V1-Qwen2VL-2B.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "VLM2Vec-V1-Qwen2VL-2B",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-09T07:08:50.537181"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "VLM2Vec-V1-Qwen2VL-2B",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-09T07:08:50.537181", 
         | 
| 5 | 
            +
                    "model_size": 2.21, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        scores/VLM2Vec-V1-Qwen2VL-7B.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "VLM2Vec-V1-Qwen2VL-7B",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-08T08:08:07.905654"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "VLM2Vec-V1-Qwen2VL-7B",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-08T08:08:07.905654", 
         | 
| 5 | 
            +
                    "model_size": 8.29, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        scores/VLM2Vec-V2.0-Qwen2VL-2B.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "VLM2Vec-V2.0-Qwen2VL-2B",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-09T07:05:59.773788"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "VLM2Vec-V2.0-Qwen2VL-2B",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-09T07:05:59.773788", 
         | 
| 5 | 
            +
                    "model_size": 2.21, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/VLM2Vec/VLM2Vec-V2.0", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        scores/colpali-v1.3.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "colpali-v1.3",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-09T07:08:13.841120"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "colpali-v1.3",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-09T07:08:13.841120", 
         | 
| 5 | 
            +
                    "model_size": 2.92, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/vidore/colpali-v1.3", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        scores/gme-Qwen2-VL-2B-Instruct.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "gme-Qwen2-VL-2B-Instruct",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-09T07:04:30.518891"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "gme-Qwen2-VL-2B-Instruct",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-09T07:04:30.518891", 
         | 
| 5 | 
            +
                    "model_size": 2.21, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        scores/gme-Qwen2-VL-7B-Instruct.json
    CHANGED
    
    | @@ -1,7 +1,10 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "gme-Qwen2-VL-7B-Instruct",
         | 
| 4 | 
            -
                    "report_generated_date": "2025-06-09T07:05:25.508931"
         | 
|  | |
|  | |
|  | |
| 5 | 
             
                },
         | 
| 6 | 
             
                "metrics": {
         | 
| 7 | 
             
                    "image": {
         | 
|  | |
| 1 | 
             
            {
         | 
| 2 | 
             
                "metadata": {
         | 
| 3 | 
             
                    "model_name": "gme-Qwen2-VL-7B-Instruct",
         | 
| 4 | 
            +
                    "report_generated_date": "2025-06-09T07:05:25.508931", 
         | 
| 5 | 
            +
                    "model_size": 8.29, 
         | 
| 6 | 
            +
                    "url": "https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct", 
         | 
| 7 | 
            +
                    "data_source": "TIGER-Lab"
         | 
| 8 | 
             
                },
         | 
| 9 | 
             
                "metrics": {
         | 
| 10 | 
             
                    "image": {
         | 
    	
        utils.py
    CHANGED
    
    | @@ -57,26 +57,9 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction | |
| 57 |  | 
| 58 | 
             
            ## β  Please note that you need to submit the JSON file with the following format:
         | 
| 59 |  | 
| 60 | 
            -
            ###  | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
                {
         | 
| 64 | 
            -
                    "Model": "<Model Name>",
         | 
| 65 | 
            -
                    "URL": "<Model URL>" or null,
         | 
| 66 | 
            -
                    "Model Size(B)": 1000 or null,
         | 
| 67 | 
            -
                    "Data Source": "Self-Reported",
         | 
| 68 | 
            -
                    "V1-Overall": 50.0,
         | 
| 69 | 
            -
                    "I-CLS": 50.0,
         | 
| 70 | 
            -
                    "I-QA": 50.0,
         | 
| 71 | 
            -
                    "I-RET": 50.0,
         | 
| 72 | 
            -
                    "I-VG": 50.0
         | 
| 73 | 
            -
                }, 
         | 
| 74 | 
            -
            ]
         | 
| 75 | 
            -
            ```
         | 
| 76 | 
            -
             | 
| 77 | 
            -
            ### ***Important Notes: We will be releasing MMEB-V2 soon!***
         | 
| 78 | 
            -
            ### ***In V2, the detailed scores of each dataset will be included, and our code will automatically generate the results and calculate the overall scores.***
         | 
| 79 | 
            -
            ### **A V2 Submission would look like this: (TO BE RELEASED SOON)**
         | 
| 80 | 
             
            ```json
         | 
| 81 | 
             
            {
         | 
| 82 | 
             
                "metadata": {
         | 
| @@ -84,8 +67,6 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction | |
| 84 | 
             
                    "URL": "<Model URL>" or null,
         | 
| 85 | 
             
                    "Model Size(B)": 1000 or null,
         | 
| 86 | 
             
                    "Data Source": "Self-Reported",
         | 
| 87 | 
            -
                    "V1-Overall": 50.0,
         | 
| 88 | 
            -
                    "V2-Overall": 50.0
         | 
| 89 | 
             
                },
         | 
| 90 | 
             
                "metrics": {
         | 
| 91 | 
             
                    "image": {
         | 
| @@ -121,7 +102,24 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction | |
| 121 | 
             
                }
         | 
| 122 | 
             
            }
         | 
| 123 | 
             
            ```
         | 
| 124 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 125 | 
             
            Please send us an email at [email protected], attaching the JSON file. We will review your submission and update the leaderboard accordingly. \n
         | 
| 126 | 
             
            Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
         | 
| 127 | 
             
            """
         | 
|  | |
| 57 |  | 
| 58 | 
             
            ## β  Please note that you need to submit the JSON file with the following format:
         | 
| 59 |  | 
| 60 | 
            +
            ### ***Important Notes: We have released MMEB-V2 and will deprecate MMEB-V1 soon. All further submissions should be made using the V2 format (see following).***
         | 
| 61 | 
            +
            ### ***In V2, the detailed scores of each dataset will be included, and our code will automatically generate the results and calculate the overall scores. See the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for more information.***
         | 
| 62 | 
            +
            ### **A V2 Submission would look like this:**
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 63 | 
             
            ```json
         | 
| 64 | 
             
            {
         | 
| 65 | 
             
                "metadata": {
         | 
|  | |
| 67 | 
             
                    "URL": "<Model URL>" or null,
         | 
| 68 | 
             
                    "Model Size(B)": 1000 or null,
         | 
| 69 | 
             
                    "Data Source": "Self-Reported",
         | 
|  | |
|  | |
| 70 | 
             
                },
         | 
| 71 | 
             
                "metrics": {
         | 
| 72 | 
             
                    "image": {
         | 
|  | |
| 102 | 
             
                }
         | 
| 103 | 
             
            }
         | 
| 104 | 
             
            ```
         | 
| 105 | 
            +
             | 
| 106 | 
            +
            ### **TO SUBMIT V1 ONLY (Depreciated, but we still accept this format until 2025-06-30)**
         | 
| 107 | 
            +
            ```json
         | 
| 108 | 
            +
            [
         | 
| 109 | 
            +
                {
         | 
| 110 | 
            +
                    "Model": "<Model Name>",
         | 
| 111 | 
            +
                    "URL": "<Model URL>" or null,
         | 
| 112 | 
            +
                    "Model Size(B)": 1000 or null,
         | 
| 113 | 
            +
                    "Data Source": "Self-Reported",
         | 
| 114 | 
            +
                    "V1-Overall": 50.0,
         | 
| 115 | 
            +
                    "I-CLS": 50.0,
         | 
| 116 | 
            +
                    "I-QA": 50.0,
         | 
| 117 | 
            +
                    "I-RET": 50.0,
         | 
| 118 | 
            +
                    "I-VG": 50.0
         | 
| 119 | 
            +
                }, 
         | 
| 120 | 
            +
            ]
         | 
| 121 | 
            +
            ```
         | 
| 122 | 
            +
            You may refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
         | 
| 123 | 
             
            Please send us an email at [email protected], attaching the JSON file. We will review your submission and update the leaderboard accordingly. \n
         | 
| 124 | 
             
            Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
         | 
| 125 | 
             
            """
         | 
    	
        utils_v2.py
    CHANGED
    
    | @@ -1,14 +1,13 @@ | |
| 1 | 
             
            import json
         | 
| 2 | 
             
            import os
         | 
| 3 | 
             
            import pandas as pd
         | 
| 4 | 
            -
            from utils import create_hyperlinked_names
         | 
| 5 | 
            -
             | 
| 6 | 
            -
            def  | 
| 7 | 
            -
                assert isinstance( | 
| 8 | 
            -
                total =  | 
| 9 | 
            -
                for  | 
| 10 | 
            -
                     | 
| 11 | 
            -
                    total += item
         | 
| 12 | 
             
                return total
         | 
| 13 |  | 
| 14 | 
             
            SCORE_BASE_DIR = "scores"
         | 
| @@ -21,7 +20,7 @@ DATASETS = { | |
| 21 | 
             
                    "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
         | 
| 22 | 
             
                    }, 
         | 
| 23 | 
             
                "visdoc": {
         | 
| 24 | 
            -
                    "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
         | 
| 25 | 
             
                    }, 
         | 
| 26 | 
             
                "video": {
         | 
| 27 | 
             
                    "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'], 
         | 
| @@ -30,8 +29,8 @@ DATASETS = { | |
| 30 | 
             
                    "V-MRET": ['QVHighlight', 'Charades-STA', 'MomentSeeker', 'ActivityNetQA']
         | 
| 31 | 
             
                    }
         | 
| 32 | 
             
            }
         | 
| 33 | 
            -
            ALL_DATASETS_SPLITS = {k:  | 
| 34 | 
            -
            ALL_DATASETS =  | 
| 35 | 
             
            MODALITIES = list(DATASETS.keys())
         | 
| 36 | 
             
            SPECIAL_METRICS = {
         | 
| 37 | 
             
                '__default__': 'hit@1',
         | 
| @@ -45,24 +44,29 @@ COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'VisDoc | |
| 45 | 
             
            DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
         | 
| 46 | 
             
                                ['number'] * 3
         | 
| 47 |  | 
| 48 | 
            -
            TASKS_I = ['Image-Overall'] + ALL_DATASETS_SPLITS['image']
         | 
| 49 | 
             
            COLUMN_NAMES_I = BASE_COLS + TASKS_I
         | 
| 50 | 
             
            DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
         | 
| 51 | 
            -
                                ['number'] * len(TASKS_I)
         | 
| 52 |  | 
| 53 | 
            -
            TASKS_V = ['Video-Overall'] + ALL_DATASETS_SPLITS['video']
         | 
| 54 | 
             
            COLUMN_NAMES_V = BASE_COLS + TASKS_V
         | 
| 55 | 
             
            DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
         | 
| 56 | 
            -
                                ['number'] * len(TASKS_V)
         | 
| 57 |  | 
| 58 | 
             
            TASKS_D = ['VisDoc'] + ALL_DATASETS_SPLITS['visdoc']
         | 
| 59 | 
             
            COLUMN_NAMES_D = BASE_COLS + TASKS_D
         | 
| 60 | 
             
            DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
         | 
| 61 | 
             
                                ['number'] * len(TASKS_D)
         | 
| 62 |  | 
| 63 | 
            -
            TABLE_INTRODUCTION = """** | 
| 64 | 
            -
             | 
| 65 | 
            -
            ** | 
|  | |
|  | |
|  | |
|  | |
|  | |
| 66 |  | 
| 67 | 
             
            LEADERBOARD_INFO = """
         | 
| 68 | 
             
            ## Dataset Summary
         | 
| @@ -112,16 +116,16 @@ def calculate_score(raw_scores=None): | |
| 112 | 
             
                avg_scores = {}
         | 
| 113 |  | 
| 114 | 
             
                # Calculate overall score for all datasets
         | 
| 115 | 
            -
                avg_scores['Overall'] =  | 
| 116 |  | 
| 117 | 
             
                # Calculate scores for each modality
         | 
| 118 | 
             
                for modality in MODALITIES:
         | 
| 119 | 
            -
                    datasets_for_each_modality = ALL_DATASETS_SPLITS | 
| 120 | 
             
                    avg_scores[f"{modality.capitalize()}-Overall"] = get_avg(
         | 
| 121 | 
             
                        sum(all_scores.get(dataset, 0.0) for dataset in datasets_for_each_modality),
         | 
| 122 | 
             
                        len(datasets_for_each_modality)
         | 
| 123 | 
             
                    )
         | 
| 124 | 
            -
             | 
| 125 | 
             
                # Calculate scores for each sub-task
         | 
| 126 | 
             
                for modality, datasets_list in DATASETS.items():
         | 
| 127 | 
             
                    for sub_task, datasets in datasets_list.items():
         | 
| @@ -136,20 +140,27 @@ def generate_model_row(data): | |
| 136 | 
             
                row = {
         | 
| 137 | 
             
                    'Models': metadata.get('model_name', None), 
         | 
| 138 | 
             
                    'Model Size(B)': metadata.get('model_size', None),
         | 
| 139 | 
            -
                    'URL': metadata.get('url', None)
         | 
|  | |
| 140 | 
             
                }
         | 
| 141 | 
             
                scores = calculate_score(data['metrics'])
         | 
| 142 | 
             
                row.update(scores)
         | 
| 143 | 
             
                return row
         | 
| 144 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 145 | 
             
            def get_df():
         | 
| 146 | 
             
                """Generates a DataFrame from the loaded data."""
         | 
| 147 | 
             
                all_data = load_data()
         | 
| 148 | 
             
                rows = [generate_model_row(data) for data in all_data]
         | 
| 149 | 
             
                df = pd.DataFrame(rows)
         | 
| 150 | 
            -
                df = df | 
| 151 | 
            -
                df['Rank'] = range(1, len(df) + 1)
         | 
| 152 | 
             
                df = create_hyperlinked_names(df)
         | 
|  | |
| 153 | 
             
                return df
         | 
| 154 |  | 
| 155 | 
             
            def refresh_data():
         | 
|  | |
| 1 | 
             
            import json
         | 
| 2 | 
             
            import os
         | 
| 3 | 
             
            import pandas as pd
         | 
| 4 | 
            +
            from utils import create_hyperlinked_names, process_model_size
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            def sum_lol(lol):
         | 
| 7 | 
            +
                assert isinstance(lol, list) and all(isinstance(i, list) for i in lol), f"Input should be a list of lists, got {type(lol)}"
         | 
| 8 | 
            +
                total = []
         | 
| 9 | 
            +
                for sublist in lol:
         | 
| 10 | 
            +
                    total.extend(sublist)
         | 
|  | |
| 11 | 
             
                return total
         | 
| 12 |  | 
| 13 | 
             
            SCORE_BASE_DIR = "scores"
         | 
|  | |
| 20 | 
             
                    "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
         | 
| 21 | 
             
                    }, 
         | 
| 22 | 
             
                "visdoc": {
         | 
| 23 | 
            +
                    "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc', "ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2", "ViDoRe_esg_reports_v2_multilingual"]
         | 
| 24 | 
             
                    }, 
         | 
| 25 | 
             
                "video": {
         | 
| 26 | 
             
                    "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'], 
         | 
|  | |
| 29 | 
             
                    "V-MRET": ['QVHighlight', 'Charades-STA', 'MomentSeeker', 'ActivityNetQA']
         | 
| 30 | 
             
                    }
         | 
| 31 | 
             
            }
         | 
| 32 | 
            +
# Per-modality flattened dataset names: maps each modality key of DATASETS
# (e.g. "image", "visdoc", "video") to the concatenation of all its
# sub-task dataset lists.
ALL_DATASETS_SPLITS = {k: sum_lol(list(v.values())) for k, v in DATASETS.items()}
# Every dataset name across all modalities; used as the denominator set for
# the overall average score.
ALL_DATASETS = sum_lol(list(ALL_DATASETS_SPLITS.values()))
# Modality keys in DATASETS insertion order.
MODALITIES = list(DATASETS.keys())
         | 
| 35 | 
             
            SPECIAL_METRICS = {
         | 
| 36 | 
             
                '__default__': 'hit@1',
         | 
|  | |
| 44 | 
             
            DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
         | 
| 45 | 
             
                                ['number'] * 3
         | 
| 46 |  | 
| 47 | 
            +
# Image tab columns: overall score, the four image sub-task aggregates
# (TASKS[1:5]), then one column per image dataset.
TASKS_I = ['Image-Overall'] + TASKS[1:5] + ALL_DATASETS_SPLITS['image']
COLUMN_NAMES_I = BASE_COLS + TASKS_I
# NOTE(review): the `+ 4` adds four extra 'number' slots beyond len(TASKS_I);
# presumably matches extra base columns — confirm against BASE_COLS.
DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
                    ['number'] * (len(TASKS_I) + 4)

# Video tab columns: overall score, the four video sub-task aggregates
# (TASKS[6:10]), then one column per video dataset.
TASKS_V = ['Video-Overall'] + TASKS[6:10] + ALL_DATASETS_SPLITS['video']
COLUMN_NAMES_V = BASE_COLS + TASKS_V
DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
                    ['number'] * (len(TASKS_V) + 4)

# VisDoc tab columns: one overall score followed by one column per
# visual-document dataset (no sub-task aggregates for this modality).
TASKS_D = ['VisDoc'] + ALL_DATASETS_SPLITS['visdoc']
COLUMN_NAMES_D = BASE_COLS + TASKS_D
DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
                    ['number'] * len(TASKS_D)
         | 
| 61 |  | 
| 62 | 
            +
            TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
         | 
| 63 | 
            +
                                    Models are ranked based on **Overall**"""
         | 
| 64 | 
            +
            TABLE_INTRODUCTION_I = """**I-CLS**: Image Classification, **I-QA**: (Image) Visual Question Answering, **I-RET**: Image Retrieval, **I-VG**: (Image) Visual Grounding \n
         | 
| 65 | 
            +
                                    Models are ranked based on **Image-Overall**"""
         | 
| 66 | 
            +
            TABLE_INTRODUCTION_V = """**V-CLS**: Video Classification, **V-QA**: (Video) Visual Question Answering, **V-RET**: Video Retrieval, **V-MRET**: Video Moment Retrieval \n
         | 
| 67 | 
            +
                                    Models are ranked based on **Video-Overall**"""
         | 
| 68 | 
            +
            TABLE_INTRODUCTION_D = """**VisDoc**: Visual Document Understanding \n
         | 
| 69 | 
            +
                                    Models are ranked based on **VisDoc**"""
         | 
| 70 |  | 
| 71 | 
             
            LEADERBOARD_INFO = """
         | 
| 72 | 
             
            ## Dataset Summary
         | 
|  | |
| 116 | 
             
                avg_scores = {}
         | 
| 117 |  | 
| 118 | 
             
                # Calculate overall score for all datasets
         | 
| 119 | 
            +
                avg_scores['Overall'] = get_avg(sum(all_scores.values()), len(ALL_DATASETS))
         | 
| 120 |  | 
| 121 | 
             
                # Calculate scores for each modality
         | 
| 122 | 
             
                for modality in MODALITIES:
         | 
| 123 | 
            +
                    datasets_for_each_modality = ALL_DATASETS_SPLITS[modality]
         | 
| 124 | 
             
                    avg_scores[f"{modality.capitalize()}-Overall"] = get_avg(
         | 
| 125 | 
             
                        sum(all_scores.get(dataset, 0.0) for dataset in datasets_for_each_modality),
         | 
| 126 | 
             
                        len(datasets_for_each_modality)
         | 
| 127 | 
             
                    )
         | 
| 128 | 
            +
                
         | 
| 129 | 
             
                # Calculate scores for each sub-task
         | 
| 130 | 
             
                for modality, datasets_list in DATASETS.items():
         | 
| 131 | 
             
                    for sub_task, datasets in datasets_list.items():
         | 
|  | |
| 140 | 
             
                row = {
         | 
| 141 | 
             
                    'Models': metadata.get('model_name', None), 
         | 
| 142 | 
             
                    'Model Size(B)': metadata.get('model_size', None),
         | 
| 143 | 
            +
                    'URL': metadata.get('url', None), 
         | 
| 144 | 
            +
                    'Data Source': metadata.get('data_source', 'Self-Reported'),
         | 
| 145 | 
             
                }
         | 
| 146 | 
             
                scores = calculate_score(data['metrics'])
         | 
| 147 | 
             
                row.update(scores)
         | 
| 148 | 
             
                return row
         | 
| 149 |  | 
| 150 | 
            +
            def rank_models(df, column='Overall'):
         | 
| 151 | 
            +
                """Ranks the models based on the specific score."""
         | 
| 152 | 
            +
                df = df.sort_values(by=column, ascending=False).reset_index(drop=True)
         | 
| 153 | 
            +
                df['Rank'] = range(1, len(df) + 1)
         | 
| 154 | 
            +
                return df
         | 
| 155 | 
            +
             | 
| 156 | 
             
            def get_df():
         | 
| 157 | 
             
                """Generates a DataFrame from the loaded data."""
         | 
| 158 | 
             
                all_data = load_data()
         | 
| 159 | 
             
                rows = [generate_model_row(data) for data in all_data]
         | 
| 160 | 
             
                df = pd.DataFrame(rows)
         | 
| 161 | 
            +
                df['Model Size(B)'] = df['Model Size(B)'].apply(process_model_size)
         | 
|  | |
| 162 | 
             
                df = create_hyperlinked_names(df)
         | 
| 163 | 
            +
                df = rank_models(df, column='Overall')
         | 
| 164 | 
             
                return df
         | 
| 165 |  | 
| 166 | 
             
            def refresh_data():
         | 

