Leaderboard-Deepseek-Gemini-Grok-GPT-Qwen

Running

App Files Files Community

awacke1 committed on Feb 2

Commit

e5e9425

verified ·

1 Parent(s): 2cbf3fb

Create app.py

Browse files

Files changed (1) hide show

app.py +153 -0

app.py ADDED Viewed

	@@ -0,0 +1,153 @@

+import gradio as gr
+from utils import MEGABenchEvalDataLoader
+import os
+from constants import *
+# Get the directory of the current script
+current_dir = os.path.dirname(os.path.abspath(__file__))
+# Construct paths to CSS files
+base_css_file = os.path.join(current_dir, "static", "css", "style.css")
+table_css_file = os.path.join(current_dir, "static", "css", "table.css")
+# Read CSS files
+with open(base_css_file, "r") as f:
+    base_css = f.read()
+with open(table_css_file, "r") as f:
+    table_css = f.read()
+# Initialize data loaders
+default_loader = MEGABenchEvalDataLoader("./static/eval_results/Default")
+si_loader = MEGABenchEvalDataLoader("./static/eval_results/SI")
+# Build the Gradio UI. Components are laid out in the order they are created
+# inside the nested context managers, so statement order here is significant.
+with gr.Blocks() as block:
+    # Add a style element that we'll update
+    # (it is created hidden and is also listed as an output of the table
+    # update handlers, which re-send the same combined stylesheet).
+    css_style = gr.HTML(
+        f"<style>{base_css}\n{table_css}</style>",
+        visible=False
+    )
+    with gr.Tabs(elem_classes="tab-buttons") as tabs:
+        # Tab 0: static introduction text.
+        with gr.TabItem("📚 Introduction", elem_id="intro-tab", id=0):
+            gr.Markdown(
+                LEADERBOARD_INTRODUCTION
+            )
+        # Tab 1: the leaderboard itself.
+        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
+            with gr.Row():
+                # Collapsed-by-default accordion holding the citation text.
+                with gr.Accordion("Citation", open=False):
+                    citation_button = gr.Textbox(
+                        value=CITATION_BUTTON_TEXT,
+                        label=CITATION_BUTTON_LABEL,
+                        elem_id="citation-button",
+                        lines=10,
+                    )
+            gr.Markdown(
+                TABLE_INTRODUCTION
+            )
+            with gr.Row():
+                # Chooses which result set is shown; the handlers compare this
+                # value against "Default" to pick the loader and caption.
+                table_selector = gr.Radio(
+                    choices=["Default", "Single Image"],
+                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
+                    value="Default"
+                )
+            # Define different captions for each table
+            # The default caption embeds inline LaTeX between `$` delimiters; the
+            # doubled backslashes are string escapes that yield single backslashes.
+            default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."
+            single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."
+            # Caption shown with the table; starts with the default caption and is
+            # replaced by the table update handler. `$...$` is registered as an
+            # inline (non-display) LaTeX delimiter so the formulas above render.
+            caption_component = gr.Markdown(
+                value=default_caption,
+                elem_classes="table-caption",
+                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
+            )
+            with gr.Row():
+                super_group_selector = gr.Radio(
+                    choices=list(default_loader.SUPER_GROUPS.keys()),
+                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
+                    value=list(default_loader.SUPER_GROUPS.keys())[0]
+                )
+                model_group_selector = gr.Radio(
+                    choices=list(BASE_MODEL_GROUPS.keys()),
+                    label="Select a model group",
+                    value="All"
+                )
+            initial_headers, initial_data = default_loader.get_leaderboard_data(list(default_loader.SUPER_GROUPS.keys())[0], "All")
+            data_component = gr.Dataframe(
+                value=initial_data,
+                headers=initial_headers,
+                datatype=["number", "html"] + ["number"] * (len(initial_headers) - 2),
+                interactive=False,
+                elem_classes="custom-dataframe",
+                max_height=2400,
+                column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(initial_headers) - 5),
+            )
+            def update_table_and_caption(table_type, super_group, model_group):
+                if table_type == "Default":
+                    headers, data = default_loader.get_leaderboard_data(super_group, model_group)
+                    caption = default_caption
+                else:  # Single-image
+                    headers, data = si_loader.get_leaderboard_data(super_group, model_group)
+                    caption = single_image_caption
+                return [
+                    gr.Dataframe(
+                        value=data,
+                        headers=headers,
+                        datatype=["number", "html"] + ["number"] * (len(headers) - 2),
+                        interactive=False,
+                        column_widths=["100px", "240px"] + ["160px"] * 3 + ["210px"] * (len(headers) - 5),
+                    ),
+                    caption,
+                    f"<style>{base_css}\n{table_css}</style>"
+                ]
+            def update_selectors(table_type):
+                loader = default_loader if table_type == "Default" else si_loader
+                return [
+                    gr.Radio(choices=list(loader.SUPER_GROUPS.keys())),
+                    gr.Radio(choices=list(loader.MODEL_GROUPS.keys()))
+                ]
+            refresh_button = gr.Button("Refresh")
+            # Update click and change handlers to include caption updates
+            refresh_button.click(
+                fn=update_table_and_caption,
+                inputs=[table_selector, super_group_selector, model_group_selector],
+                outputs=[data_component, caption_component, css_style]
+            )
+            super_group_selector.change(
+                fn=update_table_and_caption,
+                inputs=[table_selector, super_group_selector, model_group_selector],
+                outputs=[data_component, caption_component, css_style]
+            )
+            model_group_selector.change(
+                fn=update_table_and_caption,
+                inputs=[table_selector, super_group_selector, model_group_selector],
+                outputs=[data_component, caption_component, css_style]
+            )
+            table_selector.change(
+                fn=update_selectors,
+                inputs=[table_selector],
+                outputs=[super_group_selector, model_group_selector]
+            ).then(
+                fn=update_table_and_caption,
+                inputs=[table_selector, super_group_selector, model_group_selector],
+                outputs=[data_component, caption_component, css_style]
+            )
+        # Tab 2: static data description rendered from DATA_INFO.
+        with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
+            gr.Markdown(DATA_INFO, elem_classes="markdown-text")
+        # Tab 3: static submission instructions rendered from SUBMIT_INTRODUCTION.
+        # NOTE(review): DATA_INFO and SUBMIT_INTRODUCTION are not defined in this
+        # file; presumably they come from `from constants import *` — confirm.
+        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
+            with gr.Row():
+                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
+# Launch the app only when this file is executed as a script, not on import.
+# NOTE(review): share=True requests a public share link and show_api=False
+# hides the API docs, per the Gradio launch() documentation — confirm both
+# are wanted in the deployment environment.
+if __name__ == "__main__":
+    block.launch(share=True, show_api=False)