awacke1 committed on
Commit
e5e9425
·
verified ·
1 Parent(s): 2cbf3fb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -0
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from utils import MEGABenchEvalDataLoader
3
+ import os
4
+ from constants import *
5
+
6
# Directory containing this script. Every bundled asset below is resolved
# against it, so the app does not depend on the process's working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Construct paths to CSS files
base_css_file = os.path.join(current_dir, "static", "css", "style.css")
table_css_file = os.path.join(current_dir, "static", "css", "table.css")

# Read CSS files. The encoding is stated explicitly because open() otherwise
# falls back to the locale's preferred encoding, which varies by platform.
# NOTE(review): assumes the stylesheets are stored as UTF-8 - confirm.
with open(base_css_file, "r", encoding="utf-8") as f:
    base_css = f.read()
with open(table_css_file, "r", encoding="utf-8") as f:
    table_css = f.read()

# Initialize data loaders, one per results directory. The directories are
# anchored to current_dir (like the CSS files above) instead of being
# relative to the working directory.
eval_results_dir = os.path.join(current_dir, "static", "eval_results")
default_loader = MEGABenchEvalDataLoader(os.path.join(eval_results_dir, "Default"))
si_loader = MEGABenchEvalDataLoader(os.path.join(eval_results_dir, "SI"))
22
+
23
# Build the Gradio UI. Components and event listeners are declared inside the
# Blocks context so that they are attached to `block`.
# NOTE(review): the nesting below is reconstructed from statement order; the
# listing this was taken from carries no indentation - confirm the placement
# of each component against the committed file.
with gr.Blocks() as block:
    # Invisible HTML element holding both stylesheets; the table handlers
    # write the same markup into it again on every update.
    css_style = gr.HTML(f"<style>{base_css}\n{table_css}</style>", visible=False)

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("📚 Introduction", elem_id="intro-tab", id=0):
            gr.Markdown(LEADERBOARD_INTRODUCTION)

        with gr.TabItem("📊 MEGA-Bench", elem_id="qa-tab-table1", id=1):
            with gr.Row():
                with gr.Accordion("Citation", open=False):
                    citation_button = gr.Textbox(
                        value=CITATION_BUTTON_TEXT,
                        label=CITATION_BUTTON_LABEL,
                        elem_id="citation-button",
                        lines=10,
                    )
            gr.Markdown(TABLE_INTRODUCTION)

            with gr.Row():
                table_selector = gr.Radio(
                    choices=["Default", "Single Image"],
                    label="Select table to display. Default: all MEGA-Bench tasks; Single Image: single-image tasks only.",
                    value="Default",
                )

            # One caption per table; the handler below swaps between them.
            default_caption = "**Table 1: MEGA-Bench full results.** The number in the parentheses is the number of tasks of each keyword. <br> The Core set contains $N_{\\text{core}} = 440$ tasks evaluated by rule-based metrics, and the Open-ended set contains $N_{\\text{open}} = 65$ tasks evaluated by a VLM judge (we use GPT-4o-0806). <br> Different from the results in our paper, we only use the Core results with CoT prompting here for clarity and compatibility with the released data. <br> $\\text{Overall} \\ = \\ \\frac{\\text{Core} \\ \\cdot \\ N_{\\text{core}} \\ + \\ \\text{Open-ended} \\ \\cdot \\ N_{\\text{open}}}{N_{\\text{core}} \\ + \\ N_{\\text{open}}}$ <br> * indicates self-reported results from the model authors."

            single_image_caption = "**Table 2: MEGA-Bench Single-image setting results.** The number in the parentheses is the number of tasks in each keyword. <br> This subset contains 273 single-image tasks from the Core set and 42 single-image tasks from the Open-ended set. For open-source models, we drop the image input in the 1-shot demonstration example so that the entire query contains a single image only. <br> Compared to the default table, some models with only single-image support are added."

            caption_component = gr.Markdown(
                value=default_caption,
                elem_classes="table-caption",
                latex_delimiters=[{"left": "$", "right": "$", "display": False}],
            )

            # Breakdown dimensions offered by the default loader; the first
            # one is the initial selection.
            _super_group_names = list(default_loader.SUPER_GROUPS.keys())

            with gr.Row():
                super_group_selector = gr.Radio(
                    choices=_super_group_names,
                    label="Select a dimension to display breakdown results. We use different column colors to distinguish the overall benchmark scores and breakdown results.",
                    value=_super_group_names[0],
                )
                model_group_selector = gr.Radio(
                    choices=list(BASE_MODEL_GROUPS.keys()),
                    label="Select a model group",
                    value="All",
                )

            def _table_layout(headers):
                """Return the Dataframe keyword arguments that depend on the header count.

                The first column is typed "number", the second "html" and all
                remaining ones "number"; widths are 100px, 240px, three times
                160px and 210px for every column after the fifth.
                """
                header_count = len(headers)
                return {
                    "datatype": ["number", "html"] + ["number"] * (header_count - 2),
                    "column_widths": ["100px", "240px"] + ["160px"] * 3 + ["210px"] * (header_count - 5),
                }

            initial_headers, initial_data = default_loader.get_leaderboard_data(_super_group_names[0], "All")
            data_component = gr.Dataframe(
                value=initial_data,
                headers=initial_headers,
                interactive=False,
                elem_classes="custom-dataframe",
                max_height=2400,
                **_table_layout(initial_headers),
            )

            def update_table_and_caption(table_type, super_group, model_group):
                """Return the refreshed table, caption and style markup.

                Args:
                    table_type: "Default" reads from ``default_loader``; any
                        other value reads from ``si_loader``.
                    super_group: breakdown dimension forwarded to the loader.
                    model_group: model group forwarded to the loader.

                Returns:
                    A three-item list: a ``gr.Dataframe`` with the new data,
                    the caption string for the chosen table, and the
                    ``<style>`` markup for the hidden CSS element.
                """
                showing_default = table_type == "Default"
                loader = default_loader if showing_default else si_loader
                caption = default_caption if showing_default else single_image_caption
                headers, data = loader.get_leaderboard_data(super_group, model_group)

                table = gr.Dataframe(
                    value=data,
                    headers=headers,
                    interactive=False,
                    **_table_layout(headers),
                )
                return [table, caption, f"<style>{base_css}\n{table_css}</style>"]

            def update_selectors(table_type):
                """Return new choices for the two group selectors.

                Args:
                    table_type: "Default" reads from ``default_loader``; any
                        other value reads from ``si_loader``.

                Returns:
                    A two-item list of ``gr.Radio`` updates carrying the keys
                    of the loader's ``SUPER_GROUPS`` and ``MODEL_GROUPS``.
                """
                if table_type == "Default":
                    loader = default_loader
                else:
                    loader = si_loader
                super_choices = list(loader.SUPER_GROUPS.keys())
                model_choices = list(loader.MODEL_GROUPS.keys())
                return [gr.Radio(choices=super_choices), gr.Radio(choices=model_choices)]

            refresh_button = gr.Button("Refresh")

            def _bind_table_update(register):
                """Register update_table_and_caption through the listener method ``register``."""
                return register(
                    fn=update_table_and_caption,
                    inputs=[table_selector, super_group_selector, model_group_selector],
                    outputs=[data_component, caption_component, css_style],
                )

            # The refresh button and both group selectors redraw the table
            # (and caption) in the same way.
            for _register in (
                refresh_button.click,
                super_group_selector.change,
                model_group_selector.change,
            ):
                _bind_table_update(_register)

            # Changing the table first refreshes the selector choices, then
            # redraws the table with the same handler as above.
            _selectors_refreshed = table_selector.change(
                fn=update_selectors,
                inputs=[table_selector],
                outputs=[super_group_selector, model_group_selector],
            )
            _bind_table_update(_selectors_refreshed.then)

        with gr.TabItem("📝 Data Information", elem_id="qa-tab-table2", id=2):
            gr.Markdown(DATA_INFO, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="submit-tab", id=3):
            with gr.Row():
                gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")
151
+
152
# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    launch_options = {"share": True, "show_api": False}
    block.launch(**launch_options)