jwilles committed
Commit c8da037 · Parent(s): 0b8ba75

Update content and style

Files changed (3)
  1. app.py +14 -17
  2. src/about.py +54 -49
  3. src/display/css_html_js.py +33 -221
app.py CHANGED
@@ -1,15 +1,11 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from huggingface_hub import snapshot_download
 
 from src.about import (
-    CITATION_BUTTON_LABEL,
-    CITATION_BUTTON_TEXT,
-    EVALUATION_QUEUE_TEXT,
+    REPRODUCIBILITY_TEXT,
     INTRODUCTION_TEXT,
-    LLM_BENCHMARKS_TEXT,
+    ABOUT_TEXT,
     TITLE,
 )
 from src.display.css_html_js import custom_css, custom_js
@@ -18,12 +14,8 @@ from src.display.utils import (
     ST_BENCHMARK_COLS,
     AGENTIC_BENCHMARK_COLS,
     EVAL_COLS,
-    EVAL_TYPES,
     AutoEvalColumn,
-    ModelType,
     fields,
-    WeightType,
-    Precision
 )
 from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
 from src.populate import get_evaluation_queue_df, get_leaderboard_df, TASK_NAME_INVERSE_MAP
@@ -92,26 +84,31 @@ with demo:
         </div>
         <div id="centre-container">
           <h1 style="margin-bottom: 0.25rem;">{TITLE}</h1>
-          <p style="color:#eb088a; margin:0; font-size:1.2rem;">Performance Insights &amp; Comparison</p>
+          <p style="color:#eb088a; margin:0; font-size:1.2rem;">Explore Interactive Results &amp; Traces</p>
+        </div>
+        <div id="right-container">
         </div>
-        <div id="right-container"></div>
       </div>
     </div>
     """)
     # gr.HTML(TITLE)
-    with gr.Group(elem_classes="intro-block"):
-        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    # with gr.Group(elem_classes="intro-block"):
+    #     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     # gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    gr.Markdown(INTRODUCTION_TEXT, elem_classes="intro-text")
 
     with gr.Tabs(elem_classes=["leaderboard-table", "tab-buttons"]) as tabs:
-        with gr.TabItem("Base Benchmark", elem_classes="llm-benchmark-tab-table", id=0):
+        with gr.TabItem("Base Benchmarks", elem_classes="llm-benchmark-tab-table", id=0):
            leaderboard = init_leaderboard(ST_LEADERBOARD_DF, "base")
 
-        with gr.TabItem("Agentic Benchmark", elem_classes="llm-benchmark-tab-table", id=1):
+        with gr.TabItem("Agentic Benchmarks", elem_classes="llm-benchmark-tab-table", id=1):
            leaderboard = init_leaderboard(AGENTIC_LEADERBOARD_DF, "agentic")
 
         with gr.TabItem("About", elem_classes="llm-benchmark-tab-table", id=2):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
+
+        with gr.TabItem("Reproducibility", elem_classes="llm-benchmark-tab-table", id=3):
+            gr.Markdown(REPRODUCIBILITY_TEXT, elem_classes="markdown-text")
 
     assets = [black_logo_path, white_logo_path]
 
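The reworked layout above is standard Gradio Blocks/Tabs composition. As a minimal, self-contained sketch of the same four-tab structure (the placeholder strings and the stubbed `init_leaderboard` below are illustrative stand-ins, not the Space's real implementations):

```python
import gradio as gr

# Illustrative stand-ins for the strings imported from src.about.
INTRODUCTION_TEXT = "Intro text"
ABOUT_TEXT = "About text"
REPRODUCIBILITY_TEXT = "Reproducibility text"

def init_leaderboard(df, kind: str):
    # Stub: the Space builds a richer, sortable leaderboard component here.
    return gr.Dataframe(value=df, label=f"{kind} leaderboard")

with gr.Blocks() as demo:
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="intro-text")
    with gr.Tabs(elem_classes=["leaderboard-table", "tab-buttons"]):
        with gr.TabItem("Base Benchmarks", id=0):
            init_leaderboard(None, "base")
        with gr.TabItem("Agentic Benchmarks", id=1):
            init_leaderboard(None, "agentic")
        with gr.TabItem("About", id=2):
            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
        with gr.TabItem("Reproducibility", id=3):
            gr.Markdown(REPRODUCIBILITY_TEXT, elem_classes="markdown-text")

if __name__ == "__main__":
    demo.launch()
```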
src/about.py CHANGED
@@ -46,66 +46,74 @@ NUM_FEWSHOT = 0 # Change with your few shot
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">Vector State of Evaluation Leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title">State of Evaluation Leaderboard</h1>"""
 
 SINGLE_TURN_TASK_NAMES = ", ".join([f"[{task.value.col_name}]({task.value.source})" for task in Tasks if task.value.type == "base"])
 AGENTIC_TASK_NAMES = ", ".join([f"[{task.value.col_name}]({task.value.source})" for task in Tasks if task.value.type == "agentic"])
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = f"""
-This leaderboard presents the performance of selected LLM models on a set of tasks. The tasks are divided into two categories: base and agentic. The base tasks are: {SINGLE_TURN_TASK_NAMES}. The agentic tasks are: {AGENTIC_TASK_NAMES}."""
+Powered by **Inspect** and **Inspect Evals**, the **Vector State of Evaluation Leaderboard** presents an objective evaluation of leading frontier models across a comprehensive suite of benchmarks. Go beyond the summary metrics: click through to interactive reporting for each model and benchmark to explore sample-level performance and detailed traces."""
 
 # Which evaluations are you running? how can people reproduce what you have?
-LLM_BENCHMARKS_TEXT = f"""
-# Vector State of Evaluation Leaderboard
+ABOUT_TEXT = f"""
 
-## Overview
-The **Vector State of Evaluation Leaderboard** presents the performance of selected LLM models on a variety of tasks. These tasks are divided into two categories:
+## Vector Institute
+The **Vector Institute** is dedicated to advancing the fields of artificial intelligence and machine learning through cutting-edge research, collaborative projects, and open-source contributions. Our mission is to drive excellence and innovation in AI, fostering a vibrant community of researchers, developers, and industry partners.
 
-- **Base Tasks**: ARC-Easy, ARC-Challenge, DROP, WinoGrande, GSM8K, HellaSwag, HumanEval, IFEval, MATH, MMLU, MMLU-Pro, GPQA-Diamond, MMMU-Multiple-Choice, MMMU-Open-Ended
-- **Agentic Tasks**: GAIA, GDM-InterCode-CTF, GDM-In-House-CTF, AgentHarm, AgentHarm-Benign, SWE-Bench
+## 🎯 Benchmarks
 
-Users can compare models side by side to see how they perform on both base-level understanding tasks and more advanced, “agentic” tasks.
+This leaderboard showcases performance across a comprehensive suite of benchmarks, designed to rigorously evaluate different aspects of AI model capabilities. Let's explore the benchmarks we use:
 
-## Vector Institute
-The **Vector Institute** is dedicated to advancing the fields of artificial intelligence and machine learning through cutting-edge research, collaborative projects, and open-source contributions. This leaderboard is part of Vector’s broader effort to promote transparency and progress in AI research.
-
-## Model
-We evaluate a variety of **Large Language Models (LLMs)** across the included benchmarks. Each model:
-- Is tested on the same set of tasks.
-- Has standardized prompts or evaluation methodologies.
-- Generates performance metrics (accuracy, F1, etc.) for comparison.
-
-Our goal is to provide clear, reproducible metrics that shed light on how each model handles different task complexities and reasoning requirements.
-
-## Benchmarks
-Here is a closer look at each benchmark included in the leaderboard:
-
-### Base Benchmarks
-- **ARC-Easy / ARC-Challenge**: A set of multiple-choice science questions designed to measure a model’s scientific and commonsense reasoning.
-- **DROP**: A reading comprehension benchmark emphasizing discrete reasoning steps.
-- **WinoGrande**: A commonsense reasoning challenge focused on co-reference resolution.
-- **GSM8K**: Grade-school math word problems testing arithmetic and multi-step reasoning.
-- **HellaSwag**: A commonsense inference task centered on action completion.
-- **HumanEval**: Evaluates code generation and reasoning in a programming context.
-- **IFEval**: A specialized benchmark for incremental formal reasoning.
-- **MATH**: High school-level math questions requiring detailed solutions.
-- **MMLU / MMLU-Pro**: Multi-subject multiple-choice tests covering advanced high school and collegiate-level knowledge.
-- **GPQA-Diamond**: A question-answering benchmark that assesses deeper reasoning and knowledge linking.
-- **MMMU (Multiple-Choice / Open-Ended)**: A suite of multilingual and multi-domain tasks testing both structured and open-form responses.
-
-### Agentic Benchmarks
-- **GAIA**: Evaluates more autonomous or “agentic” reasoning, including planning and problem-solving.
-- **GDM-InterCode-CTF**: A capture-the-flag style challenge focusing on code interpretation and generative debugging strategies.
-- **GDM-In-House-CTF**: A capture-the-flag style challenge testing a variety of security skills pertaining to basic web application security.
-- **AgentHarm / AgentHarm-Benign**: A benchmark for measuring harmfulness of LLM agents.
-- **SWE-Bench**: A benchmark for testing the ability of AI agents to solve software engineering tasks.
-
----
+### Inspect Evals
+
+This leaderboard leverages [Inspect Evals](https://ukgovernmentbeis.github.io/inspect_evals/) to power evaluation. Inspect Evals is an open-source repository built upon the Inspect AI framework. Developed in collaboration between the Vector Institute, Arcadia Impact and the UK AI Safety Institute, Inspect Evals provides a comprehensive suite of high-quality benchmarks spanning diverse domains like coding, mathematics, cybersecurity, reasoning, and general knowledge.
+
+#### Transparent and Detailed Insights
+
+All evaluations presented on this leaderboard are run using Inspect Evals. To facilitate in-depth analysis and promote transparency, we provide [Inspect Logs](https://inspect.ai-safety-institute.org.uk/log-viewer.html) for every benchmark run. These logs offer sample and trace level reporting, allowing the community to explore the granular details of model performance.
+
+### ⚙️ Base Benchmarks
+
+These benchmarks assess fundamental reasoning and knowledge capabilities of models.
+
+<div class="benchmark-table-container">
+
+| Benchmark | Description | Key Skills |
+|-----------|-------------|------------|
+| **ARC-Easy** / **ARC-Challenge** | Multiple-choice science questions measuring scientific & commonsense reasoning. | Scientific Reasoning, Commonsense Reasoning |
+| **DROP** | Reading comprehension benchmark emphasizing discrete reasoning steps. | Reading Comprehension, Discrete Reasoning |
+| **WinoGrande** | Commonsense reasoning challenge focused on co-reference resolution. | Commonsense Reasoning, Co-reference Resolution |
+| **GSM8K** | Grade-school math word problems testing arithmetic & multi-step reasoning. | Arithmetic Reasoning, Multi-step Reasoning |
+| **HellaSwag** | Commonsense inference task centered on action completion. | Commonsense Inference, Action Completion |
+| **HumanEval** | Evaluates code generation and reasoning in a programming context. | Code Generation, Code Reasoning |
+| **IFEval** | Specialized benchmark for incremental formal reasoning. | Incremental Formal Reasoning |
+| **MATH** | High school-level math questions requiring detailed solutions. | Mathematical Reasoning, Problem Solving |
+| **MMLU** / **MMLU-Pro** | Multi-subject multiple-choice tests of advanced knowledge. | Broad Knowledge, Multi-domain Understanding |
+| **GPQA-Diamond** | Question-answering benchmark assessing deeper reasoning & knowledge linking. | Deep Reasoning, Knowledge Linking |
+| **MMMU** (Multi-Choice / Open-Ended) | Multilingual & multi-domain tasks testing structured & open responses. | Multilingual Understanding, Open-form Response |
+
+</div>
+
+### 🚀 Agentic Benchmarks
+
+These benchmarks go beyond basic reasoning and evaluate more advanced, autonomous, or "agentic" capabilities of models, such as planning and interaction.
+
+<div class="benchmark-table-container">
+
+| Benchmark | Description | Key Skills |
+|-----------|-------------|------------|
+| **GAIA** | Evaluates autonomous reasoning, planning, problem-solving, & multi-turn interactions. | Autonomous Reasoning, Planning, Problem Solving |
+| [**InterCode-CTF**](https://ukgovernmentbeis.github.io/inspect_evals/evals/cybersecurity/in_house_ctf/) | Capture-the-flag challenge focused on code interpretation & debugging. | Code Interpretation, Generative Debugging |
+| **GDM-In-House-CTF** | Capture-the-flag challenge testing web application security skills. | Web Security Skills, Security Awareness |
+| **AgentHarm** / **AgentHarm-Benign** | Measures harmfulness of LLM agents (and benign behavior baseline). | Harmfulness Detection, Agent Safety |
+| **SWE-Bench** | Tests AI agent ability to solve software engineering tasks. | Software Engineering Tasks, Bug Fixing |
+
+</div>
 """
 
-EVALUATION_QUEUE_TEXT = """
-## Some good practices before submitting a model
+REPRODUCIBILITY_TEXT = """
+## Reproduce and Extend the Leaderboard
 
 ### 1) Make sure you can load your model and tokenizer using AutoClasses:
 ```python
@@ -134,6 +142,3 @@ Make sure you have followed the above steps first.
 If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
 """
 
-CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
-CITATION_BUTTON_TEXT = r"""
-"""
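The new About and Reproducibility copy states that every score comes from an Inspect Evals run with published logs. A hedged sketch of reproducing a single benchmark locally, assuming the `inspect-ai` and `inspect_evals` packages are installed and a provider API key is in the environment (task, model, and `limit` are illustrative choices, not the leaderboard's exact configuration):

```python
# Sketch only: reproduce one leaderboard benchmark with Inspect.
# Assumes `pip install inspect-ai` plus the inspect_evals package,
# and an OPENAI_API_KEY in the environment.
from inspect_ai import eval

# Runs GSM8K from the Inspect Evals registry and writes an Inspect log,
# which can then be browsed with the `inspect view` log viewer.
eval("inspect_evals/gsm8k", model="openai/gpt-4o", limit=10)
```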
src/display/css_html_js.py CHANGED
@@ -1,219 +1,4 @@
-# custom_css = """
 
-# .markdown-text {
-#     font-size: 16px !important;
-# }
-
-# #models-to-add-text {
-#     font-size: 18px !important;
-# }
-
-# #citation-button span {
-#     font-size: 16px !important;
-# }
-
-# #citation-button textarea {
-#     font-size: 16px !important;
-# }
-
-# #citation-button > label > button {
-#     margin: 6px;
-#     transform: scale(1.3);
-# }
-
-# #leaderboard-table {
-#     margin-top: 15px
-# }
-
-# #leaderboard-table-lite {
-#     margin-top: 15px
-# }
-
-# #search-bar-table-box > div:first-child {
-#     background: none;
-#     border: none;
-# }
-
-# #search-bar {
-#     padding: 0px;
-# }
-
-# /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
-# #leaderboard-table td:nth-child(2),
-# #leaderboard-table th:nth-child(2) {
-#     max-width: 400px;
-#     overflow: auto;
-#     white-space: nowrap;
-# }
-
-# .tab-buttons button {
-#     font-size: 20px;
-# }
-
-# #scale-logo {
-#     border-style: none !important;
-#     box-shadow: none;
-#     display: block;
-#     margin-left: auto;
-#     margin-right: auto;
-#     max-width: 600px;
-# }
-
-# #scale-logo .download {
-#     display: none;
-# }
-# #filter_type{
-#     border: 0;
-#     padding-left: 0;
-#     padding-top: 0;
-# }
-# #filter_type label {
-#     display: flex;
-# }
-# #filter_type label > span{
-#     margin-top: var(--spacing-lg);
-#     margin-right: 0.5em;
-# }
-# #filter_type label > .wrap{
-#     width: 103px;
-# }
-# #filter_type label > .wrap .wrap-inner{
-#     padding: 2px;
-# }
-# #filter_type label > .wrap .wrap-inner input{
-#     width: 1px
-# }
-# #filter-columns-type{
-#     border:0;
-#     padding:0.5;
-# }
-# #filter-columns-size{
-#     border:0;
-#     padding:0.5;
-# }
-# #box-filter > .form{
-#     border: 0
-# }
-
-# body, .gradio-container {
-#     font-family: Roboto, sans-serif;
-#     background-color: #ffffff;
-#     color: #000000; /* main text color */
-#     margin: 0;
-#     padding: 0;
-# }
-
-# h1, h2, h3, h4, h5, h6 {
-#     color: #eb088a; /* your brand color for headings */
-#     font-weight: 600;
-#     margin-bottom: 1rem;
-# }
-
-# /* Example ‘intro-block’ styling if you want extra flair */
-# .intro-block {
-#     background-color: #eb088a10; /* light tinted background */
-#     padding: 1.5rem;
-#     border-radius: 10px;
-#     margin-bottom: 2rem;
-# }
-
-# """
-
-# custom_css = """
-# /* 1) Load Karbon Font: Make sure this points to your actual font files */
-# @font-face {
-#     font-family: 'Karbon';
-#     src: url('path/to/Karbon.woff2') format('woff2'),
-#          url('path/to/Karbon.woff') format('woff');
-#     font-weight: normal;
-#     font-style: normal;
-# }
-
-# /* 2) Global Container */
-# body, .gradio-container {
-#     font-family: 'Karbon', sans-serif;
-#     margin: 0;
-#     padding: 0;
-#     background-color: #fafafa; /* Light background */
-#     color: #000000;
-# }
-
-# .gradio-container {
-#     max-width: 1200px;
-#     margin: 0 auto;
-#     padding: 2rem 1rem;
-# }
-
-# /* 3) Headings, with brand color #eb088a */
-# h1, h2, h3, h4, h5, h6 {
-#     color: #000000;
-#     margin-bottom: 1rem;
-#     font-weight: 600;
-# }
-
-# /* 4) Intro Block for a slight highlight */
-# .intro-block {
-#     background-color: #ffe2f1; /* lighter tint of #eb088a */
-#     padding: 1.5rem;
-#     border-radius: 8px;
-#     border: 1px solid #f8badb;
-#     margin-bottom: 2rem;
-# }
-
-# /* 5) Tab styling - remove default orange styling */
-# .tab-buttons {
-#     margin-top: 1rem;
-#     margin-bottom: 1rem;
-#     display: flex;
-# }
-# .tab-buttons > .tabitem {
-#     padding: 0.6rem 1.2rem;
-#     background-color: #ffffff;
-#     border: 1px solid #eb088a;
-#     border-radius: 6px;
-#     color: #eb088a;
-#     margin-right: 5px;
-#     cursor: pointer;
-#     transition: background-color 0.2s ease, color 0.2s ease;
-#     font-weight: 500;
-# }
-# .tab-buttons > .tabitem.selected {
-#     background-color: #eb088a;
-#     color: #ffffff;
-# }
-# .tab-buttons > .tabitem:hover {
-#     background-color: #eb088a;
-#     color: #ffffff;
-# }
-
-# /* 6) Dataframe Styling */
-# .gr-dataframe table {
-#     width: 100%;
-#     border-collapse: collapse;
-#     border: 1px solid #cccccc;
-#     margin-bottom: 2rem;
-# }
-# .gr-dataframe th {
-#     background-color: #eb088a;
-#     color: #ffffff;
-#     padding: 0.6rem;
-#     text-align: left;
-#     font-weight: 600;
-# }
-# .gr-dataframe td {
-#     padding: 0.6rem;
-#     border-bottom: 1px solid #e0e0e0;
-# }
-# .gr-dataframe tr:nth-child(even) {
-#     background-color: #fdfdfd;
-# }
-
-# /* 7) Make default markdown text nice */
-# .markdown-text p {
-#     margin-bottom: 1rem;
-#     line-height: 1.6;
-# }
-# """
 custom_js = """
 function tableLinkHack() {
     // This is a hack to make the table links work
@@ -228,6 +13,18 @@ function tableLinkHack() {
 
 custom_css = """
 
+.intro-text {
+    text-align: center; /* Center the text */
+    font-size: 3rem; /* Slightly larger font size if desired */
+    color: #555; /* A slightly softer color than black */
+    margin-bottom: 5px; /* Add some space below the text before the tabs */
+    padding: 0 10px; /* Add some horizontal padding for wider screens */
+    line-height: 1.2; /* Improve readability with line height */
+    max-width: 1200px; /* Limit width for better readability on large screens */
+    margin-left: auto; /* Center the block horizontally */
+    margin-right: auto; /* Center the block horizontally */
+}
+
 .tab-buttons button {
     font-size: 20px;
 }
@@ -311,15 +108,30 @@ custom_css = """
     }
 }
 
+.benchmark-table-container table {
+    width: 100%; /* Make table take full width of its container */
+    border-collapse: collapse; /* Remove spacing between table cells */
+    margin-bottom: 20px; /* Add some space below the table */
+}
 
-"""
+.benchmark-table-container th, .benchmark-table-container td {
+    border: 1px solid #ddd; /* Light gray border for cells */
+    padding: 8px; /* Padding within cells for better spacing */
+    text-align: left; /* Align text to the left within cells */
+    vertical-align: top; /* Align content to the top of cells */
+}
+
+.benchmark-table-container th {
+    background-color: #f2f2f2; /* Light gray background for header row */
+    font-weight: bold; /* Make header text bold */
+}
+
+.benchmark-table-container tbody tr:nth-child(even) {
+    background-color: #f9f9f9; /* Very light gray background for even rows for zebra striping */
+}
 
-# .selected.svelte-1tcem6n.svelte-1tcem6n {
-#     background-color: #000000 !important; /* Desired background color */
-#     color: #eb088a !important; /* Desired text color */
-#     border-color: #eb088a !important; /* Desired border color */
-# }
 
+"""
 
 get_window_url_params = """
 function(url_params) {
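For context on how the `custom_css` and `custom_js` strings above are consumed: Gradio's `Blocks` constructor accepts raw CSS and an on-load JS function. The wiring itself is outside this diff, so treat the snippet as an assumed sketch rather than the Space's exact code:

```python
import gradio as gr

# Illustrative stand-ins for the strings defined in src/display/css_html_js.py.
custom_css = ".intro-text { text-align: center; }"
custom_js = "function tableLinkHack() { console.log('table link hack'); }"

# css= injects the stylesheet; js= runs the given function once on page load.
with gr.Blocks(css=custom_css, js=custom_js) as demo:
    gr.Markdown("Hello, leaderboard", elem_classes="intro-text")

if __name__ == "__main__":
    demo.launch()
```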