Spaces:
Running
Running
add intro text
Browse files- app.py +2 -2
- src/display/about.py +6 -3
app.py
CHANGED
@@ -133,7 +133,7 @@ with demo:
|
|
133 |
],
|
134 |
leaderboard_table,
|
135 |
)
|
136 |
-
with gr.TabItem("
|
137 |
with gr.Row():
|
138 |
search_bar = gr.Textbox(
|
139 |
placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
|
@@ -168,7 +168,7 @@ with demo:
|
|
168 |
leaderboard_table,
|
169 |
)
|
170 |
|
171 |
-
with gr.TabItem("
|
172 |
with gr.Row():
|
173 |
search_bar = gr.Textbox(
|
174 |
placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
|
|
|
133 |
],
|
134 |
leaderboard_table,
|
135 |
)
|
136 |
+
with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
|
137 |
with gr.Row():
|
138 |
search_bar = gr.Textbox(
|
139 |
placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
|
|
|
168 |
leaderboard_table,
|
169 |
)
|
170 |
|
171 |
+
with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
|
172 |
with gr.Row():
|
173 |
search_bar = gr.Textbox(
|
174 |
placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
|
src/display/about.py
CHANGED
@@ -16,12 +16,15 @@ class Tasks(Enum):
|
|
16 |
|
17 |
|
18 |
# Your leaderboard name
|
19 |
-
TITLE = """<h1 align="center" id="space-title"
|
20 |
|
21 |
# What does your leaderboard evaluate?
|
22 |
INTRODUCTION_TEXT = """
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
25 |
"""
|
26 |
|
27 |
# Which evaluations are you running? how can people reproduce what you have?
|
|
|
16 |
|
17 |
|
18 |
# Your leaderboard name
|
19 |
+
TITLE = """<h1 align="center" id="space-title">π SeaExam Leaderboard</h1>"""
|
20 |
|
21 |
# What does your leaderboard evaluate?
|
22 |
INTRODUCTION_TEXT = """
|
23 |
+
This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human exam-type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).
|
24 |
+
|
25 |
+
For additional details such as datasets, evaluation criteria, and reproducibility, please refer to the "📝 About" tab.
|
26 |
+
|
27 |
+
Also check the [SeaBench leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaBench_leaderboard), which focuses on evaluating the model's ability to follow instructions in real-world multi-turn settings.
|
28 |
"""
|
29 |
|
30 |
# Which evaluations are you running? how can people reproduce what you have?
|