isakzhang committed
Commit cc86cb5 · 1 Parent(s): 0440741

add intro text

Files changed (2)
  1. app.py +2 -2
  2. src/display/about.py +6 -3
app.py CHANGED

@@ -133,7 +133,7 @@ with demo:
                 ],
                 leaderboard_table,
             )
-        with gr.TabItem("🏅 M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
+        with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
             with gr.Row():
                 search_bar = gr.Textbox(
                     placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
@@ -168,7 +168,7 @@ with demo:
                 leaderboard_table,
             )
 
-        with gr.TabItem("🏅 MMLU", elem_id="llm-benchmark-MMLU", id=2):
+        with gr.TabItem("MMLU", elem_id="llm-benchmark-MMLU", id=2):
             with gr.Row():
                 search_bar = gr.Textbox(
                     placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
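For context, here is a minimal, self-contained sketch of the pattern these hunks touch: a benchmark tab containing a search box that filters the leaderboard table on ENTER. It illustrates the Gradio wiring only and is not the Space's actual code; the sample data and the filter_models helper are assumptions.

```python
import re

import gradio as gr
import pandas as pd

# Illustrative data only; the real Space loads evaluation results for each benchmark.
df = pd.DataFrame({"Model": ["model-a", "model-b"], "Avg": [61.2, 57.8]})

def filter_models(query: str) -> pd.DataFrame:
    # The placeholder text promises multiple queries separated by ";".
    terms = [t.strip() for t in query.split(";") if t.strip()]
    if not terms:
        return df
    pattern = "|".join(re.escape(t) for t in terms)
    return df[df["Model"].str.contains(pattern, case=False, regex=True)]

with gr.Blocks() as demo:
    with gr.Tabs():
        with gr.TabItem("M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
                    show_label=False,
                )
            leaderboard_table = gr.Dataframe(value=df)
            # Pressing ENTER in the search box re-renders the table with matching rows.
            search_bar.submit(filter_models, inputs=search_bar, outputs=leaderboard_table)

demo.launch()
```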
src/display/about.py CHANGED

@@ -16,12 +16,15 @@ class Tasks(Enum):
 
 
 # Your leaderboard name
-TITLE = """<h1 align="center" id="space-title">SeaExam Leaderboard</h1>"""
+TITLE = """<h1 align="center" id="space-title">📃 SeaExam Leaderboard</h1>"""
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-🟢: base
-🔶: chat
+This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human exam-type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).
+
+For additional details such as datasets, evaluation criteria, and reproducibility, please refer to the "📝 About" tab.
+
+Also check the [SeaBench leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaBench_leaderboard), which focuses on evaluating the model's ability to follow instructions in real-world multi-turn settings.
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
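TITLE and INTRODUCTION_TEXT are plain module-level strings, so this change only affects what the app renders at the top of the page. Below is a rough sketch of how such constants are typically consumed in app.py; the exact components used by this Space are an assumption.

```python
import gradio as gr

from src.display.about import INTRODUCTION_TEXT, TITLE

with gr.Blocks() as demo:
    # TITLE is raw HTML (an <h1> tag), so gr.HTML renders it as-is.
    gr.HTML(TITLE)
    # INTRODUCTION_TEXT contains markdown (e.g. the SeaBench link), so gr.Markdown fits.
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

demo.launch()
```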