isakzhang committed on
Commit
60867e4
·
1 Parent(s): 51b9370

update scripts

Browse files
Files changed (3) hide show
  1. app.py +3 -0
  2. eval-results/README.md +24 -0
  3. src/display/about.py +3 -0
app.py CHANGED
@@ -12,6 +12,7 @@ from src.display.about import (
12
  INTRODUCTION_TEXT,
13
  LLM_BENCHMARKS_TEXT,
14
  TITLE,
 
15
  )
16
  from src.display.css_html_js import custom_css
17
  from src.envs import API
@@ -75,7 +76,9 @@ def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
75
  demo = gr.Blocks(css=custom_css)
76
  with demo:
77
  gr.HTML(TITLE)
 
78
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
79
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
80
  with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-Sum", id=0):
81
  with gr.Row():
 
12
  INTRODUCTION_TEXT,
13
  LLM_BENCHMARKS_TEXT,
14
  TITLE,
15
+ SUB_TITLE,
16
  )
17
  from src.display.css_html_js import custom_css
18
  from src.envs import API
 
76
  demo = gr.Blocks(css=custom_css)
77
  with demo:
78
  gr.HTML(TITLE)
79
+ gr.HTML(SUB_TITLE)
80
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
81
+
82
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
83
  with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-Sum", id=0):
84
  with gr.Row():
eval-results/README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ - zh
6
+ - vi
7
+ - id
8
+ - th
9
+
10
+ size_categories:
11
+ - n<1K
12
+ configs:
13
+ - config_name: results
14
+ data_files: SeaExam_results.csv
15
+ ---
16
+
17
+ # About
18
+
19
+ This repo contains the original results for the space [SeaExam Leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaExam_leaderboard).
20
+
21
+ To reproduce our results, use the script in [this repo](https://github.com/DAMO-NLP-SG/SeaExam/tree/main). The script will download the model and tokenizer, and evaluate the model on the benchmark data.
22
+ ```bash
23
+ python scripts/main.py --model $model_name_or_path
24
+ ```
src/display/about.py CHANGED
@@ -18,6 +18,9 @@ class Tasks(Enum):
18
  # Your leaderboard name
19
  TITLE = """<h1 align="center" id="space-title">📃 SeaExam Leaderboard</h1>"""
20
 
 
 
 
21
  # What does your leaderboard evaluate?
22
  INTRODUCTION_TEXT = """
23
  This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human-exam type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).
 
18
  # Your leaderboard name
19
  TITLE = """<h1 align="center" id="space-title">📃 SeaExam Leaderboard</h1>"""
20
 
21
+ # subtitle
22
+ SUB_TITLE = """<h2 align="center" id="space-title">What is the best LLM for Southeast Asian Languages❓</h2>"""
23
+
24
  # What does your leaderboard evaluate?
25
  INTRODUCTION_TEXT = """
26
  This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human-exam type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).