Spaces:

SeaLLMs
/

LLM_Leaderboard_for_SEA

Running

isakzhang committed on Apr 24, 2024

Commit

60867e4

1 Parent(s): 51b9370

update scripts

Files changed (3) hide show

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ from src.display.about import (
     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
 )
 from src.display.css_html_js import custom_css
 from src.envs import API
@@ -75,7 +76,9 @@ def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-Sum", id=0):
             with gr.Row():

     INTRODUCTION_TEXT,
     LLM_BENCHMARKS_TEXT,
     TITLE,
+    SUB_TITLE,
 )
 from src.display.css_html_js import custom_css
 from src.envs import API
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
+    gr.HTML(SUB_TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 Overall", elem_id="llm-benchmark-Sum", id=0):
             with gr.Row():

eval-results/README.md ADDED Viewed

+---
+license: apache-2.0
+language:
+- en
+- zh
+- vi
+- id
+- th
+size_categories:
+- n<1K
+configs:
+- config_name: results
+  data_files: SeaExam_results.csv
+---
+# About
+This repo contains the original results for the space [SeaExam Leaderboard](https://huggingface.co/spaces/SeaLLMs/SeaExam_leaderboard).
+To reproduce our results, use the script in [this repo](https://github.com/DAMO-NLP-SG/SeaExam/tree/main). The script will download the model and tokenizer, and evaluate the model on the benchmark data.
+```bash
+python scripts/main.py --model $model_name_or_path
+```

src/display/about.py CHANGED Viewed

@@ -18,6 +18,9 @@ class Tasks(Enum):
 # Your leaderboard name
 TITLE = """<h1 align="center" id="space-title">📃 SeaExam Leaderboard</h1>"""
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human-exam type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).

 # Your leaderboard name
 TITLE = """<h1 align="center" id="space-title">📃 SeaExam Leaderboard</h1>"""
+# subtitle
+SUB_TITLE = """<h2 align="center" id="space-title">What is the best LLM for Southeast Asian Languages❓</h2>"""
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
 This leaderboard is specifically designed to evaluate large language models (LLMs) for Southeast Asian (SEA) languages. It assesses model performance using human-exam type benchmarks, reflecting the model's world knowledge (e.g., with language or social science subjects) and reasoning abilities (e.g., with mathematics or natural science subjects).