Spaces:
Running
Running
add results
Browse files- app.py +22 -3
- results/SeaExam_results_0419.csv +46 -0
app.py
CHANGED
@@ -1,7 +1,26 @@
|
|
1 |
import gradio as gr
|
|
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
5 |
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
iface.launch()
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
|
5 |
+
# Environment-driven settings, evaluated once at import time.
# NOTE(review): this section was originally labelled "clone / pull the lmeh
# eval data", but nothing in the visible code clones, pulls, or reads these
# names yet -- confirm they are still needed.
TOKEN = os.environ.get("TOKEN")  # None when the variable is unset
RESULTS_REPO = "lukecq/SeaExam-results"
CACHE_PATH = os.getenv("HF_HOME", ".")  # falls back to the working directory
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
|
10 |
|
11 |
+
# Load the CSV file
|
12 |
+
def load_csv(file_path, header_row=None):
    """Read a results CSV into a DataFrame, tolerating a leading banner row.

    Some result sheets put a grouping banner (for example
    ``,,,,M3Exam,,,,,,``) above the real column names.  Read naively, pandas
    takes that banner as the header, names the blank cells ``Unnamed: N``
    and turns the real header into the first data row, which also forces
    every column to strings.

    Args:
        file_path: Path or URL of the CSV file.  The file may be read twice
            when a banner is detected, so pass ``header_row`` explicitly for
            one-shot streams.
        header_row: Zero-based index of the line holding the column names.
            ``None`` (the default) auto-detects: line 0 is used unless most
            of its cells are blank, in which case line 1 is used.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    if header_row is not None:
        return pd.read_csv(file_path, header=header_row)

    frame = pd.read_csv(file_path)
    # pandas labels blank header cells "Unnamed: <position>".
    unnamed = sum(str(col).startswith("Unnamed:") for col in frame.columns)
    # A mostly-blank header is a banner, not column names.  Only re-read when
    # there is at least one further line that can serve as the real header.
    if len(frame.index) > 0 and 2 * unnamed > len(frame.columns):
        frame = pd.read_csv(file_path, header=1)
    return frame
|
15 |
+
|
16 |
+
# Leaderboard snapshot added under results/; it is read a single time at
# import so the same in-memory table is reused afterwards.
csv_path = './results/SeaExam_results_0419.csv'
data = load_csv(csv_path)
|
19 |
+
|
20 |
+
def show_data():
    """Return the module-level leaderboard table loaded at import time."""
    leaderboard = data
    return leaderboard
|
22 |
+
|
23 |
+
# show_data takes no arguments, so the interface is output-only.  `inputs` is
# passed explicitly because gr.Interface declares it without a default;
# None is the documented way to display only the output component.
iface = gr.Interface(
    fn=show_data,
    inputs=None,
    outputs="dataframe",
    title="SeaExam Leaderboard",
    description="Leaderboard for the SeaExam competition.",
)
iface.launch()
|
26 |
+
|
results/SeaExam_results_0419.csv
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,,,,M3Exam,,,,,,
|
2 |
+
Model,type,open?,shot,en,zh,id,th,vi,avg,avg_sea
|
3 |
+
gpt4-1106,chat,N,0,0.877,0.789,0.649,0.69,0.708,0.742,0.682
|
4 |
+
Meta-Llama-3-70B,base,Y,3,0.844,0.756,0.619,0.662,0.683,0.713,0.654
|
5 |
+
Meta-Llama-3-70B-Instruct,chat,Y,3,0.863,0.694,0.63,0.643,0.684,0.703,0.652
|
6 |
+
Qwen1.5-72B,base,Y,3,0.839,0.925,0.587,0.568,0.648,0.713,0.601
|
7 |
+
claude-3-sonnet-20240229,chat,N,0,0.789,0.683,0.585,0.571,0.626,0.651,0.594
|
8 |
+
claude-3-haiku-20240307,chat,N,0,0.79,0.652,0.563,0.573,0.631,0.642,0.589
|
9 |
+
dbrx-base,base,Y,3,0.808,0.689,0.534,0.507,0.605,0.629,0.548
|
10 |
+
Mixtral-8x22B-v0.1,base,Y,3,0.839,0.696,0.57,0.487,0.601,0.639,0.553
|
11 |
+
SeaLLM-7B-v2.5,chat,Y,3,0.759,0.602,0.501,0.507,0.618,0.597,0.542
|
12 |
+
Qwen1.5-14B,base,Y,3,0.797,0.862,0.527,0.478,0.549,0.643,0.518
|
13 |
+
gemini-1.0-pro,chat,N,0,0.569,0.725,0.44,0.492,0.605,0.566,0.513
|
14 |
+
gemma-7b,base,Y,3,0.731,0.528,0.465,0.463,0.597,0.557,0.508
|
15 |
+
gpt-3.5-turbo-0125,chat,N,3,0.751,0.589,0.5,0.389,0.534,0.552,0.474
|
16 |
+
Mixtral-8x7B-v0.1,base,Y,3,0.771,0.606,0.48,0.435,0.522,0.563,0.479
|
17 |
+
Llama-2-70b-hf,base,Y,3,0.749,0.599,0.492,0.345,0.559,0.549,0.465
|
18 |
+
Meta-Llama-3-8B,base,Y,3,0.7,0.54,0.427,0.454,0.509,0.526,0.463
|
19 |
+
Sailor-7B-Chat,chat,Y,3,0.656,0.651,0.474,0.464,0.512,0.551,0.483
|
20 |
+
gpt-3.5-turbo-0125,chat,N,0,0.756,0.606,0.493,0.397,0.529,0.556,0.473
|
21 |
+
Yi-34B,base,Y,3,0.815,0.86,0.541,0.381,0.502,0.62,0.475
|
22 |
+
Meta-Llama-3-8B-Instruct,chat,Y,3,0.725,0.537,0.466,0.371,0.509,0.522,0.449
|
23 |
+
SeaLLM-7B-v2,chat,Y,3,0.702,0.516,0.432,0.406,0.515,0.514,0.451
|
24 |
+
Sailor-7B,base,Y,3,0.611,0.632,0.443,0.41,0.499,0.519,0.451
|
25 |
+
Qwen1.5-7B-Chat,chat,Y,3,0.646,0.627,0.43,0.398,0.492,0.519,0.44
|
26 |
+
Yi-9B,base,Y,3,0.775,0.792,0.492,0.357,0.453,0.574,0.434
|
27 |
+
Qwen1.5-7B,base,Y,3,0.721,0.811,0.441,0.361,0.45,0.557,0.417
|
28 |
+
Mistral-7B-v0.1,base,Y,3,0.677,0.497,0.422,0.346,0.409,0.47,0.392
|
29 |
+
gemma-7b-it,chat,Y,3,0.622,0.427,0.373,0.321,0.467,0.442,0.387
|
30 |
+
Mistral-7B-Instruct-v0.2,chat,Y,3,0.657,0.495,0.404,0.304,0.399,0.452,0.369
|
31 |
+
Qwen1.5-4B,base,Y,3,0.664,0.772,0.351,0.319,0.389,0.499,0.353
|
32 |
+
Yi-6B,base,Y,3,0.704,0.809,0.411,0.298,0.37,0.519,0.36
|
33 |
+
Llama-2-13b-hf,base,Y,3,0.605,0.365,0.384,0.288,0.409,0.41,0.36
|
34 |
+
Llama-2-13b-chat-hf,chat,Y,3,0.589,0.382,0.372,0.288,0.39,0.404,0.35
|
35 |
+
Qwen1.5-MoE-A2.7B,base,Y,3,0.628,0.789,0.366,0.254,0.402,0.488,0.341
|
36 |
+
gemma-2b-it,chat,Y,3,0.439,0.377,0.316,0.284,0.357,0.355,0.319
|
37 |
+
Llama-2-7b-chat-hf,chat,Y,3,0.566,0.326,0.341,0.268,0.34,0.368,0.317
|
38 |
+
bloomz-7b1,chat,Y,3,0.431,0.377,0.361,0.256,0.356,0.356,0.325
|
39 |
+
gemma-2b,base,Y,3,0.417,0.275,0.304,0.286,0.316,0.32,0.302
|
40 |
+
Llama-2-7b-hf,base,Y,3,0.491,0.323,0.308,0.263,0.317,0.341,0.296
|
41 |
+
Qwen1.5-1.8B,base,Y,3,0.546,0.713,0.326,0.244,0.324,0.43,0.298
|
42 |
+
Qwen1.5-0.5B,base,Y,3,0.446,0.61,0.294,0.26,0.297,0.381,0.284
|
43 |
+
sea-lion-7b-instruct,chat,Y,3,0.27,0.273,0.287,0.264,0.269,0.273,0.273
|
44 |
+
sea-lion-7b,base,Y,3,0.245,0.228,0.254,0.264,0.241,0.247,0.253
|
45 |
+
phi-2,base,Y,3,0.582,0.286,0.295,0.21,0.269,0.328,0.258
|
46 |
+
bloom-7b1,base,Y,3,0.227,0.183,0.253,0.24,0.243,0.229,0.246
|