lukecq committed on
Commit
441cdc8
·
1 Parent(s): 65c6479

add results

Browse files
Files changed (2) hide show
  1. app.py +22 -3
  2. results/SeaExam_results_0419.csv +46 -0
app.py CHANGED
@@ -1,7 +1,26 @@
1
  import gradio as gr
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
 
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  iface.launch()
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import os
4
 
5
+ # clone / pull the lmeh eval data
6
+ TOKEN = os.environ.get("TOKEN", None)
7
+ RESULTS_REPO = f"lukecq/SeaExam-results"
8
+ CACHE_PATH=os.getenv("HF_HOME", ".")
9
+ EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
10
 
11
# Load the CSV file
def load_csv(file_path):
    """Read the CSV at *file_path* into a pandas DataFrame and return it."""
    return pd.read_csv(file_path)
15
+
16
# Example path to your CSV file
csv_path = './results/SeaExam_results_0419.csv'
# Loaded once at import time; the whole leaderboard table lives in this global.
data = load_csv(csv_path)
19
+
20
def show_data():
    # Gradio callback: return the DataFrame loaded at module import time.
    return data
22
+
23
# Leaderboard UI: no inputs, a single dataframe output rendered from show_data().
iface = gr.Interface(fn=show_data, outputs="dataframe", title="SeaExam Leaderboard",
                     description="Leaderboard for the SeaExam competition.")
iface.launch()
26
+
results/SeaExam_results_0419.csv ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,,,,M3Exam,,,,,,
2
+ Model,type,open?,shot,en,zh,id,th,vi,avg,avg_sea
3
+ gpt4-1106,chat,N,0,0.877,0.789,0.649,0.69,0.708,0.742,0.682
4
+ Meta-Llama-3-70B,base,Y,3,0.844,0.756,0.619,0.662,0.683,0.713,0.654
5
+ Meta-Llama-3-70B-Instruct,chat,Y,3,0.863,0.694,0.63,0.643,0.684,0.703,0.652
6
+ Qwen1.5-72B,base,Y,3,0.839,0.925,0.587,0.568,0.648,0.713,0.601
7
+ claude-3-sonnet-20240229,chat,N,0,0.789,0.683,0.585,0.571,0.626,0.651,0.594
8
+ claude-3-haiku-20240307,chat,N,0,0.79,0.652,0.563,0.573,0.631,0.642,0.589
9
+ dbrx-base,base,Y,3,0.808,0.689,0.534,0.507,0.605,0.629,0.548
10
+ Mixtral-8x22B-v0.1,base,Y,3,0.839,0.696,0.57,0.487,0.601,0.639,0.553
11
+ SeaLLM-7B-v2.5,chat,Y,3,0.759,0.602,0.501,0.507,0.618,0.597,0.542
12
+ Qwen1.5-14B,base,Y,3,0.797,0.862,0.527,0.478,0.549,0.643,0.518
13
+ gemini-1.0-pro,chat,N,0,0.569,0.725,0.44,0.492,0.605,0.566,0.513
14
+ gemma-7b,base,Y,3,0.731,0.528,0.465,0.463,0.597,0.557,0.508
15
+ gpt-3.5-turbo-0125,chat,N,3,0.751,0.589,0.5,0.389,0.534,0.552,0.474
16
+ Mixtral-8x7B-v0.1,base,Y,3,0.771,0.606,0.48,0.435,0.522,0.563,0.479
17
+ Llama-2-70b-hf,base,Y,3,0.749,0.599,0.492,0.345,0.559,0.549,0.465
18
+ Meta-Llama-3-8B,base,Y,3,0.7,0.54,0.427,0.454,0.509,0.526,0.463
19
+ Sailor-7B-Chat,chat,Y,3,0.656,0.651,0.474,0.464,0.512,0.551,0.483
20
+ gpt-3.5-turbo-0125,chat,N,0,0.756,0.606,0.493,0.397,0.529,0.556,0.473
21
+ Yi-34B,base,Y,3,0.815,0.86,0.541,0.381,0.502,0.62,0.475
22
+ Meta-Llama-3-8B-Instruct,chat,Y,3,0.725,0.537,0.466,0.371,0.509,0.522,0.449
23
+ SeaLLM-7B-v2,chat,Y,3,0.702,0.516,0.432,0.406,0.515,0.514,0.451
24
+ Sailor-7B,base,Y,3,0.611,0.632,0.443,0.41,0.499,0.519,0.451
25
+ Qwen1.5-7B-Chat,chat,Y,3,0.646,0.627,0.43,0.398,0.492,0.519,0.44
26
+ Yi-9B,base,Y,3,0.775,0.792,0.492,0.357,0.453,0.574,0.434
27
+ Qwen1.5-7B,base,Y,3,0.721,0.811,0.441,0.361,0.45,0.557,0.417
28
+ Mistral-7B-v0.1,base,Y,3,0.677,0.497,0.422,0.346,0.409,0.47,0.392
29
+ gemma-7b-it,chat,Y,3,0.622,0.427,0.373,0.321,0.467,0.442,0.387
30
+ Mistral-7B-Instruct-v0.2,chat,Y,3,0.657,0.495,0.404,0.304,0.399,0.452,0.369
31
+ Qwen1.5-4B,base,Y,3,0.664,0.772,0.351,0.319,0.389,0.499,0.353
32
+ Yi-6B,base,Y,3,0.704,0.809,0.411,0.298,0.37,0.519,0.36
33
+ Llama-2-13b-hf,base,Y,3,0.605,0.365,0.384,0.288,0.409,0.41,0.36
34
+ Llama-2-13b-chat-hf,chat,Y,3,0.589,0.382,0.372,0.288,0.39,0.404,0.35
35
+ Qwen1.5-MoE-A2.7B,base,Y,3,0.628,0.789,0.366,0.254,0.402,0.488,0.341
36
+ gemma-2b-it,chat,Y,3,0.439,0.377,0.316,0.284,0.357,0.355,0.319
37
+ Llama-2-7b-chat-hf,chat,Y,3,0.566,0.326,0.341,0.268,0.34,0.368,0.317
38
+ bloomz-7b1,chat,Y,3,0.431,0.377,0.361,0.256,0.356,0.356,0.325
39
+ gemma-2b,base,Y,3,0.417,0.275,0.304,0.286,0.316,0.32,0.302
40
+ Llama-2-7b-hf,base,Y,3,0.491,0.323,0.308,0.263,0.317,0.341,0.296
41
+ Qwen1.5-1.8B,base,Y,3,0.546,0.713,0.326,0.244,0.324,0.43,0.298
42
+ Qwen1.5-0.5B,base,Y,3,0.446,0.61,0.294,0.26,0.297,0.381,0.284
43
+ sea-lion-7b-instruct,chat,Y,3,0.27,0.273,0.287,0.264,0.269,0.273,0.273
44
+ sea-lion-7b,base,Y,3,0.245,0.228,0.254,0.264,0.241,0.247,0.253
45
+ phi-2,base,Y,3,0.582,0.286,0.295,0.21,0.269,0.328,0.258
46
+ bloom-7b1,base,Y,3,0.227,0.183,0.253,0.24,0.243,0.229,0.246