Spaces:
Running
Running
booydar
committed on
Commit
·
170a088
1
Parent(s):
b4a4293
cleanup; add average columns
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +16 -9
- results/ARMT/qa1/1000000.csv +0 -2
- results/ARMT/qa1/10000000.csv +0 -2
- results/ARMT/qa1/128000.csv +0 -2
- results/ARMT/qa1/32000.csv +0 -2
- results/ARMT/qa1/500000.csv +0 -2
- results/ARMT/qa1/64000.csv +0 -2
- results/ARMT/qa1/8000.csv +0 -2
- results/ARMT/qa2/1000000.csv +0 -2
- results/ARMT/qa2/10000000.csv +0 -2
- results/ARMT/qa2/128000.csv +0 -2
- results/ARMT/qa2/16000.csv +0 -2
- results/ARMT/qa2/32000.csv +0 -2
- results/ARMT/qa2/4000.csv +0 -2
- results/ARMT/qa2/500000.csv +0 -2
- results/ARMT/qa2/64000.csv +0 -2
- results/ARMT/qa2/8000.csv +0 -2
- results/ARMT/qa3/1000000.csv +0 -2
- results/ARMT/qa3/10000000.csv +0 -2
- results/ARMT/qa3/128000.csv +0 -2
- results/ARMT/qa3/16000.csv +0 -2
- results/ARMT/qa3/32000.csv +0 -2
- results/ARMT/qa3/4000.csv +0 -2
- results/ARMT/qa3/500000.csv +0 -2
- results/ARMT/qa3/64000.csv +0 -2
- results/ARMT/qa3/8000.csv +0 -2
- results/ARMT/qa4/1000000.csv +0 -2
- results/ARMT/qa4/10000000.csv +0 -2
- results/ARMT/qa4/128000.csv +0 -2
- results/ARMT/qa4/16000.csv +0 -2
- results/ARMT/qa4/32000.csv +0 -2
- results/ARMT/qa4/4000.csv +0 -2
- results/ARMT/qa4/500000.csv +0 -2
- results/ARMT/qa4/64000.csv +0 -2
- results/ARMT/qa4/8000.csv +0 -2
- results/ARMT/qa5/1000000.csv +0 -2
- results/ARMT/qa5/10000000.csv +0 -2
- results/ARMT/qa5/128000.csv +0 -2
- results/ARMT/qa5/16000.csv +0 -2
- results/ARMT/qa5/32000.csv +0 -2
- results/ARMT/qa5/4000.csv +0 -2
- results/ARMT/qa5/500000.csv +0 -2
- results/ARMT/qa5/64000.csv +0 -2
- results/ARMT/qa5/8000.csv +0 -2
- results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/0.csv +0 -0
- results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/1000.csv +0 -0
- results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/128000.csv +0 -0
- results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/16000.csv +0 -0
- results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/2000.csv +0 -0
- results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/32000.csv +0 -0
app.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
|
2 |
-
import ast
|
3 |
-
import argparse
|
4 |
-
import glob
|
5 |
-
import pickle
|
6 |
import gradio as gr
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
@@ -21,7 +21,7 @@ def make_default_md():
|
|
21 |
return leaderboard_md
|
22 |
|
23 |
def make_arena_leaderboard_md(total_models):
|
24 |
-
leaderboard_md = f"""Total #models: **{total_models}**. Last updated:
|
25 |
return leaderboard_md
|
26 |
|
27 |
def make_model_desc_md(f_len):
|
@@ -63,7 +63,11 @@ def load_model(folders, tab_name, msg_lengths):
|
|
63 |
for rank, i in enumerate(np.argsort(mean_score)):
|
64 |
results['Rank'][i] = rank + 1
|
65 |
|
66 |
-
|
|
|
|
|
|
|
|
|
67 |
|
68 |
def build_leaderboard_tab(folders):
|
69 |
default_md = make_default_md()
|
@@ -86,7 +90,8 @@ def build_leaderboard_tab(folders):
|
|
86 |
df = load_model(folders, tab_name, msg_lengths)
|
87 |
cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
|
88 |
|
89 |
-
df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=list(msg_lengths.values()))
|
|
|
90 |
# arena table
|
91 |
with gr.Tab(tab_name, id=tab_id):
|
92 |
md = make_arena_leaderboard_md(len(folders))
|
@@ -95,7 +100,7 @@ def build_leaderboard_tab(folders):
|
|
95 |
headers=[
|
96 |
"Rank",
|
97 |
"Model",
|
98 |
-
] + list(msg_lengths.values()),
|
99 |
datatype=[
|
100 |
"str",
|
101 |
"markdown",
|
@@ -106,11 +111,13 @@ def build_leaderboard_tab(folders):
|
|
106 |
"str",
|
107 |
"str",
|
108 |
"str",
|
|
|
|
|
109 |
],
|
110 |
value=df,
|
111 |
elem_id="arena_leaderboard_dataframe",
|
112 |
height=700,
|
113 |
-
column_widths=[20, 150] + [20] * len(msg_lengths),
|
114 |
wrap=True,
|
115 |
)
|
116 |
|
|
|
1 |
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
|
2 |
+
# import ast
|
3 |
+
# import argparse
|
4 |
+
# import glob
|
5 |
+
# import pickle
|
6 |
import gradio as gr
|
7 |
import numpy as np
|
8 |
import pandas as pd
|
|
|
21 |
return leaderboard_md
|
22 |
|
23 |
def make_arena_leaderboard_md(total_models):
|
24 |
+
leaderboard_md = f"""Total #models: **{total_models}**. Last updated: July 26, 2024."""
|
25 |
return leaderboard_md
|
26 |
|
27 |
def make_model_desc_md(f_len):
|
|
|
63 |
for rank, i in enumerate(np.argsort(mean_score)):
|
64 |
results['Rank'][i] = rank + 1
|
65 |
|
66 |
+
res_df = pd.DataFrame(results).sort_values(['Rank'])
|
67 |
+
# print(res_df.head())
|
68 |
+
res_df['Avg ≤32k'] = res_df[res_df.columns[2:7]].astype(float).fillna(0).mean(axis=1).astype(int)
|
69 |
+
res_df['Avg ≤128k'] = res_df[res_df.columns[2:9]].astype(float).fillna(0).mean(axis=1).astype(int)
|
70 |
+
return res_df
|
71 |
|
72 |
def build_leaderboard_tab(folders):
|
73 |
default_md = make_default_md()
|
|
|
90 |
df = load_model(folders, tab_name, msg_lengths)
|
91 |
cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
|
92 |
|
93 |
+
# df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=list(msg_lengths.values()))
|
94 |
+
df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=df.columns[2:])
|
95 |
# arena table
|
96 |
with gr.Tab(tab_name, id=tab_id):
|
97 |
md = make_arena_leaderboard_md(len(folders))
|
|
|
100 |
headers=[
|
101 |
"Rank",
|
102 |
"Model",
|
103 |
+
] + list(msg_lengths.values()) + ['Avg ≤32k', 'Avg ≤128k'],
|
104 |
datatype=[
|
105 |
"str",
|
106 |
"markdown",
|
|
|
111 |
"str",
|
112 |
"str",
|
113 |
"str",
|
114 |
+
"str",
|
115 |
+
"str",
|
116 |
],
|
117 |
value=df,
|
118 |
elem_id="arena_leaderboard_dataframe",
|
119 |
height=700,
|
120 |
+
column_widths=[20, 150] + [20] * len(msg_lengths) + [20] * 2,
|
121 |
wrap=True,
|
122 |
)
|
123 |
|
results/ARMT/qa1/1000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.9940000000000001
|
|
|
|
|
|
results/ARMT/qa1/10000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.9740000000000001
|
|
|
|
|
|
results/ARMT/qa1/128000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa1/32000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa1/500000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.9990000000000001
|
|
|
|
|
|
results/ARMT/qa1/64000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa1/8000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.9990000000000001
|
|
|
|
|
|
results/ARMT/qa2/1000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.996
|
|
|
|
|
|
results/ARMT/qa2/10000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.8170000000000001
|
|
|
|
|
|
results/ARMT/qa2/128000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa2/16000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa2/32000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa2/4000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.998
|
|
|
|
|
|
results/ARMT/qa2/500000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.997
|
|
|
|
|
|
results/ARMT/qa2/64000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa2/8000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa3/1000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.564
|
|
|
|
|
|
results/ARMT/qa3/10000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.275
|
|
|
|
|
|
results/ARMT/qa3/128000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.804
|
|
|
|
|
|
results/ARMT/qa3/16000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.927
|
|
|
|
|
|
results/ARMT/qa3/32000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.907
|
|
|
|
|
|
results/ARMT/qa3/4000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.909
|
|
|
|
|
|
results/ARMT/qa3/500000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.679
|
|
|
|
|
|
results/ARMT/qa3/64000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.883
|
|
|
|
|
|
results/ARMT/qa3/8000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.92
|
|
|
|
|
|
results/ARMT/qa4/1000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.998
|
|
|
|
|
|
results/ARMT/qa4/10000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.932
|
|
|
|
|
|
results/ARMT/qa4/128000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa4/16000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa4/32000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa4/4000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa4/500000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa4/64000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa4/8000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
1.0
|
|
|
|
|
|
results/ARMT/qa5/1000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.978
|
|
|
|
|
|
results/ARMT/qa5/10000000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.87
|
|
|
|
|
|
results/ARMT/qa5/128000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.988
|
|
|
|
|
|
results/ARMT/qa5/16000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.9940000000000001
|
|
|
|
|
|
results/ARMT/qa5/32000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.9890000000000001
|
|
|
|
|
|
results/ARMT/qa5/4000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.995
|
|
|
|
|
|
results/ARMT/qa5/500000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.982
|
|
|
|
|
|
results/ARMT/qa5/64000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.9890000000000001
|
|
|
|
|
|
results/ARMT/qa5/8000.csv
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
result
|
2 |
-
0.993
|
|
|
|
|
|
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/0.csv
RENAMED
File without changes
|
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/1000.csv
RENAMED
File without changes
|
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/128000.csv
RENAMED
File without changes
|
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/16000.csv
RENAMED
File without changes
|
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/2000.csv
RENAMED
File without changes
|
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/32000.csv
RENAMED
File without changes
|