booydar committed on
Commit
170a088
·
1 Parent(s): b4a4293

cleanup; add average columns

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. app.py +16 -9
  2. results/ARMT/qa1/1000000.csv +0 -2
  3. results/ARMT/qa1/10000000.csv +0 -2
  4. results/ARMT/qa1/128000.csv +0 -2
  5. results/ARMT/qa1/32000.csv +0 -2
  6. results/ARMT/qa1/500000.csv +0 -2
  7. results/ARMT/qa1/64000.csv +0 -2
  8. results/ARMT/qa1/8000.csv +0 -2
  9. results/ARMT/qa2/1000000.csv +0 -2
  10. results/ARMT/qa2/10000000.csv +0 -2
  11. results/ARMT/qa2/128000.csv +0 -2
  12. results/ARMT/qa2/16000.csv +0 -2
  13. results/ARMT/qa2/32000.csv +0 -2
  14. results/ARMT/qa2/4000.csv +0 -2
  15. results/ARMT/qa2/500000.csv +0 -2
  16. results/ARMT/qa2/64000.csv +0 -2
  17. results/ARMT/qa2/8000.csv +0 -2
  18. results/ARMT/qa3/1000000.csv +0 -2
  19. results/ARMT/qa3/10000000.csv +0 -2
  20. results/ARMT/qa3/128000.csv +0 -2
  21. results/ARMT/qa3/16000.csv +0 -2
  22. results/ARMT/qa3/32000.csv +0 -2
  23. results/ARMT/qa3/4000.csv +0 -2
  24. results/ARMT/qa3/500000.csv +0 -2
  25. results/ARMT/qa3/64000.csv +0 -2
  26. results/ARMT/qa3/8000.csv +0 -2
  27. results/ARMT/qa4/1000000.csv +0 -2
  28. results/ARMT/qa4/10000000.csv +0 -2
  29. results/ARMT/qa4/128000.csv +0 -2
  30. results/ARMT/qa4/16000.csv +0 -2
  31. results/ARMT/qa4/32000.csv +0 -2
  32. results/ARMT/qa4/4000.csv +0 -2
  33. results/ARMT/qa4/500000.csv +0 -2
  34. results/ARMT/qa4/64000.csv +0 -2
  35. results/ARMT/qa4/8000.csv +0 -2
  36. results/ARMT/qa5/1000000.csv +0 -2
  37. results/ARMT/qa5/10000000.csv +0 -2
  38. results/ARMT/qa5/128000.csv +0 -2
  39. results/ARMT/qa5/16000.csv +0 -2
  40. results/ARMT/qa5/32000.csv +0 -2
  41. results/ARMT/qa5/4000.csv +0 -2
  42. results/ARMT/qa5/500000.csv +0 -2
  43. results/ARMT/qa5/64000.csv +0 -2
  44. results/ARMT/qa5/8000.csv +0 -2
  45. results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/0.csv +0 -0
  46. results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/1000.csv +0 -0
  47. results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/128000.csv +0 -0
  48. results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/16000.csv +0 -0
  49. results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/2000.csv +0 -0
  50. results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/32000.csv +0 -0
app.py CHANGED
@@ -1,8 +1,8 @@
1
  """A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
2
- import ast
3
- import argparse
4
- import glob
5
- import pickle
6
  import gradio as gr
7
  import numpy as np
8
  import pandas as pd
@@ -21,7 +21,7 @@ def make_default_md():
21
  return leaderboard_md
22
 
23
  def make_arena_leaderboard_md(total_models):
24
- leaderboard_md = f"""Total #models: **{total_models}**. Last updated: May 09, 2024."""
25
  return leaderboard_md
26
 
27
  def make_model_desc_md(f_len):
@@ -63,7 +63,11 @@ def load_model(folders, tab_name, msg_lengths):
63
  for rank, i in enumerate(np.argsort(mean_score)):
64
  results['Rank'][i] = rank + 1
65
 
66
- return pd.DataFrame(results).sort_values(['Rank'])
 
 
 
 
67
 
68
  def build_leaderboard_tab(folders):
69
  default_md = make_default_md()
@@ -86,7 +90,8 @@ def build_leaderboard_tab(folders):
86
  df = load_model(folders, tab_name, msg_lengths)
87
  cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
88
 
89
- df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=list(msg_lengths.values()))
 
90
  # arena table
91
  with gr.Tab(tab_name, id=tab_id):
92
  md = make_arena_leaderboard_md(len(folders))
@@ -95,7 +100,7 @@ def build_leaderboard_tab(folders):
95
  headers=[
96
  "Rank",
97
  "Model",
98
- ] + list(msg_lengths.values()),
99
  datatype=[
100
  "str",
101
  "markdown",
@@ -106,11 +111,13 @@ def build_leaderboard_tab(folders):
106
  "str",
107
  "str",
108
  "str",
 
 
109
  ],
110
  value=df,
111
  elem_id="arena_leaderboard_dataframe",
112
  height=700,
113
- column_widths=[20, 150] + [20] * len(msg_lengths),
114
  wrap=True,
115
  )
116
 
 
1
  """A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
2
+ # import ast
3
+ # import argparse
4
+ # import glob
5
+ # import pickle
6
  import gradio as gr
7
  import numpy as np
8
  import pandas as pd
 
21
  return leaderboard_md
22
 
23
  def make_arena_leaderboard_md(total_models):
24
+ leaderboard_md = f"""Total #models: **{total_models}**. Last updated: July 26, 2024."""
25
  return leaderboard_md
26
 
27
  def make_model_desc_md(f_len):
 
63
  for rank, i in enumerate(np.argsort(mean_score)):
64
  results['Rank'][i] = rank + 1
65
 
66
+ res_df = pd.DataFrame(results).sort_values(['Rank'])
67
+ # print(res_df.head())
68
+ res_df['Avg ≀32k'] = res_df[res_df.columns[2:7]].astype(float).fillna(0).mean(axis=1).astype(int)
69
+ res_df['Avg ≀128k'] = res_df[res_df.columns[2:9]].astype(float).fillna(0).mean(axis=1).astype(int)
70
+ return res_df
71
 
72
  def build_leaderboard_tab(folders):
73
  default_md = make_default_md()
 
90
  df = load_model(folders, tab_name, msg_lengths)
91
  cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
92
 
93
+ # df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=list(msg_lengths.values()))
94
+ df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=df.columns[2:])
95
  # arena table
96
  with gr.Tab(tab_name, id=tab_id):
97
  md = make_arena_leaderboard_md(len(folders))
 
100
  headers=[
101
  "Rank",
102
  "Model",
103
+ ] + list(msg_lengths.values()) + ['Avg ≀32k', 'Avg ≀128k'],
104
  datatype=[
105
  "str",
106
  "markdown",
 
111
  "str",
112
  "str",
113
  "str",
114
+ "str",
115
+ "str",
116
  ],
117
  value=df,
118
  elem_id="arena_leaderboard_dataframe",
119
  height=700,
120
+ column_widths=[20, 150] + [20] * len(msg_lengths) + [20] * 2,
121
  wrap=True,
122
  )
123
 
results/ARMT/qa1/1000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.9940000000000001
 
 
 
results/ARMT/qa1/10000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.9740000000000001
 
 
 
results/ARMT/qa1/128000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa1/32000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa1/500000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.9990000000000001
 
 
 
results/ARMT/qa1/64000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa1/8000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.9990000000000001
 
 
 
results/ARMT/qa2/1000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.996
 
 
 
results/ARMT/qa2/10000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.8170000000000001
 
 
 
results/ARMT/qa2/128000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa2/16000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa2/32000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa2/4000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.998
 
 
 
results/ARMT/qa2/500000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.997
 
 
 
results/ARMT/qa2/64000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa2/8000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa3/1000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.564
 
 
 
results/ARMT/qa3/10000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.275
 
 
 
results/ARMT/qa3/128000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.804
 
 
 
results/ARMT/qa3/16000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.927
 
 
 
results/ARMT/qa3/32000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.907
 
 
 
results/ARMT/qa3/4000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.909
 
 
 
results/ARMT/qa3/500000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.679
 
 
 
results/ARMT/qa3/64000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.883
 
 
 
results/ARMT/qa3/8000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.92
 
 
 
results/ARMT/qa4/1000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.998
 
 
 
results/ARMT/qa4/10000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.932
 
 
 
results/ARMT/qa4/128000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa4/16000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa4/32000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa4/4000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa4/500000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa4/64000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa4/8000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 1.0
 
 
 
results/ARMT/qa5/1000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.978
 
 
 
results/ARMT/qa5/10000000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.87
 
 
 
results/ARMT/qa5/128000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.988
 
 
 
results/ARMT/qa5/16000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.9940000000000001
 
 
 
results/ARMT/qa5/32000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.9890000000000001
 
 
 
results/ARMT/qa5/4000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.995
 
 
 
results/ARMT/qa5/500000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.982
 
 
 
results/ARMT/qa5/64000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.9890000000000001
 
 
 
results/ARMT/qa5/8000.csv DELETED
@@ -1,2 +0,0 @@
1
- result
2
- 0.993
 
 
 
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/0.csv RENAMED
File without changes
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/1000.csv RENAMED
File without changes
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/128000.csv RENAMED
File without changes
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/16000.csv RENAMED
File without changes
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/2000.csv RENAMED
File without changes
results/{GPT-4 → GPT-4 (gpt-4-0125-preview)}/qa1/32000.csv RENAMED
File without changes