diff --git a/app.py b/app.py index 27f2e293840b6b558f72b1188c589495fdfec03d..962b0f6658735b547c58a8c8a10012d08d141753 100644 --- a/app.py +++ b/app.py @@ -1,8 +1,8 @@ """A gradio app that renders a static leaderboard. This is used for Hugging Face Space.""" -import ast -import argparse -import glob -import pickle +# import ast +# import argparse +# import glob +# import pickle import gradio as gr import numpy as np import pandas as pd @@ -21,7 +21,7 @@ def make_default_md(): return leaderboard_md def make_arena_leaderboard_md(total_models): - leaderboard_md = f"""Total #models: **{total_models}**. Last updated: May 09, 2024.""" + leaderboard_md = f"""Total #models: **{total_models}**. Last updated: July 26, 2024.""" return leaderboard_md def make_model_desc_md(f_len): @@ -63,7 +63,11 @@ def load_model(folders, tab_name, msg_lengths): for rank, i in enumerate(np.argsort(mean_score)): results['Rank'][i] = rank + 1 - return pd.DataFrame(results).sort_values(['Rank']) + res_df = pd.DataFrame(results).sort_values(['Rank']) + # print(res_df.head()) + res_df['Avg ≤32k'] = res_df[res_df.columns[2:7]].astype(float).fillna(0).mean(axis=1).astype(int) + res_df['Avg ≤128k'] = res_df[res_df.columns[2:9]].astype(float).fillna(0).mean(axis=1).astype(int) + return res_df def build_leaderboard_tab(folders): default_md = make_default_md() @@ -86,7 +90,8 @@ def build_leaderboard_tab(folders): df = load_model(folders, tab_name, msg_lengths) cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256) - df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=list(msg_lengths.values())) + # df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=list(msg_lengths.values())) + df = df.style.background_gradient(cmap=cmap, vmin=0, vmax=100, subset=df.columns[2:]) # arena table with gr.Tab(tab_name, id=tab_id): md = make_arena_leaderboard_md(len(folders)) @@ -95,7 +100,7 @@ def build_leaderboard_tab(folders): headers=[ "Rank", "Model", - ] + list(msg_lengths.values()), + ] + list(msg_lengths.values()) + ['Avg ≤32k', 'Avg ≤128k'], datatype=[ "str", "markdown", @@ -106,11 +111,13 @@ def build_leaderboard_tab(folders): "str", "str", "str", + "str", + "str", ], value=df, elem_id="arena_leaderboard_dataframe", height=700, - column_widths=[20, 150] + [20] * len(msg_lengths), + column_widths=[20, 150] + [20] * len(msg_lengths) + [20] * 2, wrap=True, ) diff --git a/results/ARMT/qa1/1000000.csv b/results/ARMT/qa1/1000000.csv deleted file mode 100644 index e37ae6562a17ef3d28246fdfb97356fc9c6cf081..0000000000000000000000000000000000000000 --- a/results/ARMT/qa1/1000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9940000000000001 diff --git a/results/ARMT/qa1/10000000.csv b/results/ARMT/qa1/10000000.csv deleted file mode 100644 index c21f27b70cd7f93717d8b59c2455fbf253fac75f..0000000000000000000000000000000000000000 --- a/results/ARMT/qa1/10000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9740000000000001 diff --git a/results/ARMT/qa1/128000.csv b/results/ARMT/qa1/128000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa1/128000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa1/32000.csv b/results/ARMT/qa1/32000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa1/32000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa1/500000.csv b/results/ARMT/qa1/500000.csv deleted file mode 100644 index f072598258cbc7e7200addd66867afc1410e938c..0000000000000000000000000000000000000000 --- a/results/ARMT/qa1/500000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9990000000000001 diff --git a/results/ARMT/qa1/64000.csv b/results/ARMT/qa1/64000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa1/64000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa1/8000.csv b/results/ARMT/qa1/8000.csv deleted file mode 100644 index f072598258cbc7e7200addd66867afc1410e938c..0000000000000000000000000000000000000000 --- a/results/ARMT/qa1/8000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9990000000000001 diff --git a/results/ARMT/qa2/1000000.csv b/results/ARMT/qa2/1000000.csv deleted file mode 100644 index d76580181072b6f17ef112a0df77ae7d75df91bc..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/1000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.996 diff --git a/results/ARMT/qa2/10000000.csv b/results/ARMT/qa2/10000000.csv deleted file mode 100644 index e01722b44677b00482c03d7fa3f1138e1db39b40..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/10000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.8170000000000001 diff --git a/results/ARMT/qa2/128000.csv b/results/ARMT/qa2/128000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/128000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa2/16000.csv b/results/ARMT/qa2/16000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/16000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa2/32000.csv b/results/ARMT/qa2/32000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/32000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa2/4000.csv b/results/ARMT/qa2/4000.csv deleted file mode 100644 index 14cf5802a7905821f08297f495d9d4bdde6bbe1a..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/4000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.998 diff --git a/results/ARMT/qa2/500000.csv b/results/ARMT/qa2/500000.csv deleted file mode 100644 index a5490c038cca1199fc02be4b11087065a95c4349..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/500000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.997 diff --git a/results/ARMT/qa2/64000.csv b/results/ARMT/qa2/64000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/64000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa2/8000.csv b/results/ARMT/qa2/8000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa2/8000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa3/1000000.csv b/results/ARMT/qa3/1000000.csv deleted file mode 100644 index b39d4d721edf97a3238b8a06267ec2bf97e749f0..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/1000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.564 diff --git a/results/ARMT/qa3/10000000.csv b/results/ARMT/qa3/10000000.csv deleted file mode 100644 index c840a16bde27a03f6af98b54b366e351c2150113..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/10000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.275 diff --git a/results/ARMT/qa3/128000.csv b/results/ARMT/qa3/128000.csv deleted file mode 100644 index 2831b9f80e4bbc6b89e9d3548a8a64eeec278f2f..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/128000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.804 diff --git a/results/ARMT/qa3/16000.csv b/results/ARMT/qa3/16000.csv deleted file mode 100644 index 5b4939ca6205c9812db39b1b56342c7c2016e4e7..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/16000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.927 diff --git a/results/ARMT/qa3/32000.csv b/results/ARMT/qa3/32000.csv deleted file mode 100644 index 44d6cdbe9b4187a4dca80bbca75c2f6a33b1e5a8..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/32000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.907 diff --git a/results/ARMT/qa3/4000.csv b/results/ARMT/qa3/4000.csv deleted file mode 100644 index 338e672b25357639d328698eac3185ae9c3b518f..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/4000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.909 diff --git a/results/ARMT/qa3/500000.csv b/results/ARMT/qa3/500000.csv deleted file mode 100644 index 7cc5d6db040f4ee36f2432607513a71b45958d74..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/500000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.679 diff --git a/results/ARMT/qa3/64000.csv b/results/ARMT/qa3/64000.csv deleted file mode 100644 index 20500d54ec1c8319aedf197aa0867e7bbce44de5..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/64000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.883 diff --git a/results/ARMT/qa3/8000.csv b/results/ARMT/qa3/8000.csv deleted file mode 100644 index 3481f7eb969c00e7bfa5471bc1d59aff167571b9..0000000000000000000000000000000000000000 --- a/results/ARMT/qa3/8000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.92 diff --git a/results/ARMT/qa4/1000000.csv b/results/ARMT/qa4/1000000.csv deleted file mode 100644 index 14cf5802a7905821f08297f495d9d4bdde6bbe1a..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/1000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.998 diff --git a/results/ARMT/qa4/10000000.csv b/results/ARMT/qa4/10000000.csv deleted file mode 100644 index 211498b85e7f8b5a213826510ce1b98044d38bba..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/10000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.932 diff --git a/results/ARMT/qa4/128000.csv b/results/ARMT/qa4/128000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/128000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa4/16000.csv b/results/ARMT/qa4/16000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/16000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa4/32000.csv b/results/ARMT/qa4/32000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/32000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa4/4000.csv b/results/ARMT/qa4/4000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/4000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa4/500000.csv b/results/ARMT/qa4/500000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/500000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa4/64000.csv b/results/ARMT/qa4/64000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/64000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa4/8000.csv b/results/ARMT/qa4/8000.csv deleted file mode 100644 index 5e7680dbef8e417669b25ed944afbd0c47ee5728..0000000000000000000000000000000000000000 --- a/results/ARMT/qa4/8000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -1.0 diff --git a/results/ARMT/qa5/1000000.csv b/results/ARMT/qa5/1000000.csv deleted file mode 100644 index bb0d43cbcabfb645e504abc4098b6bd99eb84e6f..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/1000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.978 diff --git a/results/ARMT/qa5/10000000.csv b/results/ARMT/qa5/10000000.csv deleted file mode 100644 index c6433e48ff353e15ab5cceeb6a44202ebdf5b739..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/10000000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.87 diff --git a/results/ARMT/qa5/128000.csv b/results/ARMT/qa5/128000.csv deleted file mode 100644 index 34b97f0f336efb36d63ca188097303a2d0ee3874..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/128000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.988 diff --git a/results/ARMT/qa5/16000.csv b/results/ARMT/qa5/16000.csv deleted file mode 100644 index e37ae6562a17ef3d28246fdfb97356fc9c6cf081..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/16000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9940000000000001 diff --git a/results/ARMT/qa5/32000.csv b/results/ARMT/qa5/32000.csv deleted file mode 100644 index 7204542643cbce2e06f4fb2edfb1b1419e87a296..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/32000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9890000000000001 diff --git a/results/ARMT/qa5/4000.csv b/results/ARMT/qa5/4000.csv deleted file mode 100644 index 51c91a3c4f5070ca370272beaac9c7674545da96..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/4000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.995 diff --git a/results/ARMT/qa5/500000.csv b/results/ARMT/qa5/500000.csv deleted file mode 100644 index a3476c7e769bcb5d3d2f0b6a740cd784f154f903..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/500000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.982 diff --git a/results/ARMT/qa5/64000.csv b/results/ARMT/qa5/64000.csv deleted file mode 100644 index 7204542643cbce2e06f4fb2edfb1b1419e87a296..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/64000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9890000000000001 diff --git a/results/ARMT/qa5/8000.csv b/results/ARMT/qa5/8000.csv deleted file mode 100644 index b9de09c3d5c6237e907048dcf100593225554298..0000000000000000000000000000000000000000 --- a/results/ARMT/qa5/8000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.993 diff --git a/results/GPT-4/qa1/0.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/0.csv similarity index 100% rename from results/GPT-4/qa1/0.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/0.csv diff --git a/results/GPT-4/qa1/1000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/1000.csv similarity index 100% rename from results/GPT-4/qa1/1000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/1000.csv diff --git a/results/GPT-4/qa1/128000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/128000.csv similarity index 100% rename from results/GPT-4/qa1/128000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/128000.csv diff --git a/results/GPT-4/qa1/16000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/16000.csv similarity index 100% rename from results/GPT-4/qa1/16000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/16000.csv diff --git a/results/GPT-4/qa1/2000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/2000.csv similarity index 100% rename from results/GPT-4/qa1/2000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/2000.csv diff --git a/results/GPT-4/qa1/32000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/32000.csv similarity index 100% rename from results/GPT-4/qa1/32000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/32000.csv diff --git a/results/GPT-4/qa1/4000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/4000.csv similarity index 100% rename from results/GPT-4/qa1/4000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/4000.csv diff --git a/results/GPT-4/qa1/64000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/64000.csv similarity index 100% rename from results/GPT-4/qa1/64000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/64000.csv diff --git a/results/GPT-4/qa1/8000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa1/8000.csv similarity index 100% rename from results/GPT-4/qa1/8000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa1/8000.csv diff --git a/results/GPT-4/qa2/0.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/0.csv similarity index 100% rename from results/GPT-4/qa2/0.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/0.csv diff --git a/results/GPT-4/qa2/1000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/1000.csv similarity index 100% rename from results/GPT-4/qa2/1000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/1000.csv diff --git a/results/GPT-4/qa2/128000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/128000.csv similarity index 100% rename from results/GPT-4/qa2/128000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/128000.csv diff --git a/results/GPT-4/qa2/16000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/16000.csv similarity index 100% rename from results/GPT-4/qa2/16000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/16000.csv diff --git a/results/GPT-4/qa2/2000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/2000.csv similarity index 100% rename from results/GPT-4/qa2/2000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/2000.csv diff --git a/results/GPT-4/qa2/32000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/32000.csv similarity index 100% rename from results/GPT-4/qa2/32000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/32000.csv diff --git a/results/GPT-4/qa2/4000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/4000.csv similarity index 100% rename from results/GPT-4/qa2/4000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/4000.csv diff --git a/results/GPT-4/qa2/64000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/64000.csv similarity index 100% rename from results/GPT-4/qa2/64000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/64000.csv diff --git a/results/GPT-4/qa2/8000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa2/8000.csv similarity index 100% rename from results/GPT-4/qa2/8000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa2/8000.csv diff --git a/results/GPT-4/qa3/0.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/0.csv similarity index 100% rename from results/GPT-4/qa3/0.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/0.csv diff --git a/results/GPT-4/qa3/1000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/1000.csv similarity index 100% rename from results/GPT-4/qa3/1000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/1000.csv diff --git a/results/GPT-4/qa3/128000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/128000.csv similarity index 100% rename from results/GPT-4/qa3/128000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/128000.csv diff --git a/results/GPT-4/qa3/16000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/16000.csv similarity index 100% rename from results/GPT-4/qa3/16000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/16000.csv diff --git a/results/GPT-4/qa3/2000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/2000.csv similarity index 100% rename from results/GPT-4/qa3/2000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/2000.csv diff --git a/results/GPT-4/qa3/32000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/32000.csv similarity index 100% rename from results/GPT-4/qa3/32000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/32000.csv diff --git a/results/GPT-4/qa3/4000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/4000.csv similarity index 100% rename from results/GPT-4/qa3/4000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/4000.csv diff --git a/results/GPT-4/qa3/64000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/64000.csv similarity index 100% rename from results/GPT-4/qa3/64000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/64000.csv diff --git a/results/GPT-4/qa3/8000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa3/8000.csv similarity index 100% rename from results/GPT-4/qa3/8000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa3/8000.csv diff --git a/results/GPT-4/qa4/0.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/0.csv similarity index 100% rename from results/GPT-4/qa4/0.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/0.csv diff --git a/results/GPT-4/qa4/1000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/1000.csv similarity index 100% rename from results/GPT-4/qa4/1000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/1000.csv diff --git a/results/GPT-4/qa4/128000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/128000.csv similarity index 100% rename from results/GPT-4/qa4/128000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/128000.csv diff --git a/results/GPT-4/qa4/16000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/16000.csv similarity index 100% rename from results/GPT-4/qa4/16000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/16000.csv diff --git a/results/GPT-4/qa4/2000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/2000.csv similarity index 100% rename from results/GPT-4/qa4/2000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/2000.csv diff --git a/results/GPT-4/qa4/32000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/32000.csv similarity index 100% rename from results/GPT-4/qa4/32000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/32000.csv diff --git a/results/GPT-4/qa4/4000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/4000.csv similarity index 100% rename from results/GPT-4/qa4/4000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/4000.csv diff --git a/results/GPT-4/qa4/64000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/64000.csv similarity index 100% rename from results/GPT-4/qa4/64000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/64000.csv diff --git a/results/GPT-4/qa4/8000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa4/8000.csv similarity index 100% rename from results/GPT-4/qa4/8000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa4/8000.csv diff --git a/results/GPT-4/qa5/0.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/0.csv similarity index 100% rename from results/GPT-4/qa5/0.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/0.csv diff --git a/results/GPT-4/qa5/1000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/1000.csv similarity index 100% rename from results/GPT-4/qa5/1000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/1000.csv diff --git a/results/GPT-4/qa5/128000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/128000.csv similarity index 100% rename from results/GPT-4/qa5/128000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/128000.csv diff --git a/results/GPT-4/qa5/16000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/16000.csv similarity index 100% rename from results/GPT-4/qa5/16000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/16000.csv diff --git a/results/GPT-4/qa5/2000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/2000.csv similarity index 100% rename from results/GPT-4/qa5/2000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/2000.csv diff --git a/results/GPT-4/qa5/32000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/32000.csv similarity index 100% rename from results/GPT-4/qa5/32000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/32000.csv diff --git a/results/GPT-4/qa5/4000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/4000.csv similarity index 100% rename from results/GPT-4/qa5/4000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/4000.csv diff --git a/results/GPT-4/qa5/64000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/64000.csv similarity index 100% rename from results/GPT-4/qa5/64000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/64000.csv diff --git a/results/GPT-4/qa5/8000.csv b/results/GPT-4 (gpt-4-0125-preview)/qa5/8000.csv similarity index 100% rename from results/GPT-4/qa5/8000.csv rename to results/GPT-4 (gpt-4-0125-preview)/qa5/8000.csv diff --git a/results/RMT-Retrieval/qa1/16000.csv b/results/RMT-Retrieval/qa1/16000.csv deleted file mode 100644 index f072598258cbc7e7200addd66867afc1410e938c..0000000000000000000000000000000000000000 --- a/results/RMT-Retrieval/qa1/16000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9990000000000001 diff --git a/results/RMT-Retrieval/qa1/4000.csv b/results/RMT-Retrieval/qa1/4000.csv deleted file mode 100644 index f072598258cbc7e7200addd66867afc1410e938c..0000000000000000000000000000000000000000 --- a/results/RMT-Retrieval/qa1/4000.csv +++ /dev/null @@ -1,2 +0,0 @@ -result -0.9990000000000001 diff --git a/results/RMT-Retrieval/qa1/0.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/0.csv similarity index 100% rename from results/RMT-Retrieval/qa1/0.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/0.csv diff --git a/results/RMT-Retrieval/qa1/1000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/1000000.csv similarity index 100% rename from results/RMT-Retrieval/qa1/1000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/1000000.csv diff --git a/results/RMT-Retrieval/qa1/10000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/10000000.csv similarity index 100% rename from results/RMT-Retrieval/qa1/10000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/10000000.csv diff --git a/results/RMT-Retrieval/qa1/128000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/128000.csv similarity index 100% rename from results/RMT-Retrieval/qa1/128000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/128000.csv diff --git a/results/ARMT/qa1/16000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/16000.csv similarity index 100% rename from results/ARMT/qa1/16000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/16000.csv diff --git a/results/RMT-Retrieval/qa1/32000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/32000.csv similarity index 100% rename from results/RMT-Retrieval/qa1/32000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/32000.csv diff --git a/results/ARMT/qa1/4000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/4000.csv similarity index 100% rename from results/ARMT/qa1/4000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/4000.csv diff --git a/results/RMT-Retrieval/qa1/500000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/500000.csv similarity index 100% rename from results/RMT-Retrieval/qa1/500000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/500000.csv diff --git a/results/RMT-Retrieval/qa1/64000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/64000.csv similarity index 100% rename from results/RMT-Retrieval/qa1/64000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/64000.csv diff --git a/results/RMT-Retrieval/qa1/8000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa1/8000.csv similarity index 100% rename from results/RMT-Retrieval/qa1/8000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa1/8000.csv diff --git a/results/RMT-Retrieval/qa2/0.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/0.csv similarity index 100% rename from results/RMT-Retrieval/qa2/0.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/0.csv diff --git a/results/RMT-Retrieval/qa2/1000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/1000000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/1000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/1000000.csv diff --git a/results/RMT-Retrieval/qa2/10000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/10000000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/10000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/10000000.csv diff --git a/results/RMT-Retrieval/qa2/128000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/128000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/128000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/128000.csv diff --git a/results/RMT-Retrieval/qa2/16000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/16000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/16000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/16000.csv diff --git a/results/RMT-Retrieval/qa2/32000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/32000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/32000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/32000.csv diff --git a/results/RMT-Retrieval/qa2/4000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/4000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/4000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/4000.csv diff --git a/results/RMT-Retrieval/qa2/500000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/500000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/500000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/500000.csv diff --git a/results/RMT-Retrieval/qa2/64000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/64000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/64000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/64000.csv diff --git a/results/RMT-Retrieval/qa2/8000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa2/8000.csv similarity index 100% rename from results/RMT-Retrieval/qa2/8000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa2/8000.csv diff --git a/results/RMT-Retrieval/qa3/0.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/0.csv similarity index 100% rename from results/RMT-Retrieval/qa3/0.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/0.csv diff --git a/results/RMT-Retrieval/qa3/1000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/1000000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/1000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/1000000.csv diff --git a/results/RMT-Retrieval/qa3/10000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/10000000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/10000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/10000000.csv diff --git a/results/RMT-Retrieval/qa3/128000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/128000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/128000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/128000.csv diff --git a/results/RMT-Retrieval/qa3/16000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/16000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/16000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/16000.csv diff --git a/results/RMT-Retrieval/qa3/32000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/32000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/32000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/32000.csv diff --git a/results/RMT-Retrieval/qa3/4000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/4000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/4000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/4000.csv diff --git a/results/RMT-Retrieval/qa3/500000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/500000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/500000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/500000.csv diff --git a/results/RMT-Retrieval/qa3/64000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/64000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/64000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/64000.csv diff --git a/results/RMT-Retrieval/qa3/8000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa3/8000.csv similarity index 100% rename from results/RMT-Retrieval/qa3/8000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa3/8000.csv diff --git a/results/RMT-Retrieval/qa4/0.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/0.csv similarity index 100% rename from results/RMT-Retrieval/qa4/0.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/0.csv diff --git a/results/RMT-Retrieval/qa4/1000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/1000000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/1000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/1000000.csv diff --git a/results/RMT-Retrieval/qa4/10000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/10000000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/10000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/10000000.csv diff --git a/results/RMT-Retrieval/qa4/128000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/128000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/128000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/128000.csv diff --git a/results/RMT-Retrieval/qa4/16000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/16000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/16000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/16000.csv diff --git a/results/RMT-Retrieval/qa4/32000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/32000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/32000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/32000.csv diff --git a/results/RMT-Retrieval/qa4/4000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/4000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/4000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/4000.csv diff --git a/results/RMT-Retrieval/qa4/500000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/500000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/500000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/500000.csv diff --git a/results/RMT-Retrieval/qa4/64000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/64000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/64000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/64000.csv diff --git a/results/RMT-Retrieval/qa4/8000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa4/8000.csv similarity index 100% rename from results/RMT-Retrieval/qa4/8000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa4/8000.csv diff --git a/results/RMT-Retrieval/qa5/0.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/0.csv similarity index 100% rename from results/RMT-Retrieval/qa5/0.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/0.csv diff --git a/results/RMT-Retrieval/qa5/1000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/1000000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/1000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/1000000.csv diff --git a/results/RMT-Retrieval/qa5/10000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/10000000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/10000000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/10000000.csv diff --git a/results/RMT-Retrieval/qa5/128000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/128000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/128000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/128000.csv diff --git a/results/RMT-Retrieval/qa5/16000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/16000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/16000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/16000.csv diff --git a/results/RMT-Retrieval/qa5/32000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/32000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/32000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/32000.csv diff --git a/results/RMT-Retrieval/qa5/4000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/4000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/4000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/4000.csv diff --git a/results/RMT-Retrieval/qa5/500000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/500000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/500000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/500000.csv diff --git a/results/RMT-Retrieval/qa5/64000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/64000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/64000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/64000.csv diff --git a/results/RMT-Retrieval/qa5/8000.csv b/results/~ RMT-Retrieval (137M) fine-tune/qa5/8000.csv similarity index 100% rename from results/RMT-Retrieval/qa5/8000.csv rename to results/~ RMT-Retrieval (137M) fine-tune/qa5/8000.csv