diff --git a/app.py b/app.py
index 962b0f6658735b547c58a8c8a10012d08d141753..4104cefa01721667919ffdc6def670c5ad40b07c 100644
--- a/app.py
+++ b/app.py
@@ -86,7 +86,7 @@ def build_leaderboard_tab(folders):
}
with gr.Tabs() as tabs:
- for tab_id, tab_name in enumerate(['qa1','qa2', 'qa3', 'qa4', 'qa5']):
+ for tab_id, tab_name in enumerate(['avg', 'qa1','qa2', 'qa3', 'qa4', 'qa5']):
df = load_model(folders, tab_name, msg_lengths)
cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
diff --git a/data/BABILong NeurIPS24 Figs - leaderboard.csv b/data/BABILong NeurIPS24 Figs - leaderboard.csv
index 352a6a20cc4a2522cb3f05fdf529342b3e16c017..b677a5df6d9392ecc205fd56b1ae419691d99d5e 100644
--- a/data/BABILong NeurIPS24 Figs - leaderboard.csv
+++ b/data/BABILong NeurIPS24 Figs - leaderboard.csv
@@ -21,8 +21,10 @@ activation-beacon-mistral-7b,avg,59,56,51,48,43,37,36,27,14,,,
Phi-3-mini-128k-instruct,avg,64,57,55,51,50,46,42,37,7,,,
ai21labs/Jamba-v0.1,avg,65,53,50,48,46,45,41,40,34,,,
c4ai-command-r-v01,avg,64,64,63,61,59,52,51,46,38,,,
+Meta-Llama-3.1-8B-Instruct,avg,67,68,66,66,62,60,56,49,39,,,
Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,,
GPT-4,avg,87,81,77,74,71,64,53,43,36,,,
+Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,,
~ Mamba (130M) fine-tune,avg,,,,"98,7","98,5","98,5","98,1",97,"92,5",,,
Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37
~ RMT (137M) fine-tune,avg,"99,36","97,4","94,66","92,32","89,9","85,62","77,88","69,86","58,52","46,36","42,84","33,78"
diff --git a/notebooks/process_results_csv.ipynb b/notebooks/process_results_csv.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..e152c27e3af245061ebe8691267b4a47cae80491
--- /dev/null
+++ b/notebooks/process_results_csv.ipynb
@@ -0,0 +1,291 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import os\n",
+ "\n",
+ "import re"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "res_path = '../results'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "p = \"../data/BABILong NeurIPS24 Figs - leaderboard.csv\"  # relative to notebooks/, like res_path\n",
+    "res_df = pd.read_csv(p)\n",
+    "# no task filter here, so the 'avg' rows of the table are converted as well"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Write one <length>.csv under results/<model>/<task>/ for every filled cell of the table.\n",
+    "lens = [0, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 500000, 1000000, 10000000]\n",
+    "len_names = ['0K', '1K', '2K', '4K', '8K', '16K', '32K', '64K', '128K', '512K', '1M', '10M']\n",
+    "# NOTE(review): the '512K' column is written to 500000.csv - confirm the mismatch is intended\n",
+    "for model_name in res_df.Model.unique():\n",
+    "    model_df = res_df[res_df.Model == model_name]\n",
+    "    # '/' in a model name would add a directory level, so it becomes a space\n",
+    "    model_dir = os.path.join(res_path, model_name.replace('/', ' '))\n",
+    "    for _, row in model_df.iterrows():\n",
+    "        for length, ln in zip(lens, len_names):\n",
+    "            score = row[ln]\n",
+    "            if pd.isna(score):\n",
+    "                continue\n",
+    "            # a cell is text such as '98,7' or, in an all-numeric column, already a number\n",
+    "            score = float(str(score).replace(',', '.')) / 100\n",
+    "            task_dir = os.path.join(model_dir, row.task)\n",
+    "            os.makedirs(task_dir, exist_ok=True)  # also creates model_dir when missing\n",
+    "            pd.DataFrame([{'result': score}]).to_csv(os.path.join(task_dir, f'{length}.csv'), index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Calculate average results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model_names = next(os.walk(res_path))[1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " 1\n",
+ "0 2"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "pd.DataFrame([{1: 2}])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'../results/GPT-3.5 fine-tuned (trained on 100 samples)/qa2'"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "task_path"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GPT-4\n",
+ "GPT-3.5 fine-tuned (trained on 100 samples)\n",
+ "GPT-3.5 fine-tuned (trained on 1000 samples)\n",
+ "GPT-3.5\n",
+ "GPT4 + RAG by segments\n",
+ "GPT4 + RAG by sentences\n",
+ "GPT4 + Retrieve sentences (new 100 samples)\n",
+ "Mistral medium (xxB)\n",
+ "Mistral\n",
+ "GPT-2 (137M)\n",
+ "mamba-2.8b-hf\n",
+ "rwkv-6-world-7b\n",
+ "v5-Eagle-7B-HF\n",
+ "Meta-Llama-3-8B-Instruct\n",
+ "LLaMA-2-7B-32K\n",
+ "longchat-7b-v1.5-32k\n",
+ "LongAlpaca-13B\n",
+ "Llama-2-7B-32K-Instruct\n",
+ "Mistral-7b-Instruct-v0.2\n",
+ "Mixtral-8x7B-Instruct-v0.1\n",
+ "Mixtral-8x22B-Instruct-v0.1\n",
+ "activation-beacon-llama2-7b-chat\n",
+ "Yarn-Mistral-7b-128k\n",
+ "chatglm3-6b-128k\n",
+ "activation-beacon-mistral-7b\n",
+ "Phi-3-mini-128k-instruct\n",
+ "c4ai-command-r-v01\n",
+ "Phi-3-medium-128k-instruct\n",
+ "~ Mamba (130M) fine-tune\n",
+ "Llama3-ChatQA-1.5-8B + RAG\n",
+ "~ RMT (137M) fine-tune\n",
+ "~ ARMT (137M) fine-tune\n",
+ "01-ai Yi-34B\n",
+ "01-ai Yi-34B-200k\n",
+ "01-ai Yi-9B-200k\n",
+ "ai21labs Jamba-v0.1\n",
+ "~ RMT-Retrieval (137M) fine-tune\n",
+ "GPT-4 (gpt-4-0125-preview)\n",
+ "Meta-Llama-3.1-8B-Instruct\n",
+ "Meta-Llama-3.1-70B-Instruct\n"
+ ]
+ }
+ ],
+ "source": [
+    "# Derive each model's 'avg' results from the result files found in\n",
+    "# its qa1..qa5 folders.\n",
+    "for mn in model_names:\n",
+    "    print(mn)\n",
+    "    avg_path = os.path.join(res_path, mn, 'avg')\n",
+    "    # a model that already has an 'avg' folder is left untouched\n",
+    "    if os.path.exists(avg_path):\n",
+    "        continue\n",
+    "\n",
+    "    # file stem (e.g. '4000') -> mean scores collected across the tasks\n",
+    "    scores = {}\n",
+    "    for task_no in range(1, 6):\n",
+    "        task_path = os.path.join(res_path, mn, f'qa{task_no}')\n",
+    "        if not os.path.exists(task_path):\n",
+    "            continue\n",
+    "\n",
+    "        # the first os.walk entry lists the files directly inside task_path\n",
+    "        filenames = next(os.walk(task_path))[2]\n",
+    "        for fn in filenames:\n",
+    "            stem = fn.split('.')[0]\n",
+    "            task_df = pd.read_csv(os.path.join(task_path, fn))\n",
+    "            bucket = scores.setdefault(stem, [])\n",
+    "            bucket.append(task_df.result.mean())\n",
+    "\n",
+    "    for stem, per_task in scores.items():\n",
+    "        # fewer than five collected scores: no average is written\n",
+    "        if len(per_task) < 5:\n",
+    "            continue\n",
+    "        avg_df = pd.DataFrame([{'result': np.mean(per_task)}])\n",
+    "        os.makedirs(avg_path, exist_ok=True)\n",
+    "        out_path = os.path.join(avg_path, stem + '.csv')\n",
+    "        avg_df.to_csv(out_path, index=False)\n",
+    "        print(out_path)\n",
+    "\n",
+    "\n",
+    "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'16000': [0.58], '32000': [0.33], '4000': [0.73], '8000': [0.75]}"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "scores"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb
deleted file mode 100644
index 70aa3aa36e3c0a985a27da5988269df92a3614b4..0000000000000000000000000000000000000000
--- a/notebooks/test.ipynb
+++ /dev/null
@@ -1,78 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import os"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "metadata": {},
- "outputs": [],
- "source": [
- "res_path = '../results'"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "metadata": {},
- "outputs": [],
- "source": [
- "p = \"/home/jovyan/rmt/babilong-leaderboard/data/BABILong NeurIPS24 Figs - leaderboard.csv\"\n",
- "res_df = pd.read_csv(p)\n",
- "res_df = res_df[res_df.task.isin(['qa1', 'qa2', 'qa3', 'qa4', 'qa5'])]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 30,
- "metadata": {},
- "outputs": [],
- "source": [
- "lens = [0, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 500000, 1000000, 10000000]\n",
- "len_names = ['0K', '1K', '2K', '4K', '8K', '16K', '32K', '64K', '128K', '512K', '1M', '10M']\n",
- "\n",
- "for model_name in res_df.Model.unique():\n",
- " model_df = res_df[res_df.Model == model_name]\n",
- " for i, row in model_df.iterrows():\n",
- " for l, ln in zip(lens, len_names):\n",
- " score = row[ln]\n",
- " # print(score)\n",
- " if not pd.isna(score):\n",
- " os.makedirs(os.path.join(res_path, model_name), exist_ok=True)\n",
- " os.makedirs(os.path.join(res_path, model_name, row.task), exist_ok=True)\n",
- " path = os.path.join(res_path, model_name, row.task, f'{l}.csv')\n",
- " df = pd.DataFrame([{'result': score}])\n",
- " df.to_csv(path, index=False)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.13"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/results/01-ai Yi-34B-200k/avg/0.csv b/results/01-ai Yi-34B-200k/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.65
diff --git a/results/01-ai Yi-34B-200k/avg/1000.csv b/results/01-ai Yi-34B-200k/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.59
diff --git a/results/01-ai Yi-34B-200k/avg/16000.csv b/results/01-ai Yi-34B-200k/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.5
diff --git a/results/01-ai Yi-34B-200k/avg/2000.csv b/results/01-ai Yi-34B-200k/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.56
diff --git a/results/01-ai Yi-34B-200k/avg/32000.csv b/results/01-ai Yi-34B-200k/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/01-ai Yi-34B-200k/avg/4000.csv b/results/01-ai Yi-34B-200k/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..55837811362d6f27a37b7a72f0edcb238c90ccf6
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.54
diff --git a/results/01-ai Yi-34B-200k/avg/64000.csv b/results/01-ai Yi-34B-200k/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/01-ai Yi-34B-200k/avg/8000.csv b/results/01-ai Yi-34B-200k/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/01-ai Yi-34B-200k/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/01-ai Yi-34B/avg/0.csv b/results/01-ai Yi-34B/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e952da3a8db796734136b86b833a087661b59f2d
--- /dev/null
+++ b/results/01-ai Yi-34B/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.72
diff --git a/results/01-ai Yi-34B/avg/1000.csv b/results/01-ai Yi-34B/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/01-ai Yi-34B/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/01-ai Yi-34B/avg/16000.csv b/results/01-ai Yi-34B/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e5a7b897d5b6eeaaf9d23030dd640cd27415a1c9
--- /dev/null
+++ b/results/01-ai Yi-34B/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.31
diff --git a/results/01-ai Yi-34B/avg/2000.csv b/results/01-ai Yi-34B/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261
--- /dev/null
+++ b/results/01-ai Yi-34B/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.43
diff --git a/results/01-ai Yi-34B/avg/32000.csv b/results/01-ai Yi-34B/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b19740076b62c87f4353fac30eab97aa8b4611fd
--- /dev/null
+++ b/results/01-ai Yi-34B/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.04
diff --git a/results/01-ai Yi-34B/avg/4000.csv b/results/01-ai Yi-34B/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf
--- /dev/null
+++ b/results/01-ai Yi-34B/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.37
diff --git a/results/01-ai Yi-34B/avg/8000.csv b/results/01-ai Yi-34B/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d220a0008f0e11c42ab018c0243594c09e74f118
--- /dev/null
+++ b/results/01-ai Yi-34B/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.38
diff --git a/results/01-ai Yi-9B-200k/avg/0.csv b/results/01-ai Yi-9B-200k/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/01-ai Yi-9B-200k/avg/1000.csv b/results/01-ai Yi-9B-200k/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.55
diff --git a/results/01-ai Yi-9B-200k/avg/128000.csv b/results/01-ai Yi-9B-200k/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1628a5c7821b58b09e92c607a6d582cab60789f7
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.24
diff --git a/results/01-ai Yi-9B-200k/avg/16000.csv b/results/01-ai Yi-9B-200k/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.36
diff --git a/results/01-ai Yi-9B-200k/avg/2000.csv b/results/01-ai Yi-9B-200k/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/01-ai Yi-9B-200k/avg/32000.csv b/results/01-ai Yi-9B-200k/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.37
diff --git a/results/01-ai Yi-9B-200k/avg/4000.csv b/results/01-ai Yi-9B-200k/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/01-ai Yi-9B-200k/avg/64000.csv b/results/01-ai Yi-9B-200k/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..43e7b0059892d598aad8dec7e0c3b137de40b5a9
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.29
diff --git a/results/01-ai Yi-9B-200k/avg/8000.csv b/results/01-ai Yi-9B-200k/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/01-ai Yi-9B-200k/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/GPT-2 (137M)/avg/0.csv b/results/GPT-2 (137M)/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..0a5e1292a7ec9eec0be5ab1ed37f761feec7241b
--- /dev/null
+++ b/results/GPT-2 (137M)/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.27
diff --git a/results/GPT-2 (137M)/avg/1000.csv b/results/GPT-2 (137M)/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..466f801be654fefe9c3ea15569001ca34024a9ec
--- /dev/null
+++ b/results/GPT-2 (137M)/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.15
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/0.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..be1f19880a38b06fc91fa035a2522e67ccf404be
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.874
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/1000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a9066deebfc3de66f6a71d33fafe29752ef8551b
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.8140000000000001
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/128000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..22c2f48ccd46b9771eaf807dd78afe6462ec5f2d
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.358
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/16000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..738e2e53bbbac478ccf741fe98586b4fce44336a
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.6399999999999999
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/2000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..f9b2476a243cdf9b8c2bbd1d6f3795f9145e66ff
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.768
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/32000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ff3b74862da2e52575f13ccbece4ce9275c4b6f0
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.526
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/4000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..3f2dac4457b3854cf209f9442f5b55bec163ae3c
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.738
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/64000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..632a771d928f8359027516f3978de05bc623dee8
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.42800000000000005
diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/8000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..cbfbe96871fdac9bfd91c74198420498e2fb6a1a
--- /dev/null
+++ b/results/GPT-4 (gpt-4-0125-preview)/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.7120000000000001
diff --git a/results/LLaMA-2-7B-32K/avg/0.csv b/results/LLaMA-2-7B-32K/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580
--- /dev/null
+++ b/results/LLaMA-2-7B-32K/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.41
diff --git a/results/LLaMA-2-7B-32K/avg/1000.csv b/results/LLaMA-2-7B-32K/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785
--- /dev/null
+++ b/results/LLaMA-2-7B-32K/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.53
diff --git a/results/LLaMA-2-7B-32K/avg/16000.csv b/results/LLaMA-2-7B-32K/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c297a96bf813f15aca75a5f65ed7e2d9a828d24c
--- /dev/null
+++ b/results/LLaMA-2-7B-32K/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.32
diff --git a/results/LLaMA-2-7B-32K/avg/2000.csv b/results/LLaMA-2-7B-32K/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/LLaMA-2-7B-32K/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/LLaMA-2-7B-32K/avg/32000.csv b/results/LLaMA-2-7B-32K/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..19336832ec944a8f1b52d2856bb788cfff85f78e
--- /dev/null
+++ b/results/LLaMA-2-7B-32K/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.03
diff --git a/results/LLaMA-2-7B-32K/avg/4000.csv b/results/LLaMA-2-7B-32K/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6
--- /dev/null
+++ b/results/LLaMA-2-7B-32K/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.4
diff --git a/results/LLaMA-2-7B-32K/avg/8000.csv b/results/LLaMA-2-7B-32K/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad
--- /dev/null
+++ b/results/LLaMA-2-7B-32K/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.39
diff --git a/results/Llama-2-7B-32K-Instruct/avg/0.csv b/results/Llama-2-7B-32K-Instruct/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe
--- /dev/null
+++ b/results/Llama-2-7B-32K-Instruct/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.49
diff --git a/results/Llama-2-7B-32K-Instruct/avg/1000.csv b/results/Llama-2-7B-32K-Instruct/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/Llama-2-7B-32K-Instruct/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/Llama-2-7B-32K-Instruct/avg/16000.csv b/results/Llama-2-7B-32K-Instruct/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c
--- /dev/null
+++ b/results/Llama-2-7B-32K-Instruct/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.35
diff --git a/results/Llama-2-7B-32K-Instruct/avg/2000.csv b/results/Llama-2-7B-32K-Instruct/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe
--- /dev/null
+++ b/results/Llama-2-7B-32K-Instruct/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.49
diff --git a/results/Llama-2-7B-32K-Instruct/avg/32000.csv b/results/Llama-2-7B-32K-Instruct/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..11a090972d80aec2ff845c30aab47407be2352d9
--- /dev/null
+++ b/results/Llama-2-7B-32K-Instruct/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.05
diff --git a/results/Llama-2-7B-32K-Instruct/avg/4000.csv b/results/Llama-2-7B-32K-Instruct/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261
--- /dev/null
+++ b/results/Llama-2-7B-32K-Instruct/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.43
diff --git a/results/Llama-2-7B-32K-Instruct/avg/8000.csv b/results/Llama-2-7B-32K-Instruct/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6
--- /dev/null
+++ b/results/Llama-2-7B-32K-Instruct/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.4
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/0.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000000.csv
@@ -0,0 +1,2 @@
+result
+0.39
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/10000000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/10000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/10000000.csv
@@ -0,0 +1,2 @@
+result
+0.37
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/128000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/16000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/2000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..153d5a3b84e4e0e48ae0076c64cf47afb8b264f0
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.47
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/32000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e29f47d1cb9509158a457f351abf79cd9333a762
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.44
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/4000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/500000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/500000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/500000.csv
@@ -0,0 +1,2 @@
+result
+0.42
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/64000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.42
diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/8000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/LongAlpaca-13B/avg/0.csv b/results/LongAlpaca-13B/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/LongAlpaca-13B/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/LongAlpaca-13B/avg/1000.csv b/results/LongAlpaca-13B/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..153d5a3b84e4e0e48ae0076c64cf47afb8b264f0
--- /dev/null
+++ b/results/LongAlpaca-13B/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.47
diff --git a/results/LongAlpaca-13B/avg/16000.csv b/results/LongAlpaca-13B/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33
--- /dev/null
+++ b/results/LongAlpaca-13B/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.36
diff --git a/results/LongAlpaca-13B/avg/2000.csv b/results/LongAlpaca-13B/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/LongAlpaca-13B/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/LongAlpaca-13B/avg/32000.csv b/results/LongAlpaca-13B/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b19740076b62c87f4353fac30eab97aa8b4611fd
--- /dev/null
+++ b/results/LongAlpaca-13B/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.04
diff --git a/results/LongAlpaca-13B/avg/4000.csv b/results/LongAlpaca-13B/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261
--- /dev/null
+++ b/results/LongAlpaca-13B/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.43
diff --git a/results/LongAlpaca-13B/avg/8000.csv b/results/LongAlpaca-13B/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6
--- /dev/null
+++ b/results/LongAlpaca-13B/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.4
diff --git a/results/Meta-Llama-3-8B-Instruct/avg/0.csv b/results/Meta-Llama-3-8B-Instruct/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1
--- /dev/null
+++ b/results/Meta-Llama-3-8B-Instruct/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.64
diff --git a/results/Meta-Llama-3-8B-Instruct/avg/1000.csv b/results/Meta-Llama-3-8B-Instruct/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40
--- /dev/null
+++ b/results/Meta-Llama-3-8B-Instruct/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.6
diff --git a/results/Meta-Llama-3-8B-Instruct/avg/2000.csv b/results/Meta-Llama-3-8B-Instruct/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..cb4221b589457585a0f9e0d4400f8986ee7c41cc
--- /dev/null
+++ b/results/Meta-Llama-3-8B-Instruct/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.58
diff --git a/results/Meta-Llama-3-8B-Instruct/avg/4000.csv b/results/Meta-Llama-3-8B-Instruct/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a
--- /dev/null
+++ b/results/Meta-Llama-3-8B-Instruct/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.5
diff --git a/results/Meta-Llama-3-8B-Instruct/avg/8000.csv b/results/Meta-Llama-3-8B-Instruct/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e29f47d1cb9509158a457f351abf79cd9333a762
--- /dev/null
+++ b/results/Meta-Llama-3-8B-Instruct/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.44
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/0.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..83ba11dcff9cce6369fff6c946075d6ebeebd368
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.85
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/1000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7a27968157e58ea8642cf16e7e0ab8172a0518f7
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.81
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/128000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/16000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.65
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/2000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..223d850464648a46d745f1c5e86c83752d9b2135
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.78
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/32000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.59
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/4000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d8ec7e4d79540fb339feccca0ce6b7ea60e8aafa
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.74
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/64000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.53
diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/8000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a
--- /dev/null
+++ b/results/Meta-Llama-3.1-70B-Instruct/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.7
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/0.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1f7c0e94d6341eed38b798bbe83f395efb572b0a
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.67
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/1000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..22fb8b3d67c0ed0ec33e60f591d8d5e7580fff53
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.68
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/128000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.39
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/16000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.6
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/2000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..06469c9fe3accdc55c9f65a81b7b58307b21a674
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.66
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/32000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.56
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/4000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..06469c9fe3accdc55c9f65a81b7b58307b21a674
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.66
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/64000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.49
diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/8000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..90476c2461795796d79aacaea3589b52b4c7a571
--- /dev/null
+++ b/results/Meta-Llama-3.1-8B-Instruct/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.62
diff --git a/results/Mistral-7b-Instruct-v0.2/avg/0.csv b/results/Mistral-7b-Instruct-v0.2/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40
--- /dev/null
+++ b/results/Mistral-7b-Instruct-v0.2/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.6
diff --git a/results/Mistral-7b-Instruct-v0.2/avg/1000.csv b/results/Mistral-7b-Instruct-v0.2/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d
--- /dev/null
+++ b/results/Mistral-7b-Instruct-v0.2/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.56
diff --git a/results/Mistral-7b-Instruct-v0.2/avg/16000.csv b/results/Mistral-7b-Instruct-v0.2/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4
--- /dev/null
+++ b/results/Mistral-7b-Instruct-v0.2/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.42
diff --git a/results/Mistral-7b-Instruct-v0.2/avg/2000.csv b/results/Mistral-7b-Instruct-v0.2/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/Mistral-7b-Instruct-v0.2/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/Mistral-7b-Instruct-v0.2/avg/32000.csv b/results/Mistral-7b-Instruct-v0.2/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf
--- /dev/null
+++ b/results/Mistral-7b-Instruct-v0.2/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.37
diff --git a/results/Mistral-7b-Instruct-v0.2/avg/4000.csv b/results/Mistral-7b-Instruct-v0.2/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe
--- /dev/null
+++ b/results/Mistral-7b-Instruct-v0.2/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.49
diff --git a/results/Mistral-7b-Instruct-v0.2/avg/8000.csv b/results/Mistral-7b-Instruct-v0.2/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/Mistral-7b-Instruct-v0.2/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/Mistral/avg/0.csv b/results/Mistral/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e952da3a8db796734136b86b833a087661b59f2d
--- /dev/null
+++ b/results/Mistral/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.72
diff --git a/results/Mistral/avg/16000.csv b/results/Mistral/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..590e72f0c5a2aea0283613b945cb8184582b980b
--- /dev/null
+++ b/results/Mistral/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.44000000000000006
diff --git a/results/Mistral/avg/32000.csv b/results/Mistral/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..6c2e835608d123a8b2253a3f25f1860969cf60cf
--- /dev/null
+++ b/results/Mistral/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.372
diff --git a/results/Mistral/avg/4000.csv b/results/Mistral/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7d81f926ca158ab1ab70fa95f90aeb62aebdd468
--- /dev/null
+++ b/results/Mistral/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.546
diff --git a/results/Mistral/avg/8000.csv b/results/Mistral/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..deed529b2467e14283200bb2c6b8aca8b9c5c834
--- /dev/null
+++ b/results/Mistral/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.5199999999999999
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/0.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..76adb21b45a8d8b7a67041be02009075571488ff
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.75
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/1000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..fdeb0e98d24f032d7d0c55367ac3d84c009cbe2b
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.73
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/16000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.51
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/2000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.7
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/32000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.43
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/4000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.65
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/64000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.35
diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/8000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..cb4221b589457585a0f9e0d4400f8986ee7c41cc
--- /dev/null
+++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.58
diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/0.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d
--- /dev/null
+++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.65
diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/1000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..66043869d21fb20185f28d3a911a996089595b43
--- /dev/null
+++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.63
diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/16000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/2000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40
--- /dev/null
+++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.6
diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/32000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6
--- /dev/null
+++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.4
diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/4000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7
--- /dev/null
+++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.55
diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/8000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a
--- /dev/null
+++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.5
diff --git a/results/Phi-3-medium-128k-instruct/avg/0.csv b/results/Phi-3-medium-128k-instruct/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e952da3a8db796734136b86b833a087661b59f2d
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.72
diff --git a/results/Phi-3-medium-128k-instruct/avg/1000.csv b/results/Phi-3-medium-128k-instruct/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.7
diff --git a/results/Phi-3-medium-128k-instruct/avg/128000.csv b/results/Phi-3-medium-128k-instruct/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..372e6db8948b50d2a36c8d54b399e2b73d84a10b
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.3
diff --git a/results/Phi-3-medium-128k-instruct/avg/16000.csv b/results/Phi-3-medium-128k-instruct/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..6f7b039b44a27dd5a1d4701de8c71b443d754a95
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.57
diff --git a/results/Phi-3-medium-128k-instruct/avg/2000.csv b/results/Phi-3-medium-128k-instruct/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1f7c0e94d6341eed38b798bbe83f395efb572b0a
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.67
diff --git a/results/Phi-3-medium-128k-instruct/avg/32000.csv b/results/Phi-3-medium-128k-instruct/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.53
diff --git a/results/Phi-3-medium-128k-instruct/avg/4000.csv b/results/Phi-3-medium-128k-instruct/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..90476c2461795796d79aacaea3589b52b4c7a571
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.62
diff --git a/results/Phi-3-medium-128k-instruct/avg/64000.csv b/results/Phi-3-medium-128k-instruct/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/Phi-3-medium-128k-instruct/avg/8000.csv b/results/Phi-3-medium-128k-instruct/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40
--- /dev/null
+++ b/results/Phi-3-medium-128k-instruct/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.6
diff --git a/results/Phi-3-mini-128k-instruct/avg/0.csv b/results/Phi-3-mini-128k-instruct/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.64
diff --git a/results/Phi-3-mini-128k-instruct/avg/1000.csv b/results/Phi-3-mini-128k-instruct/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..6f7b039b44a27dd5a1d4701de8c71b443d754a95
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.57
diff --git a/results/Phi-3-mini-128k-instruct/avg/128000.csv b/results/Phi-3-mini-128k-instruct/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8fc9f31a839fd188fa8898df3ecfd89dd57b2c57
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.07
diff --git a/results/Phi-3-mini-128k-instruct/avg/16000.csv b/results/Phi-3-mini-128k-instruct/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/Phi-3-mini-128k-instruct/avg/2000.csv b/results/Phi-3-mini-128k-instruct/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.55
diff --git a/results/Phi-3-mini-128k-instruct/avg/32000.csv b/results/Phi-3-mini-128k-instruct/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.42
diff --git a/results/Phi-3-mini-128k-instruct/avg/4000.csv b/results/Phi-3-mini-128k-instruct/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.51
diff --git a/results/Phi-3-mini-128k-instruct/avg/64000.csv b/results/Phi-3-mini-128k-instruct/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.37
diff --git a/results/Phi-3-mini-128k-instruct/avg/8000.csv b/results/Phi-3-mini-128k-instruct/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a
--- /dev/null
+++ b/results/Phi-3-mini-128k-instruct/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.5
diff --git a/results/Yarn-Mistral-7b-128k/avg/0.csv b/results/Yarn-Mistral-7b-128k/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.51
diff --git a/results/Yarn-Mistral-7b-128k/avg/1000.csv b/results/Yarn-Mistral-7b-128k/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/Yarn-Mistral-7b-128k/avg/128000.csv b/results/Yarn-Mistral-7b-128k/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..0b053187266d9bf5c3e7fbc2f34fbabcbb35b1c9
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.09
diff --git a/results/Yarn-Mistral-7b-128k/avg/16000.csv b/results/Yarn-Mistral-7b-128k/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..372e6db8948b50d2a36c8d54b399e2b73d84a10b
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.3
diff --git a/results/Yarn-Mistral-7b-128k/avg/2000.csv b/results/Yarn-Mistral-7b-128k/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.43
diff --git a/results/Yarn-Mistral-7b-128k/avg/32000.csv b/results/Yarn-Mistral-7b-128k/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..f6db9c63579e516a6fccaae3048e30f40f5fb7bd
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.16
diff --git a/results/Yarn-Mistral-7b-128k/avg/4000.csv b/results/Yarn-Mistral-7b-128k/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.4
diff --git a/results/Yarn-Mistral-7b-128k/avg/64000.csv b/results/Yarn-Mistral-7b-128k/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..3dd2162250ba01e8e3f1241626a410b2757ba120
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.1
diff --git a/results/Yarn-Mistral-7b-128k/avg/8000.csv b/results/Yarn-Mistral-7b-128k/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d220a0008f0e11c42ab018c0243594c09e74f118
--- /dev/null
+++ b/results/Yarn-Mistral-7b-128k/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.38
diff --git a/results/activation-beacon-llama2-7b-chat/avg/0.csv b/results/activation-beacon-llama2-7b-chat/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.55
diff --git a/results/activation-beacon-llama2-7b-chat/avg/1000.csv b/results/activation-beacon-llama2-7b-chat/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/activation-beacon-llama2-7b-chat/avg/128000.csv b/results/activation-beacon-llama2-7b-chat/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..86b92dd0e26fa2fea5cc68e01132af62350a3a59
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.06
diff --git a/results/activation-beacon-llama2-7b-chat/avg/16000.csv b/results/activation-beacon-llama2-7b-chat/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5ab29f154b21da70f9778e41715bff64df1d4d64
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.23
diff --git a/results/activation-beacon-llama2-7b-chat/avg/2000.csv b/results/activation-beacon-llama2-7b-chat/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..153d5a3b84e4e0e48ae0076c64cf47afb8b264f0
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.47
diff --git a/results/activation-beacon-llama2-7b-chat/avg/32000.csv b/results/activation-beacon-llama2-7b-chat/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..f6db9c63579e516a6fccaae3048e30f40f5fb7bd
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.16
diff --git a/results/activation-beacon-llama2-7b-chat/avg/4000.csv b/results/activation-beacon-llama2-7b-chat/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.43
diff --git a/results/activation-beacon-llama2-7b-chat/avg/64000.csv b/results/activation-beacon-llama2-7b-chat/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1bbf04acff1bd08a47015d76283cad0180c9e78c
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.08
diff --git a/results/activation-beacon-llama2-7b-chat/avg/8000.csv b/results/activation-beacon-llama2-7b-chat/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33
--- /dev/null
+++ b/results/activation-beacon-llama2-7b-chat/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.36
diff --git a/results/activation-beacon-mistral-7b/avg/0.csv b/results/activation-beacon-mistral-7b/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.59
diff --git a/results/activation-beacon-mistral-7b/avg/1000.csv b/results/activation-beacon-mistral-7b/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.56
diff --git a/results/activation-beacon-mistral-7b/avg/128000.csv b/results/activation-beacon-mistral-7b/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..2e4950cdc3d91596fe0128c450b34c9b0e91d91b
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.14
diff --git a/results/activation-beacon-mistral-7b/avg/16000.csv b/results/activation-beacon-mistral-7b/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.37
diff --git a/results/activation-beacon-mistral-7b/avg/2000.csv b/results/activation-beacon-mistral-7b/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.51
diff --git a/results/activation-beacon-mistral-7b/avg/32000.csv b/results/activation-beacon-mistral-7b/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.36
diff --git a/results/activation-beacon-mistral-7b/avg/4000.csv b/results/activation-beacon-mistral-7b/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/activation-beacon-mistral-7b/avg/64000.csv b/results/activation-beacon-mistral-7b/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..0a5e1292a7ec9eec0be5ab1ed37f761feec7241b
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.27
diff --git a/results/activation-beacon-mistral-7b/avg/8000.csv b/results/activation-beacon-mistral-7b/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261
--- /dev/null
+++ b/results/activation-beacon-mistral-7b/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.43
diff --git a/results/ai21labs Jamba-v0.1/avg/0.csv b/results/ai21labs Jamba-v0.1/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.65
diff --git a/results/ai21labs Jamba-v0.1/avg/1000.csv b/results/ai21labs Jamba-v0.1/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.53
diff --git a/results/ai21labs Jamba-v0.1/avg/128000.csv b/results/ai21labs Jamba-v0.1/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a35ac122d663d8cbc1c0af4a9995f6290585cd61
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.34
diff --git a/results/ai21labs Jamba-v0.1/avg/16000.csv b/results/ai21labs Jamba-v0.1/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.45
diff --git a/results/ai21labs Jamba-v0.1/avg/2000.csv b/results/ai21labs Jamba-v0.1/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.5
diff --git a/results/ai21labs Jamba-v0.1/avg/32000.csv b/results/ai21labs Jamba-v0.1/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.41
diff --git a/results/ai21labs Jamba-v0.1/avg/4000.csv b/results/ai21labs Jamba-v0.1/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/ai21labs Jamba-v0.1/avg/64000.csv b/results/ai21labs Jamba-v0.1/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.4
diff --git a/results/ai21labs Jamba-v0.1/avg/8000.csv b/results/ai21labs Jamba-v0.1/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/ai21labs Jamba-v0.1/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/c4ai-command-r-v01/avg/0.csv b/results/c4ai-command-r-v01/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.64
diff --git a/results/c4ai-command-r-v01/avg/1000.csv b/results/c4ai-command-r-v01/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.64
diff --git a/results/c4ai-command-r-v01/avg/128000.csv b/results/c4ai-command-r-v01/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d220a0008f0e11c42ab018c0243594c09e74f118
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.38
diff --git a/results/c4ai-command-r-v01/avg/16000.csv b/results/c4ai-command-r-v01/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/c4ai-command-r-v01/avg/2000.csv b/results/c4ai-command-r-v01/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..66043869d21fb20185f28d3a911a996089595b43
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.63
diff --git a/results/c4ai-command-r-v01/avg/32000.csv b/results/c4ai-command-r-v01/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.51
diff --git a/results/c4ai-command-r-v01/avg/4000.csv b/results/c4ai-command-r-v01/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d20e89361832ddf1abbe151b539dc5cbc2fdb159
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.61
diff --git a/results/c4ai-command-r-v01/avg/64000.csv b/results/c4ai-command-r-v01/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/c4ai-command-r-v01/avg/8000.csv b/results/c4ai-command-r-v01/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b
--- /dev/null
+++ b/results/c4ai-command-r-v01/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.59
diff --git a/results/chatglm3-6b-128k/avg/0.csv b/results/chatglm3-6b-128k/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.56
diff --git a/results/chatglm3-6b-128k/avg/1000.csv b/results/chatglm3-6b-128k/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.55
diff --git a/results/chatglm3-6b-128k/avg/128000.csv b/results/chatglm3-6b-128k/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a2b1af532eedf76087542137a189b2da2c33a421
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.13
diff --git a/results/chatglm3-6b-128k/avg/16000.csv b/results/chatglm3-6b-128k/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.41
diff --git a/results/chatglm3-6b-128k/avg/2000.csv b/results/chatglm3-6b-128k/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.51
diff --git a/results/chatglm3-6b-128k/avg/32000.csv b/results/chatglm3-6b-128k/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.36
diff --git a/results/chatglm3-6b-128k/avg/4000.csv b/results/chatglm3-6b-128k/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/chatglm3-6b-128k/avg/64000.csv b/results/chatglm3-6b-128k/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..246a133d32405f187fce2a1d910781fb6ed86d87
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.21
diff --git a/results/chatglm3-6b-128k/avg/8000.csv b/results/chatglm3-6b-128k/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/chatglm3-6b-128k/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/longchat-7b-v1.5-32k/avg/0.csv b/results/longchat-7b-v1.5-32k/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008
--- /dev/null
+++ b/results/longchat-7b-v1.5-32k/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.46
diff --git a/results/longchat-7b-v1.5-32k/avg/1000.csv b/results/longchat-7b-v1.5-32k/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4
--- /dev/null
+++ b/results/longchat-7b-v1.5-32k/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.42
diff --git a/results/longchat-7b-v1.5-32k/avg/16000.csv b/results/longchat-7b-v1.5-32k/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad
--- /dev/null
+++ b/results/longchat-7b-v1.5-32k/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.39
diff --git a/results/longchat-7b-v1.5-32k/avg/2000.csv b/results/longchat-7b-v1.5-32k/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6
--- /dev/null
+++ b/results/longchat-7b-v1.5-32k/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.4
diff --git a/results/longchat-7b-v1.5-32k/avg/32000.csv b/results/longchat-7b-v1.5-32k/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..11a090972d80aec2ff845c30aab47407be2352d9
--- /dev/null
+++ b/results/longchat-7b-v1.5-32k/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.05
diff --git a/results/longchat-7b-v1.5-32k/avg/4000.csv b/results/longchat-7b-v1.5-32k/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580
--- /dev/null
+++ b/results/longchat-7b-v1.5-32k/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.41
diff --git a/results/longchat-7b-v1.5-32k/avg/8000.csv b/results/longchat-7b-v1.5-32k/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4
--- /dev/null
+++ b/results/longchat-7b-v1.5-32k/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.42
diff --git a/results/mamba-2.8b-hf/avg/0.csv b/results/mamba-2.8b-hf/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a
--- /dev/null
+++ b/results/mamba-2.8b-hf/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.7
diff --git a/results/mamba-2.8b-hf/avg/1000.csv b/results/mamba-2.8b-hf/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6
--- /dev/null
+++ b/results/mamba-2.8b-hf/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.52
diff --git a/results/mamba-2.8b-hf/avg/2000.csv b/results/mamba-2.8b-hf/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c
--- /dev/null
+++ b/results/mamba-2.8b-hf/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.35
diff --git a/results/mamba-2.8b-hf/avg/4000.csv b/results/mamba-2.8b-hf/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..0b053187266d9bf5c3e7fbc2f34fbabcbb35b1c9
--- /dev/null
+++ b/results/mamba-2.8b-hf/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.09
diff --git a/results/mamba-2.8b-hf/avg/8000.csv b/results/mamba-2.8b-hf/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..9e437f1fcd823b898c1f4556c0805a4b31957695
--- /dev/null
+++ b/results/mamba-2.8b-hf/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.0
diff --git a/results/rwkv-6-world-7b/avg/0.csv b/results/rwkv-6-world-7b/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d
--- /dev/null
+++ b/results/rwkv-6-world-7b/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.56
diff --git a/results/rwkv-6-world-7b/avg/1000.csv b/results/rwkv-6-world-7b/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7
--- /dev/null
+++ b/results/rwkv-6-world-7b/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.55
diff --git a/results/rwkv-6-world-7b/avg/2000.csv b/results/rwkv-6-world-7b/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/rwkv-6-world-7b/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/rwkv-6-world-7b/avg/4000.csv b/results/rwkv-6-world-7b/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c
--- /dev/null
+++ b/results/rwkv-6-world-7b/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.35
diff --git a/results/rwkv-6-world-7b/avg/8000.csv b/results/rwkv-6-world-7b/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..8fc9f31a839fd188fa8898df3ecfd89dd57b2c57
--- /dev/null
+++ b/results/rwkv-6-world-7b/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.07
diff --git a/results/v5-Eagle-7B-HF/avg/0.csv b/results/v5-Eagle-7B-HF/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..90476c2461795796d79aacaea3589b52b4c7a571
--- /dev/null
+++ b/results/v5-Eagle-7B-HF/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.62
diff --git a/results/v5-Eagle-7B-HF/avg/1000.csv b/results/v5-Eagle-7B-HF/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..55837811362d6f27a37b7a72f0edcb238c90ccf6
--- /dev/null
+++ b/results/v5-Eagle-7B-HF/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.54
diff --git a/results/v5-Eagle-7B-HF/avg/2000.csv b/results/v5-Eagle-7B-HF/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9
--- /dev/null
+++ b/results/v5-Eagle-7B-HF/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.48
diff --git a/results/v5-Eagle-7B-HF/avg/4000.csv b/results/v5-Eagle-7B-HF/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580
--- /dev/null
+++ b/results/v5-Eagle-7B-HF/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.41
diff --git a/results/v5-Eagle-7B-HF/avg/8000.csv b/results/v5-Eagle-7B-HF/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4acadf7f9b2babb34a649576957ab9cd4145f88d
--- /dev/null
+++ b/results/v5-Eagle-7B-HF/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.02
diff --git a/results/~ ARMT (137M) fine-tune/avg/0.csv b/results/~ ARMT (137M) fine-tune/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b8927d83d4b33cf0391dc05c87ce4da1b1666c64
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.9932
diff --git a/results/~ ARMT (137M) fine-tune/avg/1000000.csv b/results/~ ARMT (137M) fine-tune/avg/1000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b4f630a2846c9003dd0d35ef4c9ec7f55a3bc893
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/1000000.csv
@@ -0,0 +1,2 @@
+result
+0.934
diff --git a/results/~ ARMT (137M) fine-tune/avg/10000000.csv b/results/~ ARMT (137M) fine-tune/avg/10000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..dd32bf97d7fd4d9d175735f83d69d7507364b018
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/10000000.csv
@@ -0,0 +1,2 @@
+result
+0.7659999999999999
diff --git a/results/~ ARMT (137M) fine-tune/avg/128000.csv b/results/~ ARMT (137M) fine-tune/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..bba3edb4d9e88824b6cb532710a74d3d2081266e
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.9690000000000001
diff --git a/results/~ ARMT (137M) fine-tune/avg/16000.csv b/results/~ ARMT (137M) fine-tune/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a954a79438d45afec3f9650e2d151eeb609e3ca8
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.981
diff --git a/results/~ ARMT (137M) fine-tune/avg/32000.csv b/results/~ ARMT (137M) fine-tune/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..9b92468b080e05d8e2a04b3f2f443e2ecdb03cdc
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.98
diff --git a/results/~ ARMT (137M) fine-tune/avg/4000.csv b/results/~ ARMT (137M) fine-tune/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a954a79438d45afec3f9650e2d151eeb609e3ca8
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.981
diff --git a/results/~ ARMT (137M) fine-tune/avg/500000.csv b/results/~ ARMT (137M) fine-tune/avg/500000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..00e3093e5457665a78622bd377dc5c77bcfa8549
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/500000.csv
@@ -0,0 +1,2 @@
+result
+0.953
diff --git a/results/~ ARMT (137M) fine-tune/avg/64000.csv b/results/~ ARMT (137M) fine-tune/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..3e5622225f7370911f29a7052015db97da8ae02a
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.9790000000000001
diff --git a/results/~ ARMT (137M) fine-tune/avg/8000.csv b/results/~ ARMT (137M) fine-tune/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a3476c7e769bcb5d3d2f0b6a740cd784f154f903
--- /dev/null
+++ b/results/~ ARMT (137M) fine-tune/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.982
diff --git a/results/~ Mamba (130M) fine-tune/avg/128000.csv b/results/~ Mamba (130M) fine-tune/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b25ae7f18f736e6f40e42e2e728e5595fd2d23ea
--- /dev/null
+++ b/results/~ Mamba (130M) fine-tune/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.925
diff --git a/results/~ Mamba (130M) fine-tune/avg/16000.csv b/results/~ Mamba (130M) fine-tune/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..56ee6362b1194a7b16b0a90570a722d25caea106
--- /dev/null
+++ b/results/~ Mamba (130M) fine-tune/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.985
diff --git a/results/~ Mamba (130M) fine-tune/avg/32000.csv b/results/~ Mamba (130M) fine-tune/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a954a79438d45afec3f9650e2d151eeb609e3ca8
--- /dev/null
+++ b/results/~ Mamba (130M) fine-tune/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.981
diff --git a/results/~ Mamba (130M) fine-tune/avg/4000.csv b/results/~ Mamba (130M) fine-tune/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..19164839373fca15d08954246496aecda5c86ef2
--- /dev/null
+++ b/results/~ Mamba (130M) fine-tune/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.987
diff --git a/results/~ Mamba (130M) fine-tune/avg/64000.csv b/results/~ Mamba (130M) fine-tune/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..f403ecb7accc530a5f2556fc20afd0b451757307
--- /dev/null
+++ b/results/~ Mamba (130M) fine-tune/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.97
diff --git a/results/~ Mamba (130M) fine-tune/avg/8000.csv b/results/~ Mamba (130M) fine-tune/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..56ee6362b1194a7b16b0a90570a722d25caea106
--- /dev/null
+++ b/results/~ Mamba (130M) fine-tune/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.985
diff --git a/results/~ RMT (137M) fine-tune/avg/0.csv b/results/~ RMT (137M) fine-tune/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e47efd6649b0171394f69ceba42ed5dce4493485
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.9936
diff --git a/results/~ RMT (137M) fine-tune/avg/1000.csv b/results/~ RMT (137M) fine-tune/avg/1000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c21f27b70cd7f93717d8b59c2455fbf253fac75f
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/1000.csv
@@ -0,0 +1,2 @@
+result
+0.9740000000000001
diff --git a/results/~ RMT (137M) fine-tune/avg/1000000.csv b/results/~ RMT (137M) fine-tune/avg/1000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..3bed8979178f1b6de75350884df2d71cc103c3bb
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/1000000.csv
@@ -0,0 +1,2 @@
+result
+0.42840000000000006
diff --git a/results/~ RMT (137M) fine-tune/avg/10000000.csv b/results/~ RMT (137M) fine-tune/avg/10000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c64e035e06a8895022768bfb1db25dd36fb0e67f
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/10000000.csv
@@ -0,0 +1,2 @@
+result
+0.3378
diff --git a/results/~ RMT (137M) fine-tune/avg/128000.csv b/results/~ RMT (137M) fine-tune/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..0e6c9100642132c9129d9ddc8ff416086575e902
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.5852
diff --git a/results/~ RMT (137M) fine-tune/avg/16000.csv b/results/~ RMT (137M) fine-tune/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..595b1f56b99e4dc155c6eefcf84362994631e27b
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.8562000000000001
diff --git a/results/~ RMT (137M) fine-tune/avg/2000.csv b/results/~ RMT (137M) fine-tune/avg/2000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..c47e2d064a6f39ce226b148d38f75fd71c14d96f
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/2000.csv
@@ -0,0 +1,2 @@
+result
+0.9466
diff --git a/results/~ RMT (137M) fine-tune/avg/32000.csv b/results/~ RMT (137M) fine-tune/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..619306552f9ce4b4f586635db4bc54262c01ea21
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.7787999999999999
diff --git a/results/~ RMT (137M) fine-tune/avg/4000.csv b/results/~ RMT (137M) fine-tune/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..065d9ec8479b0d5cb4efad01c8485f571e5e0b9b
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.9231999999999999
diff --git a/results/~ RMT (137M) fine-tune/avg/500000.csv b/results/~ RMT (137M) fine-tune/avg/500000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..41d218450ceac5c3e3a7e3a8b6994fa5aa47794b
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/500000.csv
@@ -0,0 +1,2 @@
+result
+0.4636
diff --git a/results/~ RMT (137M) fine-tune/avg/64000.csv b/results/~ RMT (137M) fine-tune/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ba0605a0ef7c4301d86891483096c00d9f81eb49
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.6986
diff --git a/results/~ RMT (137M) fine-tune/avg/8000.csv b/results/~ RMT (137M) fine-tune/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ed71caa3e0f078b4fb96916f7e257043d4d228cc
--- /dev/null
+++ b/results/~ RMT (137M) fine-tune/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.899
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/0.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/0.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d7ed0ed39053e3c82b2e022596158d8c5c904f39
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/0.csv
@@ -0,0 +1,2 @@
+result
+0.9805999999999999
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/1000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/1000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a9f022f986a77dd6eda149640bc9fe167a69f969
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/1000000.csv
@@ -0,0 +1,2 @@
+result
+0.4436
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/10000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/10000000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..74044b719a3a56d4310b21610f91957b2aad2ad0
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/10000000.csv
@@ -0,0 +1,2 @@
+result
+0.32880000000000004
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/128000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/128000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d5ace03e36cda133583a570388078f97b1785064
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/128000.csv
@@ -0,0 +1,2 @@
+result
+0.6476
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/16000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/16000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..fbc465a8646b1c3a66bb4f402dec446f25cca5ef
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/16000.csv
@@ -0,0 +1,2 @@
+result
+0.8798
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/32000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/32000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b9dc297afec7799ffd4c4abb495943c7c6318a3f
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/32000.csv
@@ -0,0 +1,2 @@
+result
+0.8276
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/4000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/4000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..f203a347ff9be5874734e6015508d0b3186afec2
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/4000.csv
@@ -0,0 +1,2 @@
+result
+0.9260000000000002
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/500000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/500000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..73017fd66de0a180c83fa309ebad2c236567c2b2
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/500000.csv
@@ -0,0 +1,2 @@
+result
+0.4856
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/64000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/64000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..07f95a7be03251a3c6af20678c35f265dcb4563f
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/64000.csv
@@ -0,0 +1,2 @@
+result
+0.7384000000000001
diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/8000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/8000.csv
new file mode 100644
index 0000000000000000000000000000000000000000..85796b1c86d2a8718ed21550e66f588222a2ad43
--- /dev/null
+++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/8000.csv
@@ -0,0 +1,2 @@
+result
+0.9032