diff --git a/app.py b/app.py index 962b0f6658735b547c58a8c8a10012d08d141753..4104cefa01721667919ffdc6def670c5ad40b07c 100644 --- a/app.py +++ b/app.py @@ -86,7 +86,7 @@ def build_leaderboard_tab(folders): } with gr.Tabs() as tabs: - for tab_id, tab_name in enumerate(['qa1','qa2', 'qa3', 'qa4', 'qa5']): + for tab_id, tab_name in enumerate(['avg', 'qa1','qa2', 'qa3', 'qa4', 'qa5']): df = load_model(folders, tab_name, msg_lengths) cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256) diff --git a/data/BABILong NeurIPS24 Figs - leaderboard.csv b/data/BABILong NeurIPS24 Figs - leaderboard.csv index 352a6a20cc4a2522cb3f05fdf529342b3e16c017..b677a5df6d9392ecc205fd56b1ae419691d99d5e 100644 --- a/data/BABILong NeurIPS24 Figs - leaderboard.csv +++ b/data/BABILong NeurIPS24 Figs - leaderboard.csv @@ -21,8 +21,10 @@ activation-beacon-mistral-7b,avg,59,56,51,48,43,37,36,27,14,,, Phi-3-mini-128k-instruct,avg,64,57,55,51,50,46,42,37,7,,, ai21labs/Jamba-v0.1,avg,65,53,50,48,46,45,41,40,34,,, c4ai-command-r-v01,avg,64,64,63,61,59,52,51,46,38,,, +Meta-Llama-3.1-8B-Instruct,avg,67,68,66,66,62,60,56,49,39,,, Phi-3-medium-128k-instruct,avg,72,70,67,62,60,57,53,45,30,,, GPT-4,avg,87,81,77,74,71,64,53,43,36,,, +Meta-Llama-3.1-70B-Instruct,avg,85,81,78,74,70,65,59,53,45,,, ~ Mamba (130M) fine-tune,avg,,,,"98,7","98,5","98,5","98,1",97,"92,5",,, Llama3-ChatQA-1.5-8B + RAG,avg,48,48,47,46,45,45,44,42,45,42,39,37 ~ RMT (137M) fine-tune,avg,"99,36","97,4","94,66","92,32","89,9","85,62","77,88","69,86","58,52","46,36","42,84","33,78" diff --git a/notebooks/process_results_csv.ipynb b/notebooks/process_results_csv.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e152c27e3af245061ebe8691267b4a47cae80491 --- /dev/null +++ b/notebooks/process_results_csv.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "\n", + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "res_path = '../results'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "p = \"/home/jovyan/rmt/babilong-leaderboard/data/BABILong NeurIPS24 Figs - leaderboard.csv\"\n", + "res_df = pd.read_csv(p)\n", + "# res_df = res_df[res_df.task.isin(['qa1', 'qa2', 'qa3', 'qa4', 'qa5'])]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "lens = [0, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 500000, 1000000, 10000000]\n", + "len_names = ['0K', '1K', '2K', '4K', '8K', '16K', '32K', '64K', '128K', '512K', '1M', '10M']\n", + "\n", + "for model_name in res_df.Model.unique():\n", + " model_df = res_df[res_df.Model == model_name]\n", + " model_name = re.sub('/', ' ', model_name)\n", + " for i, row in model_df.iterrows():\n", + " for l, ln in zip(lens, len_names):\n", + " score = row[ln]\n", + " # print(score)\n", + " if not pd.isna(score):\n", + " score = re.sub(',', '.', score)\n", + " score = float(score) / 100\n", + " os.makedirs(os.path.join(res_path, model_name), exist_ok=True)\n", + " os.makedirs(os.path.join(res_path, model_name, row.task), exist_ok=True)\n", + " path = os.path.join(res_path, model_name, row.task, f'{l}.csv')\n", + " df = pd.DataFrame([{'result': score}])\n", + " df.to_csv(path, index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculate average results" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "model_names = next(os.walk(res_path))[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
1
02
\n", + "
" + ], + "text/plain": [ + " 1\n", + "0 2" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame([{1: 2}])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'../results/GPT-3.5 fine-tuned (trained on 100 samples)/qa2'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "task_path" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GPT-4\n", + "GPT-3.5 fine-tuned (trained on 100 samples)\n", + "GPT-3.5 fine-tuned (trained on 1000 samples)\n", + "GPT-3.5\n", + "GPT4 + RAG by segments\n", + "GPT4 + RAG by sentences\n", + "GPT4 + Retrieve sentences (new 100 samples)\n", + "Mistral medium (xxB)\n", + "Mistral\n", + "GPT-2 (137M)\n", + "mamba-2.8b-hf\n", + "rwkv-6-world-7b\n", + "v5-Eagle-7B-HF\n", + "Meta-Llama-3-8B-Instruct\n", + "LLaMA-2-7B-32K\n", + "longchat-7b-v1.5-32k\n", + "LongAlpaca-13B\n", + "Llama-2-7B-32K-Instruct\n", + "Mistral-7b-Instruct-v0.2\n", + "Mixtral-8x7B-Instruct-v0.1\n", + "Mixtral-8x22B-Instruct-v0.1\n", + "activation-beacon-llama2-7b-chat\n", + "Yarn-Mistral-7b-128k\n", + "chatglm3-6b-128k\n", + "activation-beacon-mistral-7b\n", + "Phi-3-mini-128k-instruct\n", + "c4ai-command-r-v01\n", + "Phi-3-medium-128k-instruct\n", + "~ Mamba (130M) fine-tune\n", + "Llama3-ChatQA-1.5-8B + RAG\n", + "~ RMT (137M) fine-tune\n", + "~ ARMT (137M) fine-tune\n", + "01-ai Yi-34B\n", + "01-ai Yi-34B-200k\n", + "01-ai Yi-9B-200k\n", + "ai21labs Jamba-v0.1\n", + "~ RMT-Retrieval (137M) fine-tune\n", + "GPT-4 (gpt-4-0125-preview)\n", + "Meta-Llama-3.1-8B-Instruct\n", + "Meta-Llama-3.1-70B-Instruct\n" + ] + } + ], + "source": [ + "for mn in model_names:\n", + " print(mn)\n", + " avg_path = os.path.join(res_path, mn, 'avg')\n", + " if os.path.exists(avg_path):\n", + " continue\n", + " \n", + " scores = {}\n", + " for task_name in [f'qa{i}' for i in range(1, 6)]:\n", + " task_path = os.path.join(res_path, mn, task_name)\n", + " if not os.path.exists(task_path):\n", + " continue\n", + "\n", + " filenames = next(os.walk(task_path))[2]\n", + " for fn in filenames:\n", + " len_name = fn.split('.')[0]\n", + " df = pd.read_csv(os.path.join(task_path, fn))\n", + " \n", + " score = df.result.mean()\n", + " if len_name not in scores:\n", + " scores[len_name] = [score]\n", + " else:\n", + " scores[len_name].append(score)\n", + "\n", + " for k,v in scores.items():\n", + " sc = np.mean(v)\n", + " out_path = os.path.join(avg_path, k + '.csv')\n", + " df = pd.DataFrame([{'result': sc}])\n", + " if len(v) < 5:\n", + " continue\n", + " os.makedirs(avg_path, exist_ok=True)\n", + " df.to_csv(out_path, index=False)\n", + " print(out_path)\n", + " # 1/0\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'16000': [0.58], '32000': [0.33], '4000': [0.73], '8000': [0.75]}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scores" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/test.ipynb b/notebooks/test.ipynb deleted file mode 100644 index 70aa3aa36e3c0a985a27da5988269df92a3614b4..0000000000000000000000000000000000000000 --- a/notebooks/test.ipynb +++ /dev/null @@ -1,78 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "res_path = '../results'" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "p = \"/home/jovyan/rmt/babilong-leaderboard/data/BABILong NeurIPS24 Figs - leaderboard.csv\"\n", - "res_df = pd.read_csv(p)\n", - "res_df = res_df[res_df.task.isin(['qa1', 'qa2', 'qa3', 'qa4', 'qa5'])]" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "lens = [0, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 500000, 1000000, 10000000]\n", - "len_names = ['0K', '1K', '2K', '4K', '8K', '16K', '32K', '64K', '128K', '512K', '1M', '10M']\n", - "\n", - "for model_name in res_df.Model.unique():\n", - " model_df = res_df[res_df.Model == model_name]\n", - " for i, row in model_df.iterrows():\n", - " for l, ln in zip(lens, len_names):\n", - " score = row[ln]\n", - " # print(score)\n", - " if not pd.isna(score):\n", - " os.makedirs(os.path.join(res_path, model_name), exist_ok=True)\n", - " os.makedirs(os.path.join(res_path, model_name, row.task), exist_ok=True)\n", - " path = os.path.join(res_path, model_name, row.task, f'{l}.csv')\n", - " df = pd.DataFrame([{'result': score}])\n", - " df.to_csv(path, index=False)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/results/01-ai Yi-34B-200k/avg/0.csv b/results/01-ai Yi-34B-200k/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/0.csv @@ -0,0 +1,2 @@ +result +0.65 diff --git a/results/01-ai Yi-34B-200k/avg/1000.csv b/results/01-ai Yi-34B-200k/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.59 diff --git a/results/01-ai Yi-34B-200k/avg/16000.csv b/results/01-ai Yi-34B-200k/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.5 diff --git a/results/01-ai Yi-34B-200k/avg/2000.csv b/results/01-ai Yi-34B-200k/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.56 diff --git a/results/01-ai Yi-34B-200k/avg/32000.csv b/results/01-ai Yi-34B-200k/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/01-ai Yi-34B-200k/avg/4000.csv b/results/01-ai Yi-34B-200k/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..55837811362d6f27a37b7a72f0edcb238c90ccf6 --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.54 diff --git a/results/01-ai Yi-34B-200k/avg/64000.csv b/results/01-ai Yi-34B-200k/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/01-ai Yi-34B-200k/avg/8000.csv b/results/01-ai Yi-34B-200k/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/01-ai Yi-34B-200k/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/01-ai Yi-34B/avg/0.csv b/results/01-ai Yi-34B/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..e952da3a8db796734136b86b833a087661b59f2d --- /dev/null +++ b/results/01-ai Yi-34B/avg/0.csv @@ -0,0 +1,2 @@ +result +0.72 diff --git a/results/01-ai Yi-34B/avg/1000.csv b/results/01-ai Yi-34B/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/01-ai Yi-34B/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/01-ai Yi-34B/avg/16000.csv b/results/01-ai Yi-34B/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e5a7b897d5b6eeaaf9d23030dd640cd27415a1c9 --- /dev/null +++ b/results/01-ai Yi-34B/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.31 diff --git a/results/01-ai Yi-34B/avg/2000.csv b/results/01-ai Yi-34B/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261 --- /dev/null +++ b/results/01-ai Yi-34B/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.43 diff --git a/results/01-ai Yi-34B/avg/32000.csv b/results/01-ai Yi-34B/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b19740076b62c87f4353fac30eab97aa8b4611fd --- /dev/null +++ b/results/01-ai Yi-34B/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.04 diff --git a/results/01-ai Yi-34B/avg/4000.csv b/results/01-ai Yi-34B/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf --- /dev/null +++ b/results/01-ai Yi-34B/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.37 diff --git a/results/01-ai Yi-34B/avg/8000.csv b/results/01-ai Yi-34B/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d220a0008f0e11c42ab018c0243594c09e74f118 --- /dev/null +++ b/results/01-ai Yi-34B/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.38 diff --git a/results/01-ai Yi-9B-200k/avg/0.csv b/results/01-ai Yi-9B-200k/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/0.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/01-ai Yi-9B-200k/avg/1000.csv b/results/01-ai Yi-9B-200k/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7 --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.55 diff --git a/results/01-ai Yi-9B-200k/avg/128000.csv b/results/01-ai Yi-9B-200k/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..1628a5c7821b58b09e92c607a6d582cab60789f7 --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.24 diff --git a/results/01-ai Yi-9B-200k/avg/16000.csv b/results/01-ai Yi-9B-200k/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33 --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.36 diff --git a/results/01-ai Yi-9B-200k/avg/2000.csv b/results/01-ai Yi-9B-200k/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/01-ai Yi-9B-200k/avg/32000.csv b/results/01-ai Yi-9B-200k/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.37 diff --git a/results/01-ai Yi-9B-200k/avg/4000.csv b/results/01-ai Yi-9B-200k/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/01-ai Yi-9B-200k/avg/64000.csv b/results/01-ai Yi-9B-200k/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..43e7b0059892d598aad8dec7e0c3b137de40b5a9 --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.29 diff --git a/results/01-ai Yi-9B-200k/avg/8000.csv b/results/01-ai Yi-9B-200k/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/01-ai Yi-9B-200k/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/GPT-2 (137M)/avg/0.csv b/results/GPT-2 (137M)/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..0a5e1292a7ec9eec0be5ab1ed37f761feec7241b --- /dev/null +++ b/results/GPT-2 (137M)/avg/0.csv @@ -0,0 +1,2 @@ +result +0.27 diff --git a/results/GPT-2 (137M)/avg/1000.csv b/results/GPT-2 (137M)/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..466f801be654fefe9c3ea15569001ca34024a9ec --- /dev/null +++ b/results/GPT-2 (137M)/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.15 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/0.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..be1f19880a38b06fc91fa035a2522e67ccf404be --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/0.csv @@ -0,0 +1,2 @@ +result +0.874 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/1000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a9066deebfc3de66f6a71d33fafe29752ef8551b --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.8140000000000001 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/128000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..22c2f48ccd46b9771eaf807dd78afe6462ec5f2d --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.358 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/16000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..738e2e53bbbac478ccf741fe98586b4fce44336a --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.6399999999999999 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/2000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..f9b2476a243cdf9b8c2bbd1d6f3795f9145e66ff --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.768 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/32000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ff3b74862da2e52575f13ccbece4ce9275c4b6f0 --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.526 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/4000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..3f2dac4457b3854cf209f9442f5b55bec163ae3c --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.738 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/64000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..632a771d928f8359027516f3978de05bc623dee8 --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.42800000000000005 diff --git a/results/GPT-4 (gpt-4-0125-preview)/avg/8000.csv b/results/GPT-4 (gpt-4-0125-preview)/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..cbfbe96871fdac9bfd91c74198420498e2fb6a1a --- /dev/null +++ b/results/GPT-4 (gpt-4-0125-preview)/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.7120000000000001 diff --git a/results/LLaMA-2-7B-32K/avg/0.csv b/results/LLaMA-2-7B-32K/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580 --- /dev/null +++ b/results/LLaMA-2-7B-32K/avg/0.csv @@ -0,0 +1,2 @@ +result +0.41 diff --git a/results/LLaMA-2-7B-32K/avg/1000.csv b/results/LLaMA-2-7B-32K/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785 --- /dev/null +++ b/results/LLaMA-2-7B-32K/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.53 diff --git a/results/LLaMA-2-7B-32K/avg/16000.csv b/results/LLaMA-2-7B-32K/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..c297a96bf813f15aca75a5f65ed7e2d9a828d24c --- /dev/null +++ b/results/LLaMA-2-7B-32K/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.32 diff --git a/results/LLaMA-2-7B-32K/avg/2000.csv b/results/LLaMA-2-7B-32K/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/LLaMA-2-7B-32K/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/LLaMA-2-7B-32K/avg/32000.csv b/results/LLaMA-2-7B-32K/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..19336832ec944a8f1b52d2856bb788cfff85f78e --- /dev/null +++ b/results/LLaMA-2-7B-32K/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.03 diff --git a/results/LLaMA-2-7B-32K/avg/4000.csv b/results/LLaMA-2-7B-32K/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6 --- /dev/null +++ b/results/LLaMA-2-7B-32K/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.4 diff --git a/results/LLaMA-2-7B-32K/avg/8000.csv b/results/LLaMA-2-7B-32K/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad --- /dev/null +++ b/results/LLaMA-2-7B-32K/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.39 diff --git a/results/Llama-2-7B-32K-Instruct/avg/0.csv b/results/Llama-2-7B-32K-Instruct/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe --- /dev/null +++ b/results/Llama-2-7B-32K-Instruct/avg/0.csv @@ -0,0 +1,2 @@ +result +0.49 diff --git a/results/Llama-2-7B-32K-Instruct/avg/1000.csv b/results/Llama-2-7B-32K-Instruct/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/Llama-2-7B-32K-Instruct/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/Llama-2-7B-32K-Instruct/avg/16000.csv b/results/Llama-2-7B-32K-Instruct/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c --- /dev/null +++ b/results/Llama-2-7B-32K-Instruct/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.35 diff --git a/results/Llama-2-7B-32K-Instruct/avg/2000.csv b/results/Llama-2-7B-32K-Instruct/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe --- /dev/null +++ b/results/Llama-2-7B-32K-Instruct/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.49 diff --git a/results/Llama-2-7B-32K-Instruct/avg/32000.csv b/results/Llama-2-7B-32K-Instruct/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..11a090972d80aec2ff845c30aab47407be2352d9 --- /dev/null +++ b/results/Llama-2-7B-32K-Instruct/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.05 diff --git a/results/Llama-2-7B-32K-Instruct/avg/4000.csv b/results/Llama-2-7B-32K-Instruct/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261 --- /dev/null +++ b/results/Llama-2-7B-32K-Instruct/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.43 diff --git a/results/Llama-2-7B-32K-Instruct/avg/8000.csv b/results/Llama-2-7B-32K-Instruct/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6 --- /dev/null +++ b/results/Llama-2-7B-32K-Instruct/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.4 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/0.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/0.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/1000000.csv @@ -0,0 +1,2 @@ +result +0.39 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/10000000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/10000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/10000000.csv @@ -0,0 +1,2 @@ +result +0.37 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/128000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/16000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/2000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..153d5a3b84e4e0e48ae0076c64cf47afb8b264f0 --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.47 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/32000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e29f47d1cb9509158a457f351abf79cd9333a762 --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.44 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/4000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/500000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/500000.csv new file mode 100644 index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4 --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/500000.csv @@ -0,0 +1,2 @@ +result +0.42 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/64000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4 --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.42 diff --git a/results/Llama3-ChatQA-1.5-8B + RAG/avg/8000.csv b/results/Llama3-ChatQA-1.5-8B + RAG/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/Llama3-ChatQA-1.5-8B + RAG/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/LongAlpaca-13B/avg/0.csv b/results/LongAlpaca-13B/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/LongAlpaca-13B/avg/0.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/LongAlpaca-13B/avg/1000.csv b/results/LongAlpaca-13B/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..153d5a3b84e4e0e48ae0076c64cf47afb8b264f0 --- /dev/null +++ b/results/LongAlpaca-13B/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.47 diff --git a/results/LongAlpaca-13B/avg/16000.csv b/results/LongAlpaca-13B/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33 --- /dev/null +++ b/results/LongAlpaca-13B/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.36 diff --git a/results/LongAlpaca-13B/avg/2000.csv b/results/LongAlpaca-13B/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/LongAlpaca-13B/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/LongAlpaca-13B/avg/32000.csv b/results/LongAlpaca-13B/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b19740076b62c87f4353fac30eab97aa8b4611fd --- /dev/null +++ b/results/LongAlpaca-13B/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.04 diff --git a/results/LongAlpaca-13B/avg/4000.csv b/results/LongAlpaca-13B/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261 --- /dev/null +++ b/results/LongAlpaca-13B/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.43 diff --git a/results/LongAlpaca-13B/avg/8000.csv b/results/LongAlpaca-13B/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6 --- /dev/null +++ b/results/LongAlpaca-13B/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.4 diff --git a/results/Meta-Llama-3-8B-Instruct/avg/0.csv b/results/Meta-Llama-3-8B-Instruct/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1 --- /dev/null +++ b/results/Meta-Llama-3-8B-Instruct/avg/0.csv @@ -0,0 +1,2 @@ +result +0.64 diff --git a/results/Meta-Llama-3-8B-Instruct/avg/1000.csv b/results/Meta-Llama-3-8B-Instruct/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40 --- /dev/null +++ b/results/Meta-Llama-3-8B-Instruct/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.6 diff --git a/results/Meta-Llama-3-8B-Instruct/avg/2000.csv b/results/Meta-Llama-3-8B-Instruct/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..cb4221b589457585a0f9e0d4400f8986ee7c41cc --- /dev/null +++ b/results/Meta-Llama-3-8B-Instruct/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.58 diff --git a/results/Meta-Llama-3-8B-Instruct/avg/4000.csv b/results/Meta-Llama-3-8B-Instruct/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a --- /dev/null +++ b/results/Meta-Llama-3-8B-Instruct/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.5 diff --git a/results/Meta-Llama-3-8B-Instruct/avg/8000.csv b/results/Meta-Llama-3-8B-Instruct/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e29f47d1cb9509158a457f351abf79cd9333a762 --- /dev/null +++ b/results/Meta-Llama-3-8B-Instruct/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.44 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/0.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..83ba11dcff9cce6369fff6c946075d6ebeebd368 --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/0.csv @@ -0,0 +1,2 @@ +result +0.85 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/1000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7a27968157e58ea8642cf16e7e0ab8172a0518f7 --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.81 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/128000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/16000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.65 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/2000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..223d850464648a46d745f1c5e86c83752d9b2135 --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.78 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/32000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.59 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/4000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d8ec7e4d79540fb339feccca0ce6b7ea60e8aafa --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.74 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/64000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785 --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.53 diff --git a/results/Meta-Llama-3.1-70B-Instruct/avg/8000.csv b/results/Meta-Llama-3.1-70B-Instruct/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a --- /dev/null +++ b/results/Meta-Llama-3.1-70B-Instruct/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.7 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/0.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..1f7c0e94d6341eed38b798bbe83f395efb572b0a --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/0.csv @@ -0,0 +1,2 @@ +result +0.67 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/1000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..22fb8b3d67c0ed0ec33e60f591d8d5e7580fff53 --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.68 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/128000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.39 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/16000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40 --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.6 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/2000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..06469c9fe3accdc55c9f65a81b7b58307b21a674 --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.66 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/32000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.56 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/4000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..06469c9fe3accdc55c9f65a81b7b58307b21a674 --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.66 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/64000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.49 diff --git a/results/Meta-Llama-3.1-8B-Instruct/avg/8000.csv b/results/Meta-Llama-3.1-8B-Instruct/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..90476c2461795796d79aacaea3589b52b4c7a571 --- /dev/null +++ b/results/Meta-Llama-3.1-8B-Instruct/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.62 diff --git a/results/Mistral-7b-Instruct-v0.2/avg/0.csv b/results/Mistral-7b-Instruct-v0.2/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40 --- /dev/null +++ b/results/Mistral-7b-Instruct-v0.2/avg/0.csv @@ -0,0 +1,2 @@ +result +0.6 diff --git a/results/Mistral-7b-Instruct-v0.2/avg/1000.csv b/results/Mistral-7b-Instruct-v0.2/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d --- /dev/null +++ b/results/Mistral-7b-Instruct-v0.2/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.56 diff --git a/results/Mistral-7b-Instruct-v0.2/avg/16000.csv b/results/Mistral-7b-Instruct-v0.2/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4 --- /dev/null +++ b/results/Mistral-7b-Instruct-v0.2/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.42 diff --git a/results/Mistral-7b-Instruct-v0.2/avg/2000.csv b/results/Mistral-7b-Instruct-v0.2/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/Mistral-7b-Instruct-v0.2/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/Mistral-7b-Instruct-v0.2/avg/32000.csv b/results/Mistral-7b-Instruct-v0.2/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf --- /dev/null +++ b/results/Mistral-7b-Instruct-v0.2/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.37 diff --git a/results/Mistral-7b-Instruct-v0.2/avg/4000.csv b/results/Mistral-7b-Instruct-v0.2/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..2be09b645e270be85f5fe4f17f33d1301ff5d5fe --- /dev/null +++ b/results/Mistral-7b-Instruct-v0.2/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.49 diff --git a/results/Mistral-7b-Instruct-v0.2/avg/8000.csv b/results/Mistral-7b-Instruct-v0.2/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/Mistral-7b-Instruct-v0.2/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/Mistral/avg/0.csv b/results/Mistral/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..e952da3a8db796734136b86b833a087661b59f2d --- /dev/null +++ b/results/Mistral/avg/0.csv @@ -0,0 +1,2 @@ +result +0.72 diff --git a/results/Mistral/avg/16000.csv b/results/Mistral/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..590e72f0c5a2aea0283613b945cb8184582b980b --- /dev/null +++ b/results/Mistral/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.44000000000000006 diff --git a/results/Mistral/avg/32000.csv b/results/Mistral/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..6c2e835608d123a8b2253a3f25f1860969cf60cf --- /dev/null +++ b/results/Mistral/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.372 diff --git a/results/Mistral/avg/4000.csv b/results/Mistral/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7d81f926ca158ab1ab70fa95f90aeb62aebdd468 --- /dev/null +++ b/results/Mistral/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.546 diff --git a/results/Mistral/avg/8000.csv b/results/Mistral/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..deed529b2467e14283200bb2c6b8aca8b9c5c834 --- /dev/null +++ b/results/Mistral/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.5199999999999999 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/0.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..76adb21b45a8d8b7a67041be02009075571488ff --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/0.csv @@ -0,0 +1,2 @@ +result +0.75 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/1000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..fdeb0e98d24f032d7d0c55367ac3d84c009cbe2b --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.73 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/16000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129 --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.51 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/2000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.7 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/32000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261 --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.43 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/4000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.65 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/64000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.35 diff --git a/results/Mixtral-8x22B-Instruct-v0.1/avg/8000.csv b/results/Mixtral-8x22B-Instruct-v0.1/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..cb4221b589457585a0f9e0d4400f8986ee7c41cc --- /dev/null +++ b/results/Mixtral-8x22B-Instruct-v0.1/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.58 diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/0.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d --- /dev/null +++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/0.csv @@ -0,0 +1,2 @@ +result +0.65 diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/1000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..66043869d21fb20185f28d3a911a996089595b43 --- /dev/null +++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.63 diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/16000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/2000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40 --- /dev/null +++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.6 diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/32000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6 --- /dev/null +++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.4 diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/4000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7 --- /dev/null +++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.55 diff --git a/results/Mixtral-8x7B-Instruct-v0.1/avg/8000.csv b/results/Mixtral-8x7B-Instruct-v0.1/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a --- /dev/null +++ b/results/Mixtral-8x7B-Instruct-v0.1/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.5 diff --git a/results/Phi-3-medium-128k-instruct/avg/0.csv b/results/Phi-3-medium-128k-instruct/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..e952da3a8db796734136b86b833a087661b59f2d --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/0.csv @@ -0,0 +1,2 @@ +result +0.72 diff --git a/results/Phi-3-medium-128k-instruct/avg/1000.csv b/results/Phi-3-medium-128k-instruct/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.7 diff --git a/results/Phi-3-medium-128k-instruct/avg/128000.csv b/results/Phi-3-medium-128k-instruct/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..372e6db8948b50d2a36c8d54b399e2b73d84a10b --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.3 diff --git a/results/Phi-3-medium-128k-instruct/avg/16000.csv b/results/Phi-3-medium-128k-instruct/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..6f7b039b44a27dd5a1d4701de8c71b443d754a95 --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.57 diff --git a/results/Phi-3-medium-128k-instruct/avg/2000.csv b/results/Phi-3-medium-128k-instruct/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..1f7c0e94d6341eed38b798bbe83f395efb572b0a --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.67 diff --git a/results/Phi-3-medium-128k-instruct/avg/32000.csv b/results/Phi-3-medium-128k-instruct/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785 --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.53 diff --git a/results/Phi-3-medium-128k-instruct/avg/4000.csv b/results/Phi-3-medium-128k-instruct/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..90476c2461795796d79aacaea3589b52b4c7a571 --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.62 diff --git a/results/Phi-3-medium-128k-instruct/avg/64000.csv b/results/Phi-3-medium-128k-instruct/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/Phi-3-medium-128k-instruct/avg/8000.csv b/results/Phi-3-medium-128k-instruct/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..73185cf6124b62847537578b1975e3664741cd40 --- /dev/null +++ b/results/Phi-3-medium-128k-instruct/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.6 diff --git a/results/Phi-3-mini-128k-instruct/avg/0.csv b/results/Phi-3-mini-128k-instruct/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1 --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/0.csv @@ -0,0 +1,2 @@ +result +0.64 diff --git a/results/Phi-3-mini-128k-instruct/avg/1000.csv b/results/Phi-3-mini-128k-instruct/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..6f7b039b44a27dd5a1d4701de8c71b443d754a95 --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.57 diff --git a/results/Phi-3-mini-128k-instruct/avg/128000.csv b/results/Phi-3-mini-128k-instruct/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8fc9f31a839fd188fa8898df3ecfd89dd57b2c57 --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.07 diff --git a/results/Phi-3-mini-128k-instruct/avg/16000.csv b/results/Phi-3-mini-128k-instruct/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/Phi-3-mini-128k-instruct/avg/2000.csv b/results/Phi-3-mini-128k-instruct/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7 --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.55 diff --git a/results/Phi-3-mini-128k-instruct/avg/32000.csv b/results/Phi-3-mini-128k-instruct/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4 --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.42 diff --git a/results/Phi-3-mini-128k-instruct/avg/4000.csv b/results/Phi-3-mini-128k-instruct/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129 --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.51 diff --git a/results/Phi-3-mini-128k-instruct/avg/64000.csv b/results/Phi-3-mini-128k-instruct/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.37 diff --git a/results/Phi-3-mini-128k-instruct/avg/8000.csv b/results/Phi-3-mini-128k-instruct/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a --- /dev/null +++ b/results/Phi-3-mini-128k-instruct/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.5 diff --git a/results/Yarn-Mistral-7b-128k/avg/0.csv b/results/Yarn-Mistral-7b-128k/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129 --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/0.csv @@ -0,0 +1,2 @@ +result +0.51 diff --git a/results/Yarn-Mistral-7b-128k/avg/1000.csv b/results/Yarn-Mistral-7b-128k/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/Yarn-Mistral-7b-128k/avg/128000.csv b/results/Yarn-Mistral-7b-128k/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..0b053187266d9bf5c3e7fbc2f34fbabcbb35b1c9 --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.09 diff --git a/results/Yarn-Mistral-7b-128k/avg/16000.csv b/results/Yarn-Mistral-7b-128k/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..372e6db8948b50d2a36c8d54b399e2b73d84a10b --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.3 diff --git a/results/Yarn-Mistral-7b-128k/avg/2000.csv b/results/Yarn-Mistral-7b-128k/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261 --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.43 diff --git a/results/Yarn-Mistral-7b-128k/avg/32000.csv b/results/Yarn-Mistral-7b-128k/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..f6db9c63579e516a6fccaae3048e30f40f5fb7bd --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.16 diff --git a/results/Yarn-Mistral-7b-128k/avg/4000.csv b/results/Yarn-Mistral-7b-128k/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6 --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.4 diff --git a/results/Yarn-Mistral-7b-128k/avg/64000.csv b/results/Yarn-Mistral-7b-128k/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..3dd2162250ba01e8e3f1241626a410b2757ba120 --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.1 diff --git a/results/Yarn-Mistral-7b-128k/avg/8000.csv b/results/Yarn-Mistral-7b-128k/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d220a0008f0e11c42ab018c0243594c09e74f118 --- /dev/null +++ b/results/Yarn-Mistral-7b-128k/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.38 diff --git a/results/activation-beacon-llama2-7b-chat/avg/0.csv b/results/activation-beacon-llama2-7b-chat/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7 --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/0.csv @@ -0,0 +1,2 @@ +result +0.55 diff --git a/results/activation-beacon-llama2-7b-chat/avg/1000.csv b/results/activation-beacon-llama2-7b-chat/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/activation-beacon-llama2-7b-chat/avg/128000.csv b/results/activation-beacon-llama2-7b-chat/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..86b92dd0e26fa2fea5cc68e01132af62350a3a59 --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.06 diff --git a/results/activation-beacon-llama2-7b-chat/avg/16000.csv b/results/activation-beacon-llama2-7b-chat/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..5ab29f154b21da70f9778e41715bff64df1d4d64 --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.23 diff --git a/results/activation-beacon-llama2-7b-chat/avg/2000.csv b/results/activation-beacon-llama2-7b-chat/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..153d5a3b84e4e0e48ae0076c64cf47afb8b264f0 --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.47 diff --git a/results/activation-beacon-llama2-7b-chat/avg/32000.csv b/results/activation-beacon-llama2-7b-chat/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..f6db9c63579e516a6fccaae3048e30f40f5fb7bd --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.16 diff --git a/results/activation-beacon-llama2-7b-chat/avg/4000.csv b/results/activation-beacon-llama2-7b-chat/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261 --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.43 diff --git a/results/activation-beacon-llama2-7b-chat/avg/64000.csv b/results/activation-beacon-llama2-7b-chat/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..1bbf04acff1bd08a47015d76283cad0180c9e78c --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.08 diff --git a/results/activation-beacon-llama2-7b-chat/avg/8000.csv b/results/activation-beacon-llama2-7b-chat/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33 --- /dev/null +++ b/results/activation-beacon-llama2-7b-chat/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.36 diff --git a/results/activation-beacon-mistral-7b/avg/0.csv b/results/activation-beacon-mistral-7b/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/0.csv @@ -0,0 +1,2 @@ +result +0.59 diff --git a/results/activation-beacon-mistral-7b/avg/1000.csv b/results/activation-beacon-mistral-7b/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.56 diff --git a/results/activation-beacon-mistral-7b/avg/128000.csv b/results/activation-beacon-mistral-7b/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..2e4950cdc3d91596fe0128c450b34c9b0e91d91b --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.14 diff --git a/results/activation-beacon-mistral-7b/avg/16000.csv b/results/activation-beacon-mistral-7b/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ce21d5886fbcbcae20b93146113ddcd40f561ebf --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.37 diff --git a/results/activation-beacon-mistral-7b/avg/2000.csv b/results/activation-beacon-mistral-7b/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129 --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.51 diff --git a/results/activation-beacon-mistral-7b/avg/32000.csv b/results/activation-beacon-mistral-7b/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33 --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.36 diff --git a/results/activation-beacon-mistral-7b/avg/4000.csv b/results/activation-beacon-mistral-7b/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/activation-beacon-mistral-7b/avg/64000.csv b/results/activation-beacon-mistral-7b/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..0a5e1292a7ec9eec0be5ab1ed37f761feec7241b --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.27 diff --git a/results/activation-beacon-mistral-7b/avg/8000.csv b/results/activation-beacon-mistral-7b/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8ded6f83954586c44ad05174b7a3b44c84e6e261 --- /dev/null +++ b/results/activation-beacon-mistral-7b/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.43 diff --git a/results/ai21labs Jamba-v0.1/avg/0.csv b/results/ai21labs Jamba-v0.1/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..4966aebbf8b964c945f71bad33d92c5102844c2d --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/0.csv @@ -0,0 +1,2 @@ +result +0.65 diff --git a/results/ai21labs Jamba-v0.1/avg/1000.csv b/results/ai21labs Jamba-v0.1/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b3596ba97e928d291ed1c63db4564056b44e7785 --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.53 diff --git a/results/ai21labs Jamba-v0.1/avg/128000.csv b/results/ai21labs Jamba-v0.1/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a35ac122d663d8cbc1c0af4a9995f6290585cd61 --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.34 diff --git a/results/ai21labs Jamba-v0.1/avg/16000.csv b/results/ai21labs Jamba-v0.1/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..89df95907c79f67b0441bcd8e01916d5ed3147ca --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.45 diff --git a/results/ai21labs Jamba-v0.1/avg/2000.csv b/results/ai21labs Jamba-v0.1/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..e8921d51a14f6c6af82b659f2641c450327d151a --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.5 diff --git a/results/ai21labs Jamba-v0.1/avg/32000.csv b/results/ai21labs Jamba-v0.1/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580 --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.41 diff --git a/results/ai21labs Jamba-v0.1/avg/4000.csv b/results/ai21labs Jamba-v0.1/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/ai21labs Jamba-v0.1/avg/64000.csv b/results/ai21labs Jamba-v0.1/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6 --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.4 diff --git a/results/ai21labs Jamba-v0.1/avg/8000.csv b/results/ai21labs Jamba-v0.1/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/ai21labs Jamba-v0.1/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/c4ai-command-r-v01/avg/0.csv b/results/c4ai-command-r-v01/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/0.csv @@ -0,0 +1,2 @@ +result +0.64 diff --git a/results/c4ai-command-r-v01/avg/1000.csv b/results/c4ai-command-r-v01/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..235fdb9d924c52d3bfbeef527b47fcca8303f6b1 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.64 diff --git a/results/c4ai-command-r-v01/avg/128000.csv b/results/c4ai-command-r-v01/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d220a0008f0e11c42ab018c0243594c09e74f118 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.38 diff --git a/results/c4ai-command-r-v01/avg/16000.csv b/results/c4ai-command-r-v01/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/c4ai-command-r-v01/avg/2000.csv b/results/c4ai-command-r-v01/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..66043869d21fb20185f28d3a911a996089595b43 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.63 diff --git a/results/c4ai-command-r-v01/avg/32000.csv b/results/c4ai-command-r-v01/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.51 diff --git a/results/c4ai-command-r-v01/avg/4000.csv b/results/c4ai-command-r-v01/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d20e89361832ddf1abbe151b539dc5cbc2fdb159 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.61 diff --git a/results/c4ai-command-r-v01/avg/64000.csv b/results/c4ai-command-r-v01/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/c4ai-command-r-v01/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/c4ai-command-r-v01/avg/8000.csv b/results/c4ai-command-r-v01/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..1fa3b0194650ca40fe580d554b8cf29e18fb4d4b --- /dev/null +++ b/results/c4ai-command-r-v01/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.59 diff --git a/results/chatglm3-6b-128k/avg/0.csv b/results/chatglm3-6b-128k/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d --- /dev/null +++ b/results/chatglm3-6b-128k/avg/0.csv @@ -0,0 +1,2 @@ +result +0.56 diff --git a/results/chatglm3-6b-128k/avg/1000.csv b/results/chatglm3-6b-128k/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.55 diff --git a/results/chatglm3-6b-128k/avg/128000.csv b/results/chatglm3-6b-128k/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a2b1af532eedf76087542137a189b2da2c33a421 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.13 diff --git a/results/chatglm3-6b-128k/avg/16000.csv b/results/chatglm3-6b-128k/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.41 diff --git a/results/chatglm3-6b-128k/avg/2000.csv b/results/chatglm3-6b-128k/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..197c7d251ccb4bc0c03d2533cb5529c1fe530129 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.51 diff --git a/results/chatglm3-6b-128k/avg/32000.csv b/results/chatglm3-6b-128k/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..740a999619db874dd9e457f930dc8f01c8c78f33 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.36 diff --git a/results/chatglm3-6b-128k/avg/4000.csv b/results/chatglm3-6b-128k/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/chatglm3-6b-128k/avg/64000.csv b/results/chatglm3-6b-128k/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..246a133d32405f187fce2a1d910781fb6ed86d87 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.21 diff --git a/results/chatglm3-6b-128k/avg/8000.csv b/results/chatglm3-6b-128k/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/chatglm3-6b-128k/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/longchat-7b-v1.5-32k/avg/0.csv b/results/longchat-7b-v1.5-32k/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..84a35040259e9a3c25d02785e6a8333143557008 --- /dev/null +++ b/results/longchat-7b-v1.5-32k/avg/0.csv @@ -0,0 +1,2 @@ +result +0.46 diff --git a/results/longchat-7b-v1.5-32k/avg/1000.csv b/results/longchat-7b-v1.5-32k/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4 --- /dev/null +++ b/results/longchat-7b-v1.5-32k/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.42 diff --git a/results/longchat-7b-v1.5-32k/avg/16000.csv b/results/longchat-7b-v1.5-32k/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..090b0cf09c2910e11dbaf6d48af4c6ae4efe87ad --- /dev/null +++ b/results/longchat-7b-v1.5-32k/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.39 diff --git a/results/longchat-7b-v1.5-32k/avg/2000.csv b/results/longchat-7b-v1.5-32k/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..7ce655c90b50c89eda745a06fd2d5d6da91ed9e6 --- /dev/null +++ b/results/longchat-7b-v1.5-32k/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.4 diff --git a/results/longchat-7b-v1.5-32k/avg/32000.csv b/results/longchat-7b-v1.5-32k/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..11a090972d80aec2ff845c30aab47407be2352d9 --- /dev/null +++ b/results/longchat-7b-v1.5-32k/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.05 diff --git a/results/longchat-7b-v1.5-32k/avg/4000.csv b/results/longchat-7b-v1.5-32k/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580 --- /dev/null +++ b/results/longchat-7b-v1.5-32k/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.41 diff --git a/results/longchat-7b-v1.5-32k/avg/8000.csv b/results/longchat-7b-v1.5-32k/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..82f83f2880089b96caf335389691d4e4aefd72a4 --- /dev/null +++ b/results/longchat-7b-v1.5-32k/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.42 diff --git a/results/mamba-2.8b-hf/avg/0.csv b/results/mamba-2.8b-hf/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..30b8677e38c6147f378843d83cdb738e20fb9c5a --- /dev/null +++ b/results/mamba-2.8b-hf/avg/0.csv @@ -0,0 +1,2 @@ +result +0.7 diff --git a/results/mamba-2.8b-hf/avg/1000.csv b/results/mamba-2.8b-hf/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..39467bcb402a65ea39eb4898a1c28fc11507f9c6 --- /dev/null +++ b/results/mamba-2.8b-hf/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.52 diff --git a/results/mamba-2.8b-hf/avg/2000.csv b/results/mamba-2.8b-hf/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c --- /dev/null +++ b/results/mamba-2.8b-hf/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.35 diff --git a/results/mamba-2.8b-hf/avg/4000.csv b/results/mamba-2.8b-hf/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..0b053187266d9bf5c3e7fbc2f34fbabcbb35b1c9 --- /dev/null +++ b/results/mamba-2.8b-hf/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.09 diff --git a/results/mamba-2.8b-hf/avg/8000.csv b/results/mamba-2.8b-hf/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..9e437f1fcd823b898c1f4556c0805a4b31957695 --- /dev/null +++ b/results/mamba-2.8b-hf/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.0 diff --git a/results/rwkv-6-world-7b/avg/0.csv b/results/rwkv-6-world-7b/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..7e19811b296e6cf21c8a6d216c28c84251eb122d --- /dev/null +++ b/results/rwkv-6-world-7b/avg/0.csv @@ -0,0 +1,2 @@ +result +0.56 diff --git a/results/rwkv-6-world-7b/avg/1000.csv b/results/rwkv-6-world-7b/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d1af3c1139d6c25d52f1f6e3ec43e2d84a1f69c7 --- /dev/null +++ b/results/rwkv-6-world-7b/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.55 diff --git a/results/rwkv-6-world-7b/avg/2000.csv b/results/rwkv-6-world-7b/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/rwkv-6-world-7b/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/rwkv-6-world-7b/avg/4000.csv b/results/rwkv-6-world-7b/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..5be5e2b842b0151eeaaf1248a55b12e87c0f1f2c --- /dev/null +++ b/results/rwkv-6-world-7b/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.35 diff --git a/results/rwkv-6-world-7b/avg/8000.csv b/results/rwkv-6-world-7b/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..8fc9f31a839fd188fa8898df3ecfd89dd57b2c57 --- /dev/null +++ b/results/rwkv-6-world-7b/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.07 diff --git a/results/v5-Eagle-7B-HF/avg/0.csv b/results/v5-Eagle-7B-HF/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..90476c2461795796d79aacaea3589b52b4c7a571 --- /dev/null +++ b/results/v5-Eagle-7B-HF/avg/0.csv @@ -0,0 +1,2 @@ +result +0.62 diff --git a/results/v5-Eagle-7B-HF/avg/1000.csv b/results/v5-Eagle-7B-HF/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..55837811362d6f27a37b7a72f0edcb238c90ccf6 --- /dev/null +++ b/results/v5-Eagle-7B-HF/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.54 diff --git a/results/v5-Eagle-7B-HF/avg/2000.csv b/results/v5-Eagle-7B-HF/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ea17e680f171042bd7e193d2fe91edb1f4a271e9 --- /dev/null +++ b/results/v5-Eagle-7B-HF/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.48 diff --git a/results/v5-Eagle-7B-HF/avg/4000.csv b/results/v5-Eagle-7B-HF/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..67a246541636923a7023f90c1eda15aeb026f580 --- /dev/null +++ b/results/v5-Eagle-7B-HF/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.41 diff --git a/results/v5-Eagle-7B-HF/avg/8000.csv b/results/v5-Eagle-7B-HF/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..4acadf7f9b2babb34a649576957ab9cd4145f88d --- /dev/null +++ b/results/v5-Eagle-7B-HF/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.02 diff --git a/results/~ ARMT (137M) fine-tune/avg/0.csv b/results/~ ARMT (137M) fine-tune/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..b8927d83d4b33cf0391dc05c87ce4da1b1666c64 --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/0.csv @@ -0,0 +1,2 @@ +result +0.9932 diff --git a/results/~ ARMT (137M) fine-tune/avg/1000000.csv b/results/~ ARMT (137M) fine-tune/avg/1000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b4f630a2846c9003dd0d35ef4c9ec7f55a3bc893 --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/1000000.csv @@ -0,0 +1,2 @@ +result +0.934 diff --git a/results/~ ARMT (137M) fine-tune/avg/10000000.csv b/results/~ ARMT (137M) fine-tune/avg/10000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..dd32bf97d7fd4d9d175735f83d69d7507364b018 --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/10000000.csv @@ -0,0 +1,2 @@ +result +0.7659999999999999 diff --git a/results/~ ARMT (137M) fine-tune/avg/128000.csv b/results/~ ARMT (137M) fine-tune/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..bba3edb4d9e88824b6cb532710a74d3d2081266e --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.9690000000000001 diff --git a/results/~ ARMT (137M) fine-tune/avg/16000.csv b/results/~ ARMT (137M) fine-tune/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a954a79438d45afec3f9650e2d151eeb609e3ca8 --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.981 diff --git a/results/~ ARMT (137M) fine-tune/avg/32000.csv b/results/~ ARMT (137M) fine-tune/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..9b92468b080e05d8e2a04b3f2f443e2ecdb03cdc --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.98 diff --git a/results/~ ARMT (137M) fine-tune/avg/4000.csv b/results/~ ARMT (137M) fine-tune/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a954a79438d45afec3f9650e2d151eeb609e3ca8 --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.981 diff --git a/results/~ ARMT (137M) fine-tune/avg/500000.csv b/results/~ ARMT (137M) fine-tune/avg/500000.csv new file mode 100644 index 0000000000000000000000000000000000000000..00e3093e5457665a78622bd377dc5c77bcfa8549 --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/500000.csv @@ -0,0 +1,2 @@ +result +0.953 diff --git a/results/~ ARMT (137M) fine-tune/avg/64000.csv b/results/~ ARMT (137M) fine-tune/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..3e5622225f7370911f29a7052015db97da8ae02a --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.9790000000000001 diff --git a/results/~ ARMT (137M) fine-tune/avg/8000.csv b/results/~ ARMT (137M) fine-tune/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a3476c7e769bcb5d3d2f0b6a740cd784f154f903 --- /dev/null +++ b/results/~ ARMT (137M) fine-tune/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.982 diff --git a/results/~ Mamba (130M) fine-tune/avg/128000.csv b/results/~ Mamba (130M) fine-tune/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b25ae7f18f736e6f40e42e2e728e5595fd2d23ea --- /dev/null +++ b/results/~ Mamba (130M) fine-tune/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.925 diff --git a/results/~ Mamba (130M) fine-tune/avg/16000.csv b/results/~ Mamba (130M) fine-tune/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..56ee6362b1194a7b16b0a90570a722d25caea106 --- /dev/null +++ b/results/~ Mamba (130M) fine-tune/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.985 diff --git a/results/~ Mamba (130M) fine-tune/avg/32000.csv b/results/~ Mamba (130M) fine-tune/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a954a79438d45afec3f9650e2d151eeb609e3ca8 --- /dev/null +++ b/results/~ Mamba (130M) fine-tune/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.981 diff --git a/results/~ Mamba (130M) fine-tune/avg/4000.csv b/results/~ Mamba (130M) fine-tune/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..19164839373fca15d08954246496aecda5c86ef2 --- /dev/null +++ b/results/~ Mamba (130M) fine-tune/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.987 diff --git a/results/~ Mamba (130M) fine-tune/avg/64000.csv b/results/~ Mamba (130M) fine-tune/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..f403ecb7accc530a5f2556fc20afd0b451757307 --- /dev/null +++ b/results/~ Mamba (130M) fine-tune/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.97 diff --git a/results/~ Mamba (130M) fine-tune/avg/8000.csv b/results/~ Mamba (130M) fine-tune/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..56ee6362b1194a7b16b0a90570a722d25caea106 --- /dev/null +++ b/results/~ Mamba (130M) fine-tune/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.985 diff --git a/results/~ RMT (137M) fine-tune/avg/0.csv b/results/~ RMT (137M) fine-tune/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..e47efd6649b0171394f69ceba42ed5dce4493485 --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/0.csv @@ -0,0 +1,2 @@ +result +0.9936 diff --git a/results/~ RMT (137M) fine-tune/avg/1000.csv b/results/~ RMT (137M) fine-tune/avg/1000.csv new file mode 100644 index 0000000000000000000000000000000000000000..c21f27b70cd7f93717d8b59c2455fbf253fac75f --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/1000.csv @@ -0,0 +1,2 @@ +result +0.9740000000000001 diff --git a/results/~ RMT (137M) fine-tune/avg/1000000.csv b/results/~ RMT (137M) fine-tune/avg/1000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..3bed8979178f1b6de75350884df2d71cc103c3bb --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/1000000.csv @@ -0,0 +1,2 @@ +result +0.42840000000000006 diff --git a/results/~ RMT (137M) fine-tune/avg/10000000.csv b/results/~ RMT (137M) fine-tune/avg/10000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..c64e035e06a8895022768bfb1db25dd36fb0e67f --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/10000000.csv @@ -0,0 +1,2 @@ +result +0.3378 diff --git a/results/~ RMT (137M) fine-tune/avg/128000.csv b/results/~ RMT (137M) fine-tune/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..0e6c9100642132c9129d9ddc8ff416086575e902 --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.5852 diff --git a/results/~ RMT (137M) fine-tune/avg/16000.csv b/results/~ RMT (137M) fine-tune/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..595b1f56b99e4dc155c6eefcf84362994631e27b --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.8562000000000001 diff --git a/results/~ RMT (137M) fine-tune/avg/2000.csv b/results/~ RMT (137M) fine-tune/avg/2000.csv new file mode 100644 index 0000000000000000000000000000000000000000..c47e2d064a6f39ce226b148d38f75fd71c14d96f --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/2000.csv @@ -0,0 +1,2 @@ +result +0.9466 diff --git a/results/~ RMT (137M) fine-tune/avg/32000.csv b/results/~ RMT (137M) fine-tune/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..619306552f9ce4b4f586635db4bc54262c01ea21 --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.7787999999999999 diff --git a/results/~ RMT (137M) fine-tune/avg/4000.csv b/results/~ RMT (137M) fine-tune/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..065d9ec8479b0d5cb4efad01c8485f571e5e0b9b --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.9231999999999999 diff --git a/results/~ RMT (137M) fine-tune/avg/500000.csv b/results/~ RMT (137M) fine-tune/avg/500000.csv new file mode 100644 index 0000000000000000000000000000000000000000..41d218450ceac5c3e3a7e3a8b6994fa5aa47794b --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/500000.csv @@ -0,0 +1,2 @@ +result +0.4636 diff --git a/results/~ RMT (137M) fine-tune/avg/64000.csv b/results/~ RMT (137M) fine-tune/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ba0605a0ef7c4301d86891483096c00d9f81eb49 --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.6986 diff --git a/results/~ RMT (137M) fine-tune/avg/8000.csv b/results/~ RMT (137M) fine-tune/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..ed71caa3e0f078b4fb96916f7e257043d4d228cc --- /dev/null +++ b/results/~ RMT (137M) fine-tune/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.899 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/0.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/0.csv new file mode 100644 index 0000000000000000000000000000000000000000..d7ed0ed39053e3c82b2e022596158d8c5c904f39 --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/0.csv @@ -0,0 +1,2 @@ +result +0.9805999999999999 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/1000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/1000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..a9f022f986a77dd6eda149640bc9fe167a69f969 --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/1000000.csv @@ -0,0 +1,2 @@ +result +0.4436 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/10000000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/10000000.csv new file mode 100644 index 0000000000000000000000000000000000000000..74044b719a3a56d4310b21610f91957b2aad2ad0 --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/10000000.csv @@ -0,0 +1,2 @@ +result +0.32880000000000004 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/128000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/128000.csv new file mode 100644 index 0000000000000000000000000000000000000000..d5ace03e36cda133583a570388078f97b1785064 --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/128000.csv @@ -0,0 +1,2 @@ +result +0.6476 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/16000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/16000.csv new file mode 100644 index 0000000000000000000000000000000000000000..fbc465a8646b1c3a66bb4f402dec446f25cca5ef --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/16000.csv @@ -0,0 +1,2 @@ +result +0.8798 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/32000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/32000.csv new file mode 100644 index 0000000000000000000000000000000000000000..b9dc297afec7799ffd4c4abb495943c7c6318a3f --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/32000.csv @@ -0,0 +1,2 @@ +result +0.8276 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/4000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/4000.csv new file mode 100644 index 0000000000000000000000000000000000000000..f203a347ff9be5874734e6015508d0b3186afec2 --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/4000.csv @@ -0,0 +1,2 @@ +result +0.9260000000000002 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/500000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/500000.csv new file mode 100644 index 0000000000000000000000000000000000000000..73017fd66de0a180c83fa309ebad2c236567c2b2 --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/500000.csv @@ -0,0 +1,2 @@ +result +0.4856 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/64000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/64000.csv new file mode 100644 index 0000000000000000000000000000000000000000..07f95a7be03251a3c6af20678c35f265dcb4563f --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/64000.csv @@ -0,0 +1,2 @@ +result +0.7384000000000001 diff --git a/results/~ RMT-Retrieval (137M) fine-tune/avg/8000.csv b/results/~ RMT-Retrieval (137M) fine-tune/avg/8000.csv new file mode 100644 index 0000000000000000000000000000000000000000..85796b1c86d2a8718ed21550e66f588222a2ad43 --- /dev/null +++ b/results/~ RMT-Retrieval (137M) fine-tune/avg/8000.csv @@ -0,0 +1,2 @@ +result +0.9032