benchbench / cache /aggregate_scoress_cache_05c0405c5253dda90dc632e052accfd2.csv
Yotam-Perlitz
update cache
697d2f9
raw
history blame
3.53 kB
model,score
gemini_1_5_pro_api_0409_preview,1.0
gemini_1_5_pro_exp_0801,0.9921875
chatgpt_4o_latest,0.984375
gpt_3_5_turbo_0314,0.9765625
bard_jan_24_gemini_pro,0.96875
claude_1,0.9609375
gemini_advanced_0514,0.953125
llama3_1_70b_instruct,0.9453125
gpt_4o_2024_05_13,0.9375
gpt_4o_2024_08_06,0.9296875
gpt_4o_mini_2024_07_18,0.921875
claude_3_5_sonnet_20240620,0.9140625
claude_3_opus_20240229,0.90625
athene_70b_0725,0.8984375
gemini_pro_dev_api,0.890625
claude_2_0,0.8828125
glm_4_0520,0.875
nemotron_4_340b_instruct,0.8671875
yi_large_preview,0.859375
llama_2_70b_chat,0.8515625
reka_core_20240722,0.84375
gemini_1_5_pro_api_0514,0.8359375
gemini_pro,0.828125
llama3_1_405b_instruct,0.8203125
mistral_large_2407,0.8125
gpt_4_turbo_2024_04_09,0.8046875
gpt_4_1106_preview,0.796875
gpt_3_5_turbo_0613,0.7890625
gpt_4_0125_preview,0.78125
glm_4_0116,0.7734375
zephyr_orpo_141b_a35b_v0_1,0.765625
qwen_max_0428,0.7578125
claude_instant_1,0.75
yi_large,0.7421875
deepseek_coder_v2_0724,0.734375
deepseek_v2_api_0628,0.7265625
gemini_1_5_flash_api_0514,0.71875
llama3_70b_instruct,0.7109375
command_r_plus,0.703125
gpt_4_0314,0.6953125
claude_2_1,0.6875
wizardlm_70b,0.6796875
gemma_2_27b_it,0.671875
dolphin_2_2_1_mistral_7b,0.6640625
guanaco_33b,0.65625
nous_hermes_2_mixtral_8x7b_dpo,0.6484375
wizardlm_13b,0.640625
mpt_30b_chat,0.6328125
qwen1_5_110b_chat,0.625
claude_3_sonnet_20240229,0.6171875
mistral_next,0.609375
deepseek_coder_v2,0.6015625
reka_flash_21b_20240226_online,0.59375
starling_lm_7b_beta,0.5859375
llama2_70b_steerlm_chat,0.578125
mistral_medium,0.5703125
llama_2_13b_chat,0.5625
tulu_2_dpo_70b,0.5546875
reka_core_20240501,0.546875
gpt_4_0613,0.5390625
deepseek_llm_67b_chat,0.53125
solar_10_7b_instruct_v1_0,0.5234375
openchat_3_5_0106,0.515625
reka_flash_20240722,0.5078125
gemma_2_9b_it,0.5
llama3_1_8b_instruct,0.4921875
openchat_3_5,0.484375
pplx_7b_online,0.4765625
qwen1_5_72b_chat,0.46875
zephyr_7b_alpha,0.4609375
claude_3_haiku_20240307,0.453125
starling_lm_7b_alpha,0.4453125
reka_flash_21b_20240226,0.4375
mistral_large_2402,0.4296875
gpt_3_5_turbo_1106,0.421875
qwen1_5_7b_chat,0.4140625
reka_flash_preview_20240611,0.40625
yi_1_5_34b_chat,0.3984375
openhermes_2_5_mistral_7b,0.390625
codellama34b_instruct,0.3828125
qwen1_5_14b_chat,0.375
yi_34b_chat,0.3671875
pplx_70b_online,0.359375
qwen2_72b_instruct,0.3515625
dbrx_instructruct_preview,0.34375
llama3_8b_instruct,0.3359375
falcon_180b_chat,0.328125
palm_2,0.3203125
qwen_14b_chat,0.3125
stripedhyena_nous_7b,0.3046875
qwen1_5_32b_chat,0.296875
command_r,0.2890625
gemma_7b_it,0.28125
zephyr_7b_beta,0.2734375
mixtral_8x22b_instruct_v0_1,0.265625
vicuna_7b,0.2578125
snowflake_arctic_instruct,0.25
vicuna_33b,0.2421875
gemma_2_2b_it,0.234375
koala_13b,0.2265625
gpt_3_5_turbo_0125,0.21875
mistral_7b_instruct,0.2109375
llama_2_7b_chat,0.203125
mistral_7b_instruct_v0_2,0.1953125
gemma_1_1_7b_it,0.1875
gpt4all_13b_snoozy,0.1796875
phi_3_small_8k_instruct,0.171875
olmo_7b_instruct,0.1640625
phi_3_mini_4k_instruct,0.15625
phi_3_mini_128k_instruct,0.1484375
rwkv_4_raven_14b,0.140625
vicuna_13b,0.1328125
codellama_70b_instruct,0.125
mpt_7b_chat,0.1171875
mixtral_8x7b_instruct_v0_1,0.109375
phi_3_medium_4k_instruct,0.1015625
gemma_2b_it,0.09375
phi_3_mini_4k_instruct_june_2024,0.0859375
qwen1_5_4b_chat,0.078125
chatglm_6b,0.0703125
alpaca_13b,0.0625
gemma_1_1_2b_it,0.0546875
chatglm2_6b,0.046875
stablelm_tuned_alpha_7b,0.0390625
chatglm3_6b,0.03125
oasst_pythia_12b,0.0234375
llama_13b,0.015625
fastchat_t5_3b,0.0078125
dolly_v2_12b,0.0