|
dataset,model,length,op,accuracy |
|
symbolic,o1-mini,0,1,0.98 |
|
symbolic,o1-mini,0,11,1.0 |
|
symbolic,o1-mini,0,21,0.9 |
|
symbolic,o1-mini,0,31,0.64 |
|
symbolic,o1-mini,0,41,0.54 |
|
symbolic,o1-mini,0,51,0.46 |
|
symbolic,o1-mini,0,61,0.28 |
|
symbolic,o1-mini,0,71,0.24 |
|
symbolic,o1-mini,0,81,0.12 |
|
symbolic,o1-mini,0,91,0.08 |
|
symbolic,o1-mini,0,101,0.06 |
|
symbolic,o1-mini,0,111,0.08 |
|
symbolic,o1-mini,0,121,0.06 |
|
symbolic,o1-mini,0,131,0.02 |
|
symbolic,o1-mini,0,141,0.08 |
|
symbolic,o1-mini,0,151,0.02 |
|
realistic_Medium,mistral-large-2411,0,2,0.9219 |
|
realistic_Medium,mistral-large-2411,0,4,0.9844 |
|
realistic_Medium,mistral-large-2411,0,6,0.9688 |
|
realistic_Medium,mistral-large-2411,0,8,0.9375 |
|
realistic_Medium,mistral-large-2411,0,10,0.7422 |
|
realistic_Medium,mistral-large-2411,0,12,0.7266 |
|
realistic_Medium,mistral-large-2411,0,14,0.7578 |
|
realistic_Medium,mistral-large-2411,0,16,0.5703 |
|
realistic_Medium,mistral-large-2411,0,18,0.6719 |
|
realistic_Medium,mistral-large-2411,0,20,0.5625 |
|
realistic_Medium,mistral-large-2411,0,22,0.6172 |
|
realistic_Medium,mistral-large-2411,0,24,0.5469 |
|
realistic_Medium,mistral-large-2411,0,26,0.5625 |
|
realistic_Medium,mistral-large-2411,0,28,0.5312 |
|
realistic_Medium,mistral-large-2411,0,30,0.5078 |
|
realistic_Medium,mistral-large-2411,0,35,0.346154 |
|
realistic_Medium,mistral-large-2411,0,40,0.375 |
|
realistic_Medium,mistral-large-2411,0,45,0.298077 |
|
realistic_Medium,mistral-large-2411,0,50,0.25 |
|
realistic_Medium,mistral-large-2411,0,55,0.0865385 |
|
realistic_Medium,mistral-large-2411,0,60,0.125 |
|
realistic_Medium,mistral-large-2411,0,65,0.0576923 |
|
realistic_Medium,mistral-large-2411,0,70,0.0192308 |
|
realistic_Medium,mistral-large-2411,8000,2,0.8438 |
|
realistic_Medium,mistral-large-2411,8000,4,0.9141 |
|
realistic_Medium,mistral-large-2411,8000,6,0.8672 |
|
realistic_Medium,mistral-large-2411,8000,8,0.6641 |
|
realistic_Medium,mistral-large-2411,8000,10,0.5625 |
|
realistic_Medium,mistral-large-2411,8000,12,0.4453 |
|
realistic_Medium,mistral-large-2411,8000,14,0.3984 |
|
realistic_Medium,mistral-large-2411,8000,16,0.1719 |
|
realistic_Medium,mistral-large-2411,8000,18,0.2891 |
|
realistic_Medium,mistral-large-2411,8000,20,0.2656 |
|
realistic_Medium,mistral-large-2411,8000,22,0.2266 |
|
realistic_Medium,mistral-large-2411,8000,24,0.2578 |
|
realistic_Medium,mistral-large-2411,8000,26,0.1875 |
|
realistic_Medium,mistral-large-2411,8000,28,0.2188 |
|
realistic_Medium,mistral-large-2411,8000,30,0.2677 |
|
realistic_Medium,mistral-large-2411,16000,2,0.7188 |
|
realistic_Medium,mistral-large-2411,16000,6,0.8125 |
|
realistic_Medium,mistral-large-2411,16000,10,0.3906 |
|
realistic_Medium,mistral-large-2411,16000,14,0.3047 |
|
realistic_Medium,mistral-large-2411,16000,18,0.1953 |
|
realistic_Medium,mistral-large-2411,16000,22,0.0703 |
|
realistic_Medium,mistral-large-2411,16000,26,0.0938 |
|
realistic_Medium,mistral-large-2411,16000,30,0.0394 |
|
realistic_Medium,mistral-large-2411,32000,2,0.4882 |
|
realistic_Medium,mistral-large-2411,32000,6,0.5938 |
|
realistic_Medium,mistral-large-2411,32000,10,0.2422 |
|
realistic_Medium,mistral-large-2411,32000,14,0.1406 |
|
realistic_Medium,mistral-large-2411,32000,18,0.0783 |
|
realistic_Medium,mistral-large-2411,32000,22,0.0583 |
|
realistic_Medium,mistral-large-2411,32000,26,0.0078 |
|
realistic_Medium,mistral-large-2411,32000,30,0.0078 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,2,0.9 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,3,0.935 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,4,0.91 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,5,0.885 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,6,0.885 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,7,0.795 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,8,0.88 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,9,0.845 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,10,0.805 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,11,0.72 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,12,0.63 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,13,0.695 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,14,0.735 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,15,0.67 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,16,0.71 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,17,0.58 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,18,0.66 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,19,0.54 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,20,0.58 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,21,0.465 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,22,0.415 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,23,0.46 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,24,0.37 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,25,0.395 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,26,0.31 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,27,0.275 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,28,0.315 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,29,0.41 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,30,0.29 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,35,0.16875 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,40,0.06875 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,45,0.09375 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,50,0.05 |
|
realistic_Hard,qwen-2.5-72b-instruct,0,55,0.025 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,2,0.765 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,4,0.85 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,6,0.78 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,8,0.665 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,10,0.595 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,12,0.56 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,14,0.44 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,16,0.415 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,18,0.37 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,20,0.35 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,22,0.34 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,24,0.335 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,26,0.31 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,28,0.295 |
|
realistic_Hard,qwen-2.5-72b-instruct,8000,30,0.27 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,2,0.7344 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,4,0.7656 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,6,0.7266 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,8,0.625 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,10,0.5391 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,12,0.4141 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,14,0.2812 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,16,0.1719 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,18,0.125 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,20,0.1562 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,22,0.0781 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,24,0.0781 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,26,0.0547 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,28,0.0234 |
|
realistic_Hard,qwen-2.5-72b-instruct,16000,30,0.0781 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,2,0.7188 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,4,0.7578 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,6,0.6562 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,8,0.4844 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,10,0.3594 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,12,0.2422 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,14,0.1875 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,16,0.1328 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,18,0.1172 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,20,0.0625 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,22,0.0312 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,24,0.0703 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,26,0.0703 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,28,0.0625 |
|
realistic_Hard,qwen-2.5-72b-instruct,32000,30,0.0312 |
|
realistic_Hard,deepseek-r1,0,10,0.9453 |
|
realistic_Hard,deepseek-r1,0,20,0.9375 |
|
realistic_Hard,deepseek-r1,0,30,0.9453 |
|
realistic_Hard,deepseek-r1,0,40,0.9231 |
|
realistic_Hard,deepseek-r1,0,50,0.8846 |
|
realistic_Hard,deepseek-r1,0,60,0.8269 |
|
realistic_Hard,deepseek-r1,0,70,0.75 |
|
realistic_Hard,deepseek-r1,0,80,0.7404 |
|
realistic_Hard,deepseek-r1,0,90,0.6731 |
|
realistic_Hard,deepseek-r1,0,100,0.493 |
|
realistic_Hard,deepseek-r1,0,110,0.5256 |
|
realistic_Hard,deepseek-r1,0,120,0.2658 |
|
realistic_Hard,deepseek-r1,0,130,0.2717 |
|
symbolic,claude-3.5-sonnet,0,1,0.98 |
|
symbolic,claude-3.5-sonnet,0,3,0.99 |
|
symbolic,claude-3.5-sonnet,0,5,1.0 |
|
symbolic,claude-3.5-sonnet,0,7,0.99 |
|
symbolic,claude-3.5-sonnet,0,9,0.98 |
|
symbolic,claude-3.5-sonnet,0,11,0.98 |
|
symbolic,claude-3.5-sonnet,0,13,0.86 |
|
symbolic,claude-3.5-sonnet,0,15,0.77 |
|
symbolic,claude-3.5-sonnet,0,17,0.72 |
|
symbolic,claude-3.5-sonnet,0,19,0.64 |
|
symbolic,claude-3.5-sonnet,0,21,0.44 |
|
symbolic,claude-3.5-sonnet,0,23,0.39 |
|
symbolic,claude-3.5-sonnet,0,25,0.31 |
|
symbolic,claude-3.5-sonnet,0,27,0.27 |
|
symbolic,claude-3.5-sonnet,0,29,0.22 |
|
symbolic,claude-3.5-sonnet,0,31,0.12 |
|
symbolic,claude-3.5-sonnet,0,33,0.17 |
|
symbolic,claude-3.5-sonnet,0,35,0.04 |
|
symbolic,claude-3.5-sonnet,0,37,0.07 |
|
symbolic,claude-3.5-sonnet,0,39,0.08 |
|
symbolic,claude-3.5-sonnet,0,41,0.08 |
|
symbolic,claude-3.5-sonnet,0,43,0.03 |
|
symbolic,claude-3.5-sonnet,0,45,0.04 |
|
symbolic,claude-3.5-sonnet,0,47,0.05 |
|
symbolic,claude-3.5-sonnet,0,49,0.02 |
|
symbolic,claude-3.5-sonnet,0,51,0.02 |
|
symbolic,claude-3.5-sonnet,0,53,0.01 |
|
symbolic,claude-3.5-sonnet,0,55,0.0 |
|
symbolic,claude-3.5-sonnet,0,57,0.02 |
|
symbolic,claude-3.5-sonnet,0,59,0.01 |
|
symbolic,claude-3.5-sonnet-0620,0,1,1.0 |
|
symbolic,claude-3.5-sonnet-0620,0,3,1.0 |
|
symbolic,claude-3.5-sonnet-0620,0,5,1.0 |
|
symbolic,claude-3.5-sonnet-0620,0,7,1.0 |
|
symbolic,claude-3.5-sonnet-0620,0,9,0.97 |
|
symbolic,claude-3.5-sonnet-0620,0,11,0.93 |
|
symbolic,claude-3.5-sonnet-0620,0,13,0.91 |
|
symbolic,claude-3.5-sonnet-0620,0,15,0.88 |
|
symbolic,claude-3.5-sonnet-0620,0,17,0.73 |
|
symbolic,claude-3.5-sonnet-0620,0,19,0.64 |
|
symbolic,claude-3.5-sonnet-0620,0,21,0.57 |
|
symbolic,claude-3.5-sonnet-0620,0,23,0.5 |
|
symbolic,claude-3.5-sonnet-0620,0,25,0.44 |
|
symbolic,claude-3.5-sonnet-0620,0,27,0.4 |
|
symbolic,claude-3.5-sonnet-0620,0,29,0.39 |
|
symbolic,claude-3.5-sonnet-0620,0,31,0.28 |
|
symbolic,claude-3.5-sonnet-0620,0,33,0.3 |
|
symbolic,claude-3.5-sonnet-0620,0,35,0.15 |
|
symbolic,claude-3.5-sonnet-0620,0,37,0.17 |
|
symbolic,claude-3.5-sonnet-0620,0,39,0.11 |
|
symbolic,claude-3.5-sonnet-0620,0,41,0.16 |
|
symbolic,claude-3.5-sonnet-0620,0,43,0.1 |
|
symbolic,claude-3.5-sonnet-0620,0,45,0.07 |
|
symbolic,claude-3.5-sonnet-0620,0,47,0.07 |
|
symbolic,claude-3.5-sonnet-0620,0,49,0.1 |
|
symbolic,claude-3.5-sonnet-0620,0,51,0.03 |
|
symbolic,claude-3.5-sonnet-0620,0,53,0.04 |
|
symbolic,claude-3.5-sonnet-0620,0,55,0.02 |
|
symbolic,claude-3.5-sonnet-0620,0,57,0.04 |
|
symbolic,claude-3.5-sonnet-0620,0,59,0.02 |
|
realistic_Medium,gpt-4o-mini,0,2,0.762 |
|
realistic_Medium,gpt-4o-mini,0,4,0.9194 |
|
realistic_Medium,gpt-4o-mini,0,6,0.9113 |
|
realistic_Medium,gpt-4o-mini,0,10,0.7863 |
|
realistic_Medium,gpt-4o-mini,0,12,0.6008 |
|
realistic_Medium,gpt-4o-mini,0,12,0.5444 |
|
realistic_Medium,gpt-4o-mini,0,14,0.5444 |
|
realistic_Medium,gpt-4o-mini,0,16,0.4234 |
|
realistic_Medium,gpt-4o-mini,0,18,0.371 |
|
realistic_Medium,gpt-4o-mini,0,20,0.371 |
|
realistic_Medium,gpt-4o-mini,0,20,0.3024 |
|
realistic_Medium,gpt-4o-mini,0,22,0.1492 |
|
realistic_Medium,gpt-4o-mini,0,24,0.1492 |
|
realistic_Medium,gpt-4o-mini,0,26,0.1331 |
|
realistic_Medium,gpt-4o-mini,0,28,0.0766 |
|
realistic_Medium,gpt-4o-mini,0,30,0.0806 |
|
realistic_Medium,gpt-4o-mini,0,35,0.0769231 |
|
realistic_Medium,gpt-4o-mini,0,40,0.0769231 |
|
realistic_Medium,gpt-4o-mini,8000,2,0.4531 |
|
realistic_Medium,gpt-4o-mini,8000,4,0.7969 |
|
realistic_Medium,gpt-4o-mini,8000,6,0.7266 |
|
realistic_Medium,gpt-4o-mini,8000,8,0.4844 |
|
realistic_Medium,gpt-4o-mini,8000,10,0.25 |
|
realistic_Medium,gpt-4o-mini,8000,12,0.2109 |
|
realistic_Medium,gpt-4o-mini,8000,14,0.1094 |
|
realistic_Medium,gpt-4o-mini,8000,16,0.0781 |
|
realistic_Medium,gpt-4o-mini,8000,18,0.0547 |
|
realistic_Medium,gpt-4o-mini,8000,20,0.0156 |
|
realistic_Medium,gpt-4o-mini,8000,22,0.0469 |
|
realistic_Medium,gpt-4o-mini,8000,24,0.0391 |
|
realistic_Medium,gpt-4o-mini,8000,26,0.0938 |
|
realistic_Medium,gpt-4o-mini,8000,28,0.0234 |
|
realistic_Medium,gpt-4o-mini,8000,30,0.0234 |
|
realistic_Medium,gpt-4o-mini,16000,2,0.5078 |
|
realistic_Medium,gpt-4o-mini,16000,4,0.7656 |
|
realistic_Medium,gpt-4o-mini,16000,6,0.6719 |
|
realistic_Medium,gpt-4o-mini,16000,8,0.4141 |
|
realistic_Medium,gpt-4o-mini,16000,10,0.1484 |
|
realistic_Medium,gpt-4o-mini,16000,12,0.0859 |
|
realistic_Medium,gpt-4o-mini,16000,14,0.125 |
|
realistic_Medium,gpt-4o-mini,16000,16,0.0391 |
|
realistic_Medium,gpt-4o-mini,16000,18,0.0234 |
|
realistic_Medium,gpt-4o-mini,16000,20,0.0312 |
|
realistic_Medium,gpt-4o-mini,16000,22,0.0391 |
|
realistic_Medium,gpt-4o-mini,16000,24,0.0625 |
|
realistic_Medium,gpt-4o-mini,16000,26,0.0703 |
|
realistic_Medium,gpt-4o-mini,16000,28,0.0469 |
|
realistic_Medium,gpt-4o-mini,16000,30,0.0469 |
|
realistic_Medium,gpt-4o-mini,32000,2,0.4531 |
|
realistic_Medium,gpt-4o-mini,32000,4,0.6719 |
|
realistic_Medium,gpt-4o-mini,32000,6,0.5469 |
|
realistic_Medium,gpt-4o-mini,32000,8,0.3125 |
|
realistic_Medium,gpt-4o-mini,32000,10,0.0703 |
|
realistic_Medium,gpt-4o-mini,32000,12,0.1094 |
|
realistic_Medium,gpt-4o-mini,32000,14,0.0234 |
|
realistic_Medium,gpt-4o-mini,32000,16,0.0234 |
|
realistic_Medium,gpt-4o-mini,32000,18,0.0547 |
|
realistic_Medium,gpt-4o-mini,32000,20,0.0312 |
|
realistic_Medium,gpt-4o-mini,32000,22,0.0469 |
|
realistic_Medium,gpt-4o-mini,32000,24,0.0391 |
|
realistic_Medium,gpt-4o-mini,32000,26,0.0625 |
|
realistic_Medium,gpt-4o-mini,32000,28,0.0391 |
|
realistic_Medium,gpt-4o-mini,32000,30,0.0312 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,2,0.97 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,3,0.88 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,4,0.98 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,5,0.95 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,6,0.97 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,7,0.975 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,8,0.9 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,9,0.975 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,10,0.815 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,11,0.855 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,12,0.705 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,13,0.74 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,14,0.73 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,15,0.755 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,16,0.61 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,17,0.67 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,18,0.66 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,19,0.615 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,20,0.64 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,21,0.595 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,22,0.61 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,23,0.515 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,24,0.595 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,25,0.485 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,26,0.45 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,27,0.435 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,28,0.44 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,29,0.385 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,30,0.365 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,35,0.30625 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,40,0.19375 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,45,0.18125 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,50,0.125 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,55,0.06875 |
|
realistic_Medium,qwen-2.5-72b-instruct,0,60,0.03125 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,2,0.825 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,4,0.91 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,6,0.835 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,8,0.745 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,10,0.6 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,12,0.555 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,14,0.48 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,16,0.46 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,18,0.34 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,20,0.325 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,22,0.23 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,24,0.2 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,26,0.155 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,28,0.185 |
|
realistic_Medium,qwen-2.5-72b-instruct,8000,30,0.115 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,2,0.7188 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,4,0.8828 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,6,0.8359 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,8,0.625 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,10,0.5781 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,12,0.4375 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,14,0.4062 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,16,0.3984 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,18,0.2344 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,20,0.1719 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,22,0.1562 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,24,0.0625 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,26,0.0625 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,28,0.0859 |
|
realistic_Medium,qwen-2.5-72b-instruct,16000,30,0.0469 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,2,0.7344 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,4,0.8047 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,6,0.6875 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,8,0.5547 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,10,0.5625 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,12,0.4219 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,14,0.3203 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,16,0.2422 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,18,0.1719 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,20,0.1562 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,22,0.0781 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,24,0.0703 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,26,0.0469 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,28,0.0469 |
|
realistic_Medium,qwen-2.5-72b-instruct,32000,30,0.0625 |
|
symbolic,deepseek-v3,0,1,1.0 |
|
symbolic,deepseek-v3,0,6,0.97 |
|
symbolic,deepseek-v3,0,11,0.97 |
|
symbolic,deepseek-v3,0,16,0.88 |
|
symbolic,deepseek-v3,0,21,0.82 |
|
symbolic,deepseek-v3,0,26,0.7 |
|
symbolic,deepseek-v3,0,31,0.62 |
|
symbolic,deepseek-v3,0,36,0.5 |
|
symbolic,deepseek-v3,0,41,0.46 |
|
symbolic,deepseek-v3,0,46,0.44 |
|
symbolic,deepseek-v3,0,51,0.23 |
|
symbolic,deepseek-v3,0,56,0.25 |
|
symbolic,deepseek-v3,0,61,0.23 |
|
symbolic,deepseek-v3,0,66,0.13 |
|
symbolic,deepseek-v3,0,71,0.23 |
|
symbolic,deepseek-v3,0,76,0.13 |
|
symbolic,deepseek-v3,0,81,0.17 |
|
symbolic,deepseek-v3,0,86,0.11 |
|
symbolic,deepseek-v3,0,91,0.06 |
|
symbolic,deepseek-v3,0,96,0.11 |
|
symbolic,deepseek-v3,0,101,0.03 |
|
symbolic,deepseek-v3,0,106,0.05 |
|
symbolic,deepseek-v3,0,111,0.02 |
|
symbolic,deepseek-v3,0,116,0.04 |
|
symbolic,deepseek-v3,8000,1,0.98 |
|
symbolic,deepseek-v3,8000,2,0.83 |
|
symbolic,deepseek-v3,8000,3,0.66 |
|
symbolic,deepseek-v3,8000,4,0.59 |
|
symbolic,deepseek-v3,8000,5,0.43 |
|
symbolic,deepseek-v3,8000,6,0.34 |
|
symbolic,deepseek-v3,8000,7,0.35 |
|
symbolic,deepseek-v3,8000,8,0.28 |
|
symbolic,deepseek-v3,8000,9,0.15 |
|
symbolic,deepseek-v3,8000,10,0.17 |
|
symbolic,deepseek-v3,8000,11,0.11 |
|
symbolic,deepseek-v3,8000,12,0.04 |
|
symbolic,deepseek-v3,8000,13,0.18 |
|
symbolic,deepseek-v3,8000,14,0.09 |
|
symbolic,deepseek-v3,8000,15,0.05 |
|
symbolic,deepseek-v3,8000,16,0.03 |
|
symbolic,deepseek-v3,8000,17,0.08 |
|
symbolic,deepseek-v3,8000,18,0.03 |
|
symbolic,deepseek-v3,8000,19,0.02 |
|
symbolic,deepseek-v3,16000,1,0.98 |
|
symbolic,deepseek-v3,16000,2,0.42 |
|
symbolic,deepseek-v3,16000,3,0.4 |
|
symbolic,deepseek-v3,16000,4,0.22 |
|
symbolic,deepseek-v3,16000,5,0.16 |
|
symbolic,deepseek-v3,16000,6,0.06 |
|
symbolic,deepseek-v3,16000,7,0.06 |
|
symbolic,deepseek-v3,16000,8,0.0 |
|
symbolic,deepseek-v3,16000,9,0.1 |
|
symbolic,deepseek-v3,16000,10,0.06 |
|
symbolic,deepseek-v3,16000,11,0.02 |
|
symbolic,deepseek-v3,16000,12,0.0 |
|
symbolic,deepseek-v3,16000,13,0.0 |
|
symbolic,deepseek-v3,16000,14,0.02 |
|
symbolic,deepseek-v3,32000,1,0.8 |
|
symbolic,deepseek-v3,32000,2,0.2 |
|
symbolic,deepseek-v3,32000,3,0.24 |
|
symbolic,deepseek-v3,32000,4,0.2 |
|
realistic_Hard,llama-3.1-8b-instruct,0,2,0.53 |
|
realistic_Hard,llama-3.1-8b-instruct,0,3,0.6 |
|
realistic_Hard,llama-3.1-8b-instruct,0,4,0.57 |
|
realistic_Hard,llama-3.1-8b-instruct,0,5,0.52 |
|
realistic_Hard,llama-3.1-8b-instruct,0,6,0.485 |
|
realistic_Hard,llama-3.1-8b-instruct,0,7,0.415 |
|
realistic_Hard,llama-3.1-8b-instruct,0,8,0.38 |
|
realistic_Hard,llama-3.1-8b-instruct,0,9,0.325 |
|
realistic_Hard,llama-3.1-8b-instruct,0,10,0.395 |
|
realistic_Hard,llama-3.1-8b-instruct,0,11,0.25 |
|
realistic_Hard,llama-3.1-8b-instruct,0,12,0.28 |
|
realistic_Hard,llama-3.1-8b-instruct,0,13,0.24 |
|
realistic_Hard,llama-3.1-8b-instruct,0,14,0.21 |
|
realistic_Hard,llama-3.1-8b-instruct,0,15,0.19 |
|
realistic_Hard,llama-3.1-8b-instruct,0,16,0.165 |
|
realistic_Hard,llama-3.1-8b-instruct,0,17,0.095 |
|
realistic_Hard,llama-3.1-8b-instruct,0,18,0.1 |
|
realistic_Hard,llama-3.1-8b-instruct,0,19,0.085 |
|
realistic_Hard,llama-3.1-8b-instruct,0,20,0.07 |
|
realistic_Hard,llama-3.1-8b-instruct,0,21,0.055 |
|
realistic_Hard,llama-3.1-8b-instruct,0,22,0.06 |
|
realistic_Hard,llama-3.1-8b-instruct,0,23,0.06 |
|
realistic_Hard,llama-3.1-8b-instruct,0,24,0.065 |
|
realistic_Hard,llama-3.1-8b-instruct,0,25,0.06 |
|
realistic_Hard,llama-3.1-8b-instruct,0,26,0.015 |
|
realistic_Hard,llama-3.1-8b-instruct,0,27,0.025 |
|
realistic_Hard,llama-3.1-8b-instruct,0,28,0.045 |
|
realistic_Hard,llama-3.1-8b-instruct,0,29,0.03 |
|
realistic_Hard,llama-3.1-8b-instruct,0,30,0.02 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,2,0.465 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,4,0.285 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,6,0.265 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,8,0.165 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,10,0.155 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,12,0.105 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,14,0.08 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,16,0.06 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,18,0.03 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,20,0.055 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,22,0.04 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,24,0.025 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,26,0.015 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,28,0.025 |
|
realistic_Hard,llama-3.1-8b-instruct,8000,30,0.01 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,2,0.38 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,4,0.135 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,6,0.135 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,8,0.145 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,10,0.095 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,12,0.06 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,14,0.05 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,16,0.045 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,18,0.045 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,20,0.035 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,22,0.015 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,24,0.015 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,26,0.035 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,28,0.03 |
|
realistic_Hard,llama-3.1-8b-instruct,16000,30,0.01 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,2,0.31 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,4,0.17 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,6,0.11 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,8,0.05 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,10,0.055 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,12,0.035 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,14,0.045 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,16,0.025 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,18,0.02 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,20,0.035 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,22,0.015 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,24,0.02 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,26,0.0242 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,28,0.025 |
|
realistic_Hard,llama-3.1-8b-instruct,32000,30,0.01 |
|
realistic_Hard,llama-3.1-405b-instruct,0,2,0.765 |
|
realistic_Hard,llama-3.1-405b-instruct,0,3,0.805 |
|
realistic_Hard,llama-3.1-405b-instruct,0,4,0.755 |
|
realistic_Hard,llama-3.1-405b-instruct,0,5,0.79 |
|
realistic_Hard,llama-3.1-405b-instruct,0,6,0.85 |
|
realistic_Hard,llama-3.1-405b-instruct,0,7,0.655 |
|
realistic_Hard,llama-3.1-405b-instruct,0,8,0.74 |
|
realistic_Hard,llama-3.1-405b-instruct,0,9,0.71 |
|
realistic_Hard,llama-3.1-405b-instruct,0,10,0.585 |
|
realistic_Hard,llama-3.1-405b-instruct,0,11,0.475 |
|
realistic_Hard,llama-3.1-405b-instruct,0,12,0.455 |
|
realistic_Hard,llama-3.1-405b-instruct,0,13,0.445 |
|
realistic_Hard,llama-3.1-405b-instruct,0,14,0.405 |
|
realistic_Hard,llama-3.1-405b-instruct,0,15,0.395 |
|
realistic_Hard,llama-3.1-405b-instruct,0,16,0.32 |
|
realistic_Hard,llama-3.1-405b-instruct,0,17,0.22 |
|
realistic_Hard,llama-3.1-405b-instruct,0,18,0.195 |
|
realistic_Hard,llama-3.1-405b-instruct,0,19,0.125 |
|
realistic_Hard,llama-3.1-405b-instruct,0,20,0.04 |
|
realistic_Hard,llama-3.1-405b-instruct,0,21,0.075 |
|
realistic_Hard,llama-3.1-405b-instruct,0,22,0.03 |
|
realistic_Hard,llama-3.1-405b-instruct,0,23,0.005 |
|
realistic_Hard,llama-3.1-405b-instruct,0,24,0.01 |
|
realistic_Hard,llama-3.1-405b-instruct,0,25,0.005 |
|
realistic_Hard,llama-3.1-405b-instruct,0,26,0.01 |
|
realistic_Hard,llama-3.1-405b-instruct,0,27,0.01 |
|
realistic_Hard,llama-3.1-405b-instruct,0,28,0.0 |
|
realistic_Hard,llama-3.1-405b-instruct,0,29,0.005 |
|
realistic_Hard,llama-3.1-405b-instruct,0,30,0.005 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,2,0.655 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,3,0.7 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,4,0.65 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,5,0.585 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,6,0.585 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,7,0.465 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,8,0.39 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,9,0.35 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,10,0.285 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,11,0.195 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,12,0.255 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,13,0.21 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,14,0.21 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,15,0.18 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,16,0.11 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,17,0.1 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,18,0.115 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,19,0.03 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,20,0.05 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,21,0.02 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,22,0.055 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,23,0.045 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,24,0.045 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,25,0.025 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,26,0.045 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,27,0.04 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,28,0.035 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,29,0.05 |
|
realistic_Hard,qwen-2.5-7b-instruct,0,30,0.065 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,2,0.495 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,4,0.365 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,6,0.315 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,8,0.2 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,10,0.17 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,12,0.135 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,14,0.08 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,16,0.07 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,18,0.03 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,20,0.03 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,22,0.025 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,24,0.015 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,26,0.035 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,28,0.03 |
|
realistic_Hard,qwen-2.5-7b-instruct,8000,30,0.02 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,2,0.535 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,4,0.26 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,6,0.225 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,8,0.155 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,10,0.1 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,12,0.12 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,14,0.075 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,16,0.05 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,18,0.04 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,20,0.035 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,22,0.06 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,24,0.06 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,26,0.05 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,28,0.035 |
|
realistic_Hard,qwen-2.5-7b-instruct,16000,30,0.06 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,2,0.39 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,4,0.245 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,6,0.165 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,8,0.16 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,10,0.11 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,12,0.055 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,14,0.065 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,16,0.045 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,18,0.055 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,20,0.06 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,22,0.085 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,24,0.065 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,26,0.055 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,28,0.045 |
|
realistic_Hard,qwen-2.5-7b-instruct,32000,30,0.055 |
|
realistic_Medium,gpt-4o-2024-11-20,0,2,0.9375 |
|
realistic_Medium,gpt-4o-2024-11-20,0,6,0.9844 |
|
realistic_Medium,gpt-4o-2024-11-20,0,10,0.7266 |
|
realistic_Medium,gpt-4o-2024-11-20,0,14,0.7188 |
|
realistic_Medium,gpt-4o-2024-11-20,0,18,0.6719 |
|
realistic_Medium,gpt-4o-2024-11-20,0,22,0.5547 |
|
realistic_Medium,gpt-4o-2024-11-20,0,26,0.4375 |
|
realistic_Medium,gpt-4o-2024-11-20,0,30,0.3047 |
|
realistic_Medium,gpt-4o-2024-11-20,0,35,0.326923 |
|
realistic_Medium,gpt-4o-2024-11-20,0,40,0.25 |
|
realistic_Medium,gpt-4o-2024-11-20,0,45,0.163462 |
|
realistic_Medium,gpt-4o-2024-11-20,0,50,0.0961538 |
|
realistic_Medium,gpt-4o-2024-11-20,0,55,0.125 |
|
realistic_Medium,gpt-4o-2024-11-20,0,60,0.0576923 |
|
symbolic,gemini-1.5-pro-002,0,1,0.99 |
|
symbolic,gemini-1.5-pro-002,0,4,0.97 |
|
symbolic,gemini-1.5-pro-002,0,7,0.97 |
|
symbolic,gemini-1.5-pro-002,0,10,0.93 |
|
symbolic,gemini-1.5-pro-002,0,13,0.82 |
|
symbolic,gemini-1.5-pro-002,0,16,0.78 |
|
symbolic,gemini-1.5-pro-002,0,19,0.72 |
|
symbolic,gemini-1.5-pro-002,0,22,0.65 |
|
symbolic,gemini-1.5-pro-002,0,25,0.48 |
|
symbolic,gemini-1.5-pro-002,0,28,0.28 |
|
symbolic,gemini-1.5-pro-002,0,31,0.33 |
|
symbolic,gemini-1.5-pro-002,0,34,0.24 |
|
symbolic,gemini-1.5-pro-002,0,37,0.18 |
|
symbolic,gemini-1.5-pro-002,0,40,0.13 |
|
symbolic,gemini-1.5-pro-002,0,43,0.13 |
|
symbolic,gemini-1.5-pro-002,0,46,0.11 |
|
symbolic,gemini-1.5-pro-002,0,49,0.04 |
|
symbolic,gemini-1.5-pro-002,0,52,0.03 |
|
symbolic,gemini-1.5-pro-002,0,55,0.04 |
|
symbolic,gemini-1.5-pro-002,0,58,0.01 |
|
symbolic,gemini-1.5-pro-002,0,61,0.02 |
|
symbolic,gemini-1.5-pro-002,0,64,0.02 |
|
symbolic,gemini-1.5-pro-002,0,67,0.04 |
|
symbolic,gemini-1.5-pro-002,0,70,0.04 |
|
symbolic,gemini-1.5-pro-002,0,73,0.01 |
|
symbolic,gemini-1.5-pro-002,0,76,0.0 |
|
symbolic,gemini-1.5-pro-002,0,79,0.0 |
|
symbolic,gemini-1.5-pro-002,0,82,0.02 |
|
symbolic,gemini-1.5-pro-002,0,85,0.0 |
|
symbolic,gemini-1.5-pro-002,0,88,0.01 |
|
symbolic,gemini-1.5-pro-002,8000,1,0.99 |
|
symbolic,gemini-1.5-pro-002,8000,2,0.69 |
|
symbolic,gemini-1.5-pro-002,8000,3,0.54 |
|
symbolic,gemini-1.5-pro-002,8000,4,0.45 |
|
symbolic,gemini-1.5-pro-002,8000,5,0.26 |
|
symbolic,gemini-1.5-pro-002,8000,6,0.35 |
|
symbolic,gemini-1.5-pro-002,8000,7,0.25 |
|
symbolic,gemini-1.5-pro-002,8000,8,0.19 |
|
symbolic,gemini-1.5-pro-002,8000,9,0.21 |
|
symbolic,gemini-1.5-pro-002,8000,10,0.09 |
|
symbolic,gemini-1.5-pro-002,8000,11,0.17 |
|
symbolic,gemini-1.5-pro-002,8000,12,0.14 |
|
symbolic,gemini-1.5-pro-002,8000,13,0.05 |
|
symbolic,gemini-1.5-pro-002,8000,14,0.07 |
|
symbolic,gemini-1.5-pro-002,8000,16,0.04 |
|
symbolic,gemini-1.5-pro-002,8000,17,0.03 |
|
symbolic,gemini-1.5-pro-002,8000,18,0.05 |
|
symbolic,gemini-1.5-pro-002,8000,19,0.03 |
|
symbolic,gemini-1.5-pro-002,16000,1,1.0 |
|
symbolic,gemini-1.5-pro-002,16000,2,0.64 |
|
symbolic,gemini-1.5-pro-002,16000,3,0.42 |
|
symbolic,gemini-1.5-pro-002,16000,4,0.38 |
|
symbolic,gemini-1.5-pro-002,16000,5,0.3 |
|
symbolic,gemini-1.5-pro-002,16000,6,0.34 |
|
symbolic,gemini-1.5-pro-002,16000,7,0.1 |
|
symbolic,gemini-1.5-pro-002,16000,8,0.1 |
|
symbolic,gemini-1.5-pro-002,16000,9,0.18 |
|
symbolic,gemini-1.5-pro-002,16000,10,0.14 |
|
symbolic,gemini-1.5-pro-002,16000,11,0.14 |
|
symbolic,gemini-1.5-pro-002,16000,12,0.06 |
|
symbolic,gemini-1.5-pro-002,16000,13,0.1 |
|
symbolic,gemini-1.5-pro-002,16000,14,0.06 |
|
symbolic,gemini-1.5-pro-002,32000,1,0.98 |
|
symbolic,gemini-1.5-pro-002,32000,2,0.68 |
|
symbolic,gemini-1.5-pro-002,32000,3,0.44 |
|
symbolic,gemini-1.5-pro-002,32000,4,0.46 |
|
symbolic,gemini-1.5-pro-002,32000,5,0.26 |
|
symbolic,gemini-1.5-pro-002,32000,6,0.3 |
|
symbolic,gemini-1.5-pro-002,32000,7,0.1 |
|
symbolic,gemini-1.5-pro-002,32000,8,0.06 |
|
symbolic,gemini-1.5-pro-002,32000,9,0.1 |
|
symbolic,gemini-1.5-pro-002,32000,10,0.02 |
|
symbolic,gemini-1.5-pro-002,32000,11,0.06 |
|
symbolic,gemini-1.5-pro-002,32000,12,0.02 |
|
symbolic,gemini-1.5-pro-002,32000,13,0.06 |
|
symbolic,gemini-1.5-pro-002,32000,14,0.12 |
|
realistic_Hard,gemini-1.5-pro-002,0,2,0.9531 |
|
realistic_Hard,gemini-1.5-pro-002,0,6,0.9844 |
|
realistic_Hard,gemini-1.5-pro-002,0,10,0.8828 |
|
realistic_Hard,gemini-1.5-pro-002,0,14,0.7734 |
|
realistic_Hard,gemini-1.5-pro-002,0,18,0.7734 |
|
realistic_Hard,gemini-1.5-pro-002,0,22,0.5156 |
|
realistic_Hard,gemini-1.5-pro-002,0,26,0.3672 |
|
realistic_Hard,gemini-1.5-pro-002,0,30,0.2891 |
|
realistic_Hard,gemini-1.5-pro-002,0,35,0.25 |
|
realistic_Hard,gemini-1.5-pro-002,0,40,0.201923 |
|
realistic_Hard,gemini-1.5-pro-002,0,45,0.0865385 |
|
realistic_Hard,gemini-1.5-pro-002,0,50,0.0384615 |
|
realistic_Hard,gemini-1.5-pro-002,8000,2,0.875 |
|
realistic_Hard,gemini-1.5-pro-002,8000,6,0.8203 |
|
realistic_Hard,gemini-1.5-pro-002,8000,10,0.6641 |
|
realistic_Hard,gemini-1.5-pro-002,8000,14,0.6719 |
|
realistic_Hard,gemini-1.5-pro-002,8000,18,0.5312 |
|
realistic_Hard,gemini-1.5-pro-002,8000,22,0.5312 |
|
realistic_Hard,gemini-1.5-pro-002,8000,26,0.5 |
|
realistic_Hard,gemini-1.5-pro-002,8000,30,0.4688 |
|
realistic_Hard,gemini-1.5-pro-002,16000,2,0.9219 |
|
realistic_Hard,gemini-1.5-pro-002,16000,6,0.7891 |
|
realistic_Hard,gemini-1.5-pro-002,16000,10,0.5703 |
|
realistic_Hard,gemini-1.5-pro-002,16000,14,0.3438 |
|
realistic_Hard,gemini-1.5-pro-002,16000,18,0.3281 |
|
realistic_Hard,gemini-1.5-pro-002,16000,22,0.2266 |
|
realistic_Hard,gemini-1.5-pro-002,16000,26,0.1719 |
|
realistic_Hard,gemini-1.5-pro-002,16000,30,0.125 |
|
realistic_Hard,gemini-1.5-pro-002,32000,2,0.8504 |
|
realistic_Hard,gemini-1.5-pro-002,32000,6,0.7188 |
|
realistic_Hard,gemini-1.5-pro-002,32000,10,0.5197 |
|
realistic_Hard,gemini-1.5-pro-002,32000,14,0.4375 |
|
realistic_Hard,gemini-1.5-pro-002,32000,18,0.315 |
|
realistic_Hard,gemini-1.5-pro-002,32000,22,0.1575 |
|
realistic_Hard,gemini-1.5-pro-002,32000,26,0.0391 |
|
realistic_Hard,gemini-1.5-pro-002,32000,30,0.0859 |
|
realistic_Medium,jamba-1.5-large,0,2,0.6716 |
|
realistic_Medium,jamba-1.5-large,0,2,0.7422 |
|
realistic_Medium,jamba-1.5-large,0,4,0.6406 |
|
realistic_Medium,jamba-1.5-large,0,6,0.3516 |
|
realistic_Medium,jamba-1.5-large,0,8,0.1484 |
|
realistic_Medium,jamba-1.5-large,0,10,0.1562 |
|
realistic_Medium,jamba-1.5-large,0,12,0.1406 |
|
realistic_Medium,jamba-1.5-large,0,14,0.0703 |
|
realistic_Medium,jamba-1.5-large,0,16,0.0938 |
|
realistic_Medium,jamba-1.5-large,0,18,0.0781 |
|
realistic_Medium,jamba-1.5-large,0,20,0.0625 |
|
realistic_Medium,jamba-1.5-large,0,22,0.0469 |
|
realistic_Medium,jamba-1.5-large,0,24,0.0859 |
|
realistic_Medium,jamba-1.5-large,0,26,0.0703 |
|
realistic_Medium,jamba-1.5-large,0,28,0.0781 |
|
realistic_Medium,jamba-1.5-large,0,30,0.0625 |
|
symbolic,claude-3.5-haiku,0,1,0.96 |
|
symbolic,claude-3.5-haiku,0,3,1.0 |
|
symbolic,claude-3.5-haiku,0,5,0.91 |
|
symbolic,claude-3.5-haiku,0,7,0.73 |
|
symbolic,claude-3.5-haiku,0,9,0.53 |
|
symbolic,claude-3.5-haiku,0,11,0.28 |
|
symbolic,claude-3.5-haiku,0,13,0.2 |
|
symbolic,claude-3.5-haiku,0,15,0.07 |
|
symbolic,claude-3.5-haiku,0,17,0.1 |
|
symbolic,claude-3.5-haiku,0,19,0.05 |
|
symbolic,claude-3.5-haiku,0,21,0.01 |
|
symbolic,claude-3.5-haiku,0,23,0.01 |
|
symbolic,claude-3.5-haiku,0,25,0.02 |
|
symbolic,claude-3.5-haiku,0,27,0.01 |
|
symbolic,claude-3.5-haiku,0,29,0.02 |
|
symbolic,claude-3.5-haiku,0,31,0.01 |
|
symbolic,claude-3.5-haiku,0,33,0.0 |
|
symbolic,claude-3.5-haiku,0,35,0.0 |
|
symbolic,claude-3.5-haiku,0,37,0.01 |
|
symbolic,claude-3.5-haiku,0,39,0.01 |
|
symbolic,claude-3.5-haiku,0,41,0.01 |
|
symbolic,claude-3.5-haiku,0,43,0.0 |
|
symbolic,claude-3.5-haiku,0,45,0.02 |
|
symbolic,claude-3.5-haiku,0,47,0.0 |
|
symbolic,claude-3.5-haiku,0,49,0.0 |
|
symbolic,claude-3.5-haiku,0,51,0.0 |
|
symbolic,claude-3.5-haiku,0,53,0.0 |
|
symbolic,claude-3.5-haiku,0,55,0.0 |
|
symbolic,claude-3.5-haiku,0,57,0.0 |
|
symbolic,claude-3.5-haiku,0,59,0.01 |
|
symbolic,o3-mini,0,1,1.0 |
|
symbolic,o3-mini,0,16,0.96 |
|
symbolic,o3-mini,0,31,0.82 |
|
symbolic,o3-mini,0,46,0.62 |
|
symbolic,o3-mini,0,61,0.34 |
|
symbolic,o3-mini,0,76,0.28 |
|
symbolic,o3-mini,0,91,0.22 |
|
symbolic,o3-mini,0,106,0.12 |
|
symbolic,o3-mini,0,121,0.1 |
|
symbolic,o3-mini,0,136,0.14 |
|
symbolic,o3-mini,0,151,0.16 |
|
symbolic,o3-mini,0,166,0.08 |
|
symbolic,o3-mini,0,181,0.06 |
|
symbolic,o3-mini,0,196,0.0 |
|
symbolic,o3-mini,0,211,0.04 |
|
symbolic,o3-mini,0,226,0.04 |
|
realistic_Hard,mistral-large-2411,0,2,0.8672 |
|
realistic_Hard,mistral-large-2411,0,4,0.9141 |
|
realistic_Hard,mistral-large-2411,0,6,0.8672 |
|
realistic_Hard,mistral-large-2411,0,8,0.8594 |
|
realistic_Hard,mistral-large-2411,0,10,0.7578 |
|
realistic_Hard,mistral-large-2411,0,12,0.6875 |
|
realistic_Hard,mistral-large-2411,0,14,0.7812 |
|
realistic_Hard,mistral-large-2411,0,16,0.7266 |
|
realistic_Hard,mistral-large-2411,0,18,0.7109 |
|
realistic_Hard,mistral-large-2411,0,20,0.6172 |
|
realistic_Hard,mistral-large-2411,0,22,0.5156 |
|
realistic_Hard,mistral-large-2411,0,24,0.4646 |
|
realistic_Hard,mistral-large-2411,0,26,0.4453 |
|
realistic_Hard,mistral-large-2411,0,28,0.4252 |
|
realistic_Hard,mistral-large-2411,0,30,0.3281 |
|
realistic_Hard,mistral-large-2411,0,35,0.230769 |
|
realistic_Hard,mistral-large-2411,0,40,0.16667 |
|
realistic_Hard,mistral-large-2411,0,50,0.0714 |
|
realistic_Hard,mistral-large-2411,0,55,0.0938 |
|
realistic_Hard,mistral-large-2411,0,60,0.0543 |
|
realistic_Hard,mistral-large-2411,8000,2,0.7109 |
|
realistic_Hard,mistral-large-2411,8000,6,0.7734 |
|
realistic_Hard,mistral-large-2411,8000,10,0.5391 |
|
realistic_Hard,mistral-large-2411,8000,14,0.5859 |
|
realistic_Hard,mistral-large-2411,8000,18,0.4219 |
|
realistic_Hard,mistral-large-2411,8000,22,0.3828 |
|
realistic_Hard,mistral-large-2411,8000,26,0.3828 |
|
realistic_Hard,mistral-large-2411,8000,30,0.4375 |
|
realistic_Hard,mistral-large-2411,16000,2,0.6562 |
|
realistic_Hard,mistral-large-2411,16000,6,0.6094 |
|
realistic_Hard,mistral-large-2411,16000,10,0.3984 |
|
realistic_Hard,mistral-large-2411,16000,14,0.2266 |
|
realistic_Hard,mistral-large-2411,16000,18,0.1406 |
|
realistic_Hard,mistral-large-2411,16000,22,0.0938 |
|
realistic_Hard,mistral-large-2411,16000,26,0.0859 |
|
realistic_Hard,mistral-large-2411,16000,30,0.0703 |
|
realistic_Hard,mistral-large-2411,32000,2,0.4688 |
|
realistic_Hard,mistral-large-2411,32000,6,0.3516 |
|
realistic_Hard,mistral-large-2411,32000,10,0.1653 |
|
realistic_Hard,mistral-large-2411,32000,14,0.0787 |
|
realistic_Hard,mistral-large-2411,32000,18,0.0859 |
|
realistic_Hard,mistral-large-2411,32000,22,0.0859 |
|
realistic_Hard,mistral-large-2411,32000,26,0.0078 |
|
realistic_Hard,mistral-large-2411,32000,30,0.0234 |
|
realistic_Hard,llama-3.1-70b-instruct,0,2,0.84 |
|
realistic_Hard,llama-3.1-70b-instruct,0,3,0.9 |
|
realistic_Hard,llama-3.1-70b-instruct,0,4,0.815 |
|
realistic_Hard,llama-3.1-70b-instruct,0,5,0.73 |
|
realistic_Hard,llama-3.1-70b-instruct,0,6,0.735 |
|
realistic_Hard,llama-3.1-70b-instruct,0,7,0.66 |
|
realistic_Hard,llama-3.1-70b-instruct,0,8,0.655 |
|
realistic_Hard,llama-3.1-70b-instruct,0,9,0.65 |
|
realistic_Hard,llama-3.1-70b-instruct,0,10,0.64 |
|
realistic_Hard,llama-3.1-70b-instruct,0,11,0.52 |
|
realistic_Hard,llama-3.1-70b-instruct,0,12,0.55 |
|
realistic_Hard,llama-3.1-70b-instruct,0,13,0.415 |
|
realistic_Hard,llama-3.1-70b-instruct,0,14,0.5 |
|
realistic_Hard,llama-3.1-70b-instruct,0,15,0.49 |
|
realistic_Hard,llama-3.1-70b-instruct,0,16,0.4 |
|
realistic_Hard,llama-3.1-70b-instruct,0,17,0.345 |
|
realistic_Hard,llama-3.1-70b-instruct,0,18,0.41 |
|
realistic_Hard,llama-3.1-70b-instruct,0,19,0.335 |
|
realistic_Hard,llama-3.1-70b-instruct,0,20,0.325 |
|
realistic_Hard,llama-3.1-70b-instruct,0,21,0.235 |
|
realistic_Hard,llama-3.1-70b-instruct,0,22,0.2 |
|
realistic_Hard,llama-3.1-70b-instruct,0,23,0.165 |
|
realistic_Hard,llama-3.1-70b-instruct,0,24,0.185 |
|
realistic_Hard,llama-3.1-70b-instruct,0,25,0.2 |
|
realistic_Hard,llama-3.1-70b-instruct,0,26,0.115 |
|
realistic_Hard,llama-3.1-70b-instruct,0,27,0.15 |
|
realistic_Hard,llama-3.1-70b-instruct,0,28,0.1 |
|
realistic_Hard,llama-3.1-70b-instruct,0,29,0.17 |
|
realistic_Hard,llama-3.1-70b-instruct,0,30,0.075 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,2,0.77 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,4,0.705 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,6,0.635 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,8,0.475 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,10,0.385 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,12,0.275 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,14,0.175 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,16,0.08 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,18,0.03 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,20,0.02 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,22,0.005 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,24,0.0 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,26,0.0 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,28,0.0 |
|
realistic_Hard,llama-3.1-70b-instruct,8000,30,0.0 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,2,0.76 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,4,0.615 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,6,0.56 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,8,0.385 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,10,0.245 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,12,0.18 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,14,0.1 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,16,0.035 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,18,0.03 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,20,0.02 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,22,0.015 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,24,0.025 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,26,0.01 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,28,0.015 |
|
realistic_Hard,llama-3.1-70b-instruct,16000,30,0.025 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,2,0.725 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,4,0.575 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,6,0.445 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,8,0.39 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,10,0.12 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,12,0.08 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,14,0.075 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,16,0.075 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,18,0.045 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,20,0.015 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,22,0.03 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,24,0.01 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,26,0.015 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,28,0.015 |
|
realistic_Hard,llama-3.1-70b-instruct,32000,30,0.025 |
|
realistic_Medium,llama-3.1-70b-instruct,0,2,0.74 |
|
realistic_Medium,llama-3.1-70b-instruct,0,3,0.725 |
|
realistic_Medium,llama-3.1-70b-instruct,0,4,0.9 |
|
realistic_Medium,llama-3.1-70b-instruct,0,5,0.865 |
|
realistic_Medium,llama-3.1-70b-instruct,0,6,0.935 |
|
realistic_Medium,llama-3.1-70b-instruct,0,7,0.905 |
|
realistic_Medium,llama-3.1-70b-instruct,0,8,0.795 |
|
realistic_Medium,llama-3.1-70b-instruct,0,9,0.805 |
|
realistic_Medium,llama-3.1-70b-instruct,0,10,0.69 |
|
realistic_Medium,llama-3.1-70b-instruct,0,11,0.7 |
|
realistic_Medium,llama-3.1-70b-instruct,0,12,0.625 |
|
realistic_Medium,llama-3.1-70b-instruct,0,13,0.575 |
|
realistic_Medium,llama-3.1-70b-instruct,0,14,0.565 |
|
realistic_Medium,llama-3.1-70b-instruct,0,15,0.58 |
|
realistic_Medium,llama-3.1-70b-instruct,0,16,0.38 |
|
realistic_Medium,llama-3.1-70b-instruct,0,17,0.465 |
|
realistic_Medium,llama-3.1-70b-instruct,0,18,0.42 |
|
realistic_Medium,llama-3.1-70b-instruct,0,19,0.355 |
|
realistic_Medium,llama-3.1-70b-instruct,0,20,0.415 |
|
realistic_Medium,llama-3.1-70b-instruct,0,21,0.335 |
|
realistic_Medium,llama-3.1-70b-instruct,0,22,0.36 |
|
realistic_Medium,llama-3.1-70b-instruct,0,23,0.32 |
|
realistic_Medium,llama-3.1-70b-instruct,0,24,0.305 |
|
realistic_Medium,llama-3.1-70b-instruct,0,25,0.385 |
|
realistic_Medium,llama-3.1-70b-instruct,0,26,0.265 |
|
realistic_Medium,llama-3.1-70b-instruct,0,27,0.305 |
|
realistic_Medium,llama-3.1-70b-instruct,0,28,0.255 |
|
realistic_Medium,llama-3.1-70b-instruct,0,29,0.175 |
|
realistic_Medium,llama-3.1-70b-instruct,0,30,0.18 |
|
realistic_Medium,llama-3.1-70b-instruct,0,35,0.125 |
|
realistic_Medium,llama-3.1-70b-instruct,0,40,0.08125 |
|
realistic_Medium,llama-3.1-70b-instruct,0,45,0.0625 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,2,0.48 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,4,0.725 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,6,0.745 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,8,0.53 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,10,0.22 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,12,0.165 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,14,0.11 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,16,0.055 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,18,0.035 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,20,0.01 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,22,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,24,0.015 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,26,0.0 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,28,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,8000,30,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,2,0.325 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,4,0.68 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,6,0.62 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,8,0.39 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,10,0.15 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,12,0.12 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,14,0.085 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,16,0.03 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,18,0.015 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,20,0.015 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,22,0.01 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,24,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,26,0.0 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,28,0.01 |
|
realistic_Medium,llama-3.1-70b-instruct,16000,30,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,2,0.325 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,4,0.68 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,6,0.62 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,8,0.39 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,10,0.15 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,12,0.12 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,14,0.065 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,16,0.02 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,18,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,20,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,22,0.0 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,24,0.005 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,26,0.035 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,28,0.025 |
|
realistic_Medium,llama-3.1-70b-instruct,32000,30,0.02 |
|
realistic_Medium,claude-3.5-haiku,0,2,0.5859 |
|
realistic_Medium,claude-3.5-haiku,0,6,0.7266 |
|
realistic_Medium,claude-3.5-haiku,0,10,0.5547 |
|
realistic_Medium,claude-3.5-haiku,0,14,0.4688 |
|
realistic_Medium,claude-3.5-haiku,0,18,0.3047 |
|
realistic_Medium,claude-3.5-haiku,0,22,0.1719 |
|
realistic_Medium,claude-3.5-haiku,0,26,0.0859 |
|
realistic_Medium,claude-3.5-haiku,0,30,0.0547 |
|
realistic_Hard,claude-3.5-haiku,0,2,0.6484 |
|
realistic_Hard,claude-3.5-haiku,0,6,0.7031 |
|
realistic_Hard,claude-3.5-haiku,0,10,0.4844 |
|
realistic_Hard,claude-3.5-haiku,0,14,0.2656 |
|
realistic_Hard,claude-3.5-haiku,0,18,0.1094 |
|
realistic_Hard,claude-3.5-haiku,0,22,0.0312 |
|
realistic_Hard,claude-3.5-haiku,0,26,0.0 |
|
realistic_Hard,claude-3.5-haiku,0,30,0.0859 |
|
symbolic,gemini-1.5-flash-002,0,1,0.99 |
|
symbolic,gemini-1.5-flash-002,0,3,0.98 |
|
symbolic,gemini-1.5-flash-002,0,5,0.89 |
|
symbolic,gemini-1.5-flash-002,0,7,0.85 |
|
symbolic,gemini-1.5-flash-002,0,9,0.83 |
|
symbolic,gemini-1.5-flash-002,0,11,0.75 |
|
symbolic,gemini-1.5-flash-002,0,13,0.63 |
|
symbolic,gemini-1.5-flash-002,0,15,0.59 |
|
symbolic,gemini-1.5-flash-002,0,17,0.65 |
|
symbolic,gemini-1.5-flash-002,0,19,0.53 |
|
symbolic,gemini-1.5-flash-002,0,21,0.41 |
|
symbolic,gemini-1.5-flash-002,0,23,0.3 |
|
symbolic,gemini-1.5-flash-002,0,25,0.35 |
|
symbolic,gemini-1.5-flash-002,0,27,0.32 |
|
symbolic,gemini-1.5-flash-002,0,29,0.28 |
|
symbolic,gemini-1.5-flash-002,0,31,0.2 |
|
symbolic,gemini-1.5-flash-002,0,33,0.18 |
|
symbolic,gemini-1.5-flash-002,0,35,0.08 |
|
symbolic,gemini-1.5-flash-002,0,37,0.14 |
|
symbolic,gemini-1.5-flash-002,0,39,0.08 |
|
symbolic,gemini-1.5-flash-002,0,41,0.08 |
|
symbolic,gemini-1.5-flash-002,0,43,0.03 |
|
symbolic,gemini-1.5-flash-002,0,45,0.04 |
|
symbolic,gemini-1.5-flash-002,0,47,0.05 |
|
symbolic,gemini-1.5-flash-002,0,49,0.05 |
|
symbolic,gemini-1.5-flash-002,0,51,0.03 |
|
symbolic,gemini-1.5-flash-002,0,53,0.01 |
|
symbolic,gemini-1.5-flash-002,0,55,0.02 |
|
symbolic,gemini-1.5-flash-002,0,57,0.0 |
|
symbolic,gemini-1.5-flash-002,0,59,0.01 |
|
symbolic,gemini-1.5-flash-002,8000,1,0.9 |
|
symbolic,gemini-1.5-flash-002,8000,2,0.59 |
|
symbolic,gemini-1.5-flash-002,8000,3,0.38 |
|
symbolic,gemini-1.5-flash-002,8000,4,0.32 |
|
symbolic,gemini-1.5-flash-002,8000,5,0.27 |
|
symbolic,gemini-1.5-flash-002,8000,6,0.17 |
|
symbolic,gemini-1.5-flash-002,8000,7,0.11 |
|
symbolic,gemini-1.5-flash-002,8000,8,0.14 |
|
symbolic,gemini-1.5-flash-002,8000,9,0.06 |
|
symbolic,gemini-1.5-flash-002,8000,10,0.03 |
|
symbolic,gemini-1.5-flash-002,8000,11,0.09 |
|
symbolic,gemini-1.5-flash-002,8000,12,0.03 |
|
symbolic,gemini-1.5-flash-002,8000,13,0.06 |
|
symbolic,gemini-1.5-flash-002,8000,14,0.05 |
|
symbolic,gemini-1.5-flash-002,8000,15,0.02 |
|
symbolic,gemini-1.5-flash-002,8000,16,0.01 |
|
symbolic,gemini-1.5-flash-002,8000,17,0.01 |
|
symbolic,gemini-1.5-flash-002,8000,18,0.0 |
|
symbolic,gemini-1.5-flash-002,8000,19,0.01 |
|
symbolic,gemini-1.5-flash-002,16000,1,0.8 |
|
symbolic,gemini-1.5-flash-002,16000,2,0.54 |
|
symbolic,gemini-1.5-flash-002,16000,3,0.3 |
|
symbolic,gemini-1.5-flash-002,16000,4,0.26 |
|
symbolic,gemini-1.5-flash-002,16000,5,0.14 |
|
symbolic,gemini-1.5-flash-002,16000,6,0.1 |
|
symbolic,gemini-1.5-flash-002,16000,7,0.1 |
|
symbolic,gemini-1.5-flash-002,16000,8,0.04 |
|
symbolic,gemini-1.5-flash-002,16000,9,0.08 |
|
symbolic,gemini-1.5-flash-002,16000,10,0.04 |
|
symbolic,gemini-1.5-flash-002,16000,11,0.04 |
|
symbolic,gemini-1.5-flash-002,16000,12,0.0 |
|
symbolic,gemini-1.5-flash-002,16000,13,0.02 |
|
symbolic,gemini-1.5-flash-002,32000,1,0.64 |
|
symbolic,gemini-1.5-flash-002,32000,2,0.36 |
|
symbolic,gemini-1.5-flash-002,32000,3,0.3 |
|
symbolic,gemini-1.5-flash-002,32000,4,0.18 |
|
symbolic,gemini-1.5-flash-002,32000,5,0.1 |
|
symbolic,gemini-1.5-flash-002,32000,6,0.12 |
|
symbolic,gemini-1.5-flash-002,32000,7,0.06 |
|
symbolic,gemini-1.5-flash-002,32000,8,0.02 |
|
symbolic,gemini-1.5-flash-002,32000,9,0.02 |
|
symbolic,qwen-2.5-7b-instruct,0,1,0.984 |
|
symbolic,qwen-2.5-7b-instruct,0,2,0.97 |
|
symbolic,qwen-2.5-7b-instruct,0,3,0.829 |
|
symbolic,qwen-2.5-7b-instruct,0,4,0.765 |
|
symbolic,qwen-2.5-7b-instruct,0,5,0.66 |
|
symbolic,qwen-2.5-7b-instruct,0,6,0.595 |
|
symbolic,qwen-2.5-7b-instruct,0,7,0.527 |
|
symbolic,qwen-2.5-7b-instruct,0,8,0.438 |
|
symbolic,qwen-2.5-7b-instruct,0,9,0.37 |
|
symbolic,qwen-2.5-7b-instruct,0,10,0.331 |
|
symbolic,qwen-2.5-7b-instruct,0,11,0.257 |
|
symbolic,qwen-2.5-7b-instruct,0,12,0.238 |
|
symbolic,qwen-2.5-7b-instruct,0,13,0.21 |
|
symbolic,qwen-2.5-7b-instruct,0,14,0.183 |
|
symbolic,qwen-2.5-7b-instruct,0,15,0.155 |
|
symbolic,qwen-2.5-7b-instruct,0,16,0.104 |
|
symbolic,qwen-2.5-7b-instruct,0,17,0.083 |
|
symbolic,qwen-2.5-7b-instruct,0,18,0.079 |
|
symbolic,qwen-2.5-7b-instruct,0,19,0.087 |
|
symbolic,qwen-2.5-7b-instruct,0,20,0.057 |
|
symbolic,qwen-2.5-7b-instruct,0,21,0.064 |
|
symbolic,qwen-2.5-7b-instruct,0,22,0.041 |
|
symbolic,qwen-2.5-7b-instruct,0,23,0.043 |
|
symbolic,qwen-2.5-7b-instruct,0,24,0.035 |
|
symbolic,qwen-2.5-7b-instruct,0,25,0.03 |
|
symbolic,qwen-2.5-7b-instruct,0,26,0.021 |
|
symbolic,qwen-2.5-7b-instruct,0,27,0.026 |
|
symbolic,qwen-2.5-7b-instruct,0,28,0.027 |
|
symbolic,qwen-2.5-7b-instruct,0,29,0.023 |
|
symbolic,qwen-2.5-7b-instruct,0,30,0.018 |
|
symbolic,qwen-2.5-7b-instruct,0,31,0.017 |
|
symbolic,qwen-2.5-7b-instruct,0,32,0.009 |
|
symbolic,qwen-2.5-7b-instruct,0,33,0.014 |
|
symbolic,qwen-2.5-7b-instruct,0,34,0.019 |
|
symbolic,qwen-2.5-7b-instruct,0,35,0.01 |
|
symbolic,qwen-2.5-7b-instruct,0,36,0.012 |
|
symbolic,qwen-2.5-7b-instruct,0,37,0.013 |
|
symbolic,qwen-2.5-7b-instruct,0,38,0.006 |
|
symbolic,qwen-2.5-7b-instruct,0,39,0.006 |
|
symbolic,qwen-2.5-7b-instruct,0,40,0.011 |
|
symbolic,qwen-2.5-7b-instruct,8000,1,0.43 |
|
symbolic,qwen-2.5-7b-instruct,8000,2,0.14 |
|
symbolic,qwen-2.5-7b-instruct,8000,3,0.07 |
|
symbolic,qwen-2.5-7b-instruct,8000,4,0.02 |
|
symbolic,qwen-2.5-7b-instruct,8000,5,0.01 |
|
symbolic,qwen-2.5-7b-instruct,16000,1,0.1 |
|
symbolic,qwen-2.5-7b-instruct,16000,2,0.02 |
|
symbolic,qwen-2.5-7b-instruct,16000,3,0.02 |
|
symbolic,gpt-4o-mini,0,1,0.97 |
|
symbolic,gpt-4o-mini,0,3,0.99 |
|
symbolic,gpt-4o-mini,0,5,0.92 |
|
symbolic,gpt-4o-mini,0,7,0.85 |
|
symbolic,gpt-4o-mini,0,9,0.76 |
|
symbolic,gpt-4o-mini,0,11,0.64 |
|
symbolic,gpt-4o-mini,0,13,0.52 |
|
symbolic,gpt-4o-mini,0,15,0.39 |
|
symbolic,gpt-4o-mini,0,17,0.36 |
|
symbolic,gpt-4o-mini,0,19,0.22 |
|
symbolic,gpt-4o-mini,0,21,0.16 |
|
symbolic,gpt-4o-mini,0,23,0.13 |
|
symbolic,gpt-4o-mini,0,25,0.11 |
|
symbolic,gpt-4o-mini,0,27,0.09 |
|
symbolic,gpt-4o-mini,0,29,0.07 |
|
symbolic,gpt-4o-mini,0,31,0.06 |
|
symbolic,gpt-4o-mini,0,33,0.05 |
|
symbolic,gpt-4o-mini,0,35,0.03 |
|
symbolic,gpt-4o-mini,0,37,0.03 |
|
symbolic,gpt-4o-mini,0,39,0.0 |
|
symbolic,gpt-4o-mini,0,41,0.03 |
|
symbolic,gpt-4o-mini,0,43,0.0 |
|
symbolic,gpt-4o-mini,0,45,0.03 |
|
symbolic,gpt-4o-mini,0,47,0.01 |
|
symbolic,gpt-4o-mini,0,49,0.0 |
|
symbolic,gpt-4o-mini,0,51,0.01 |
|
symbolic,gpt-4o-mini,0,53,0.0 |
|
symbolic,gpt-4o-mini,0,55,0.0 |
|
symbolic,gpt-4o-mini,0,57,0.0 |
|
symbolic,gpt-4o-mini,0,59,0.0 |
|
symbolic,gpt-4o-mini,8000,1,0.8 |
|
symbolic,gpt-4o-mini,8000,2,0.3 |
|
symbolic,gpt-4o-mini,8000,3,0.14 |
|
symbolic,gpt-4o-mini,8000,4,0.07 |
|
symbolic,gpt-4o-mini,8000,5,0.06 |
|
symbolic,gpt-4o-mini,8000,6,0.03 |
|
symbolic,gpt-4o-mini,8000,7,0.02 |
|
symbolic,gpt-4o-mini,8000,8,0.01 |
|
symbolic,gpt-4o-mini,8000,9,0.0 |
|
symbolic,gpt-4o-mini,8000,10,0.0 |
|
symbolic,gpt-4o-mini,8000,11,0.0 |
|
symbolic,gpt-4o-mini,16000,1,0.64 |
|
symbolic,gpt-4o-mini,16000,2,0.32 |
|
symbolic,gpt-4o-mini,16000,3,0.12 |
|
symbolic,gpt-4o-mini,16000,4,0.01 |
|
symbolic,gpt-4o-mini,16000,5,0.07 |
|
symbolic,gpt-4o-mini,16000,6,0.02 |
|
symbolic,gpt-4o-mini,16000,7,0.04 |
|
symbolic,gpt-4o-mini,16000,8,0.0 |
|
symbolic,gpt-4o-mini,32000,1,0.41 |
|
symbolic,gpt-4o-mini,32000,2,0.2 |
|
symbolic,gpt-4o-mini,32000,3,0.09 |
|
symbolic,gpt-4o-mini,32000,4,0.05 |
|
symbolic,gpt-4o-mini,32000,5,0.01 |
|
symbolic,gpt-4o-mini,32000,6,0.04 |
|
symbolic,gpt-4o-mini,32000,7,0.02 |
|
symbolic,llama-3.1-70b-instruct,0,1,0.99 |
|
symbolic,llama-3.1-70b-instruct,0,3,0.97 |
|
symbolic,llama-3.1-70b-instruct,0,5,0.84 |
|
symbolic,llama-3.1-70b-instruct,0,7,0.84 |
|
symbolic,llama-3.1-70b-instruct,0,9,0.7 |
|
symbolic,llama-3.1-70b-instruct,0,11,0.65 |
|
symbolic,llama-3.1-70b-instruct,0,13,0.53 |
|
symbolic,llama-3.1-70b-instruct,0,15,0.45 |
|
symbolic,llama-3.1-70b-instruct,0,17,0.47 |
|
symbolic,llama-3.1-70b-instruct,0,19,0.41 |
|
symbolic,llama-3.1-70b-instruct,0,21,0.31 |
|
symbolic,llama-3.1-70b-instruct,0,23,0.31 |
|
symbolic,llama-3.1-70b-instruct,0,25,0.26 |
|
symbolic,llama-3.1-70b-instruct,0,27,0.19 |
|
symbolic,llama-3.1-70b-instruct,0,29,0.23 |
|
symbolic,llama-3.1-70b-instruct,0,31,0.13 |
|
symbolic,llama-3.1-70b-instruct,0,33,0.25 |
|
symbolic,llama-3.1-70b-instruct,0,35,0.08 |
|
symbolic,llama-3.1-70b-instruct,0,37,0.11 |
|
symbolic,llama-3.1-70b-instruct,0,39,0.07 |
|
symbolic,llama-3.1-70b-instruct,0,41,0.08 |
|
symbolic,llama-3.1-70b-instruct,0,43,0.06 |
|
symbolic,llama-3.1-70b-instruct,0,45,0.09 |
|
symbolic,llama-3.1-70b-instruct,0,47,0.07 |
|
symbolic,llama-3.1-70b-instruct,0,49,0.06 |
|
symbolic,llama-3.1-70b-instruct,0,51,0.06 |
|
symbolic,llama-3.1-70b-instruct,0,53,0.06 |
|
symbolic,llama-3.1-70b-instruct,0,55,0.02 |
|
symbolic,llama-3.1-70b-instruct,0,57,0.04 |
|
symbolic,llama-3.1-70b-instruct,0,59,0.02 |
|
symbolic,llama-3.1-70b-instruct,8000,1,0.96 |
|
symbolic,llama-3.1-70b-instruct,8000,2,0.61 |
|
symbolic,llama-3.1-70b-instruct,8000,3,0.39 |
|
symbolic,llama-3.1-70b-instruct,8000,4,0.26 |
|
symbolic,llama-3.1-70b-instruct,8000,5,0.16 |
|
symbolic,llama-3.1-70b-instruct,8000,6,0.17 |
|
symbolic,llama-3.1-70b-instruct,8000,7,0.08 |
|
symbolic,llama-3.1-70b-instruct,8000,8,0.07 |
|
symbolic,llama-3.1-70b-instruct,8000,9,0.08 |
|
symbolic,llama-3.1-70b-instruct,8000,10,0.01 |
|
symbolic,llama-3.1-70b-instruct,16000,1,0.92 |
|
symbolic,llama-3.1-70b-instruct,16000,2,0.57 |
|
symbolic,llama-3.1-70b-instruct,16000,3,0.37 |
|
symbolic,llama-3.1-70b-instruct,16000,4,0.2 |
|
symbolic,llama-3.1-70b-instruct,16000,5,0.12 |
|
symbolic,llama-3.1-70b-instruct,16000,6,0.1 |
|
symbolic,llama-3.1-70b-instruct,16000,7,0.06 |
|
symbolic,llama-3.1-70b-instruct,16000,8,0.06 |
|
symbolic,llama-3.1-70b-instruct,16000,9,0.04 |
|
symbolic,llama-3.1-70b-instruct,16000,10,0.02 |
|
symbolic,llama-3.1-70b-instruct,32000,1,0.91 |
|
symbolic,llama-3.1-70b-instruct,32000,2,0.51 |
|
symbolic,llama-3.1-70b-instruct,32000,3,0.21 |
|
symbolic,llama-3.1-70b-instruct,32000,4,0.13 |
|
symbolic,llama-3.1-70b-instruct,32000,5,0.12 |
|
symbolic,llama-3.1-70b-instruct,32000,6,0.03 |
|
symbolic,llama-3.1-70b-instruct,32000,7,0.05 |
|
symbolic,llama-3.1-70b-instruct,32000,8,0.0 |
|
symbolic,llama-3.1-70b-instruct,32000,9,0.03 |
|
symbolic,llama-3.1-70b-instruct,32000,10,0.03 |
|
realistic_Medium,minimax-text-01,0,2,0.7734 |
|
realistic_Medium,minimax-text-01,0,3,0.7656 |
|
realistic_Medium,minimax-text-01,0,4,0.9062 |
|
realistic_Medium,minimax-text-01,0,5,0.8906 |
|
realistic_Medium,minimax-text-01,0,6,0.8828 |
|
realistic_Medium,minimax-text-01,0,7,0.8828 |
|
realistic_Medium,minimax-text-01,0,8,0.8203 |
|
realistic_Medium,minimax-text-01,0,9,0.8672 |
|
realistic_Medium,minimax-text-01,0,10,0.6484 |
|
realistic_Medium,minimax-text-01,0,11,0.7969 |
|
realistic_Medium,minimax-text-01,0,12,0.6328 |
|
realistic_Medium,minimax-text-01,0,13,0.6016 |
|
realistic_Medium,minimax-text-01,0,14,0.6406 |
|
realistic_Medium,minimax-text-01,0,15,0.6562 |
|
realistic_Medium,minimax-text-01,0,16,0.4688 |
|
realistic_Medium,minimax-text-01,0,17,0.3906 |
|
realistic_Medium,minimax-text-01,0,18,0.4453 |
|
realistic_Medium,minimax-text-01,0,19,0.4844 |
|
realistic_Medium,minimax-text-01,0,20,0.4609 |
|
realistic_Medium,minimax-text-01,0,21,0.3516 |
|
realistic_Medium,minimax-text-01,0,22,0.375 |
|
realistic_Medium,minimax-text-01,0,23,0.3594 |
|
realistic_Medium,minimax-text-01,0,24,0.3359 |
|
realistic_Medium,minimax-text-01,0,25,0.2656 |
|
realistic_Medium,minimax-text-01,0,26,0.25 |
|
realistic_Medium,minimax-text-01,0,27,0.2266 |
|
realistic_Medium,minimax-text-01,0,28,0.2031 |
|
realistic_Medium,minimax-text-01,0,29,0.1719 |
|
realistic_Medium,minimax-text-01,0,30,0.1641 |
|
realistic_Medium,minimax-text-01,0,35,0.163462 |
|
realistic_Medium,minimax-text-01,0,40,0.0865385 |
|
realistic_Medium,minimax-text-01,0,45,0.0865385 |
|
realistic_Medium,minimax-text-01,8000,2,0.4531 |
|
realistic_Medium,minimax-text-01,8000,4,0.6406 |
|
realistic_Medium,minimax-text-01,8000,6,0.5469 |
|
realistic_Medium,minimax-text-01,8000,8,0.4531 |
|
realistic_Medium,minimax-text-01,8000,10,0.2891 |
|
realistic_Medium,minimax-text-01,8000,12,0.1797 |
|
realistic_Medium,minimax-text-01,8000,14,0.1328 |
|
realistic_Medium,minimax-text-01,8000,16,0.1328 |
|
realistic_Medium,minimax-text-01,8000,18,0.0938 |
|
realistic_Medium,minimax-text-01,8000,20,0.0781 |
|
realistic_Medium,minimax-text-01,8000,22,0.0859 |
|
realistic_Medium,minimax-text-01,8000,24,0.0781 |
|
realistic_Medium,minimax-text-01,8000,26,0.0547 |
|
realistic_Medium,minimax-text-01,8000,28,0.0469 |
|
realistic_Medium,minimax-text-01,8000,30,0.0547 |
|
realistic_Medium,minimax-text-01,16000,2,0.4062 |
|
realistic_Medium,minimax-text-01,16000,4,0.6562 |
|
realistic_Medium,minimax-text-01,16000,6,0.5391 |
|
realistic_Medium,minimax-text-01,16000,8,0.2734 |
|
realistic_Medium,minimax-text-01,16000,10,0.1094 |
|
realistic_Medium,minimax-text-01,16000,12,0.125 |
|
realistic_Medium,minimax-text-01,16000,14,0.1016 |
|
realistic_Medium,minimax-text-01,16000,16,0.0625 |
|
realistic_Medium,minimax-text-01,16000,18,0.0547 |
|
realistic_Medium,minimax-text-01,16000,20,0.0625 |
|
realistic_Medium,minimax-text-01,16000,22,0.0234 |
|
realistic_Medium,minimax-text-01,16000,24,0.0391 |
|
realistic_Medium,minimax-text-01,16000,26,0.0547 |
|
realistic_Medium,minimax-text-01,16000,28,0.0391 |
|
realistic_Medium,minimax-text-01,16000,30,0.0234 |
|
realistic_Medium,minimax-text-01,32000,2,0.3672 |
|
realistic_Medium,minimax-text-01,32000,4,0.5391 |
|
realistic_Medium,minimax-text-01,32000,6,0.5625 |
|
realistic_Medium,minimax-text-01,32000,8,0.3125 |
|
realistic_Medium,minimax-text-01,32000,10,0.1406 |
|
realistic_Medium,minimax-text-01,32000,12,0.1094 |
|
realistic_Medium,minimax-text-01,32000,14,0.1094 |
|
realistic_Medium,minimax-text-01,32000,16,0.0391 |
|
realistic_Medium,minimax-text-01,32000,18,0.1016 |
|
realistic_Medium,minimax-text-01,32000,20,0.0547 |
|
realistic_Medium,minimax-text-01,32000,22,0.0703 |
|
realistic_Medium,minimax-text-01,32000,24,0.0703 |
|
realistic_Medium,minimax-text-01,32000,26,0.0391 |
|
realistic_Medium,minimax-text-01,32000,28,0.0469 |
|
realistic_Hard,minimax-text-01,0,2,0.8047 |
|
realistic_Hard,minimax-text-01,0,3,0.8281 |
|
realistic_Hard,minimax-text-01,0,4,0.875 |
|
realistic_Hard,minimax-text-01,0,5,0.8125 |
|
realistic_Hard,minimax-text-01,0,6,0.8125 |
|
realistic_Hard,minimax-text-01,0,7,0.7031 |
|
realistic_Hard,minimax-text-01,0,8,0.6875 |
|
realistic_Hard,minimax-text-01,0,9,0.7266 |
|
realistic_Hard,minimax-text-01,0,10,0.6875 |
|
realistic_Hard,minimax-text-01,0,11,0.5781 |
|
realistic_Hard,minimax-text-01,0,12,0.4922 |
|
realistic_Hard,minimax-text-01,0,13,0.5156 |
|
realistic_Hard,minimax-text-01,0,14,0.4766 |
|
realistic_Hard,minimax-text-01,0,15,0.3516 |
|
realistic_Hard,minimax-text-01,0,16,0.4375 |
|
realistic_Hard,minimax-text-01,0,17,0.3516 |
|
realistic_Hard,minimax-text-01,0,18,0.3281 |
|
realistic_Hard,minimax-text-01,0,19,0.2734 |
|
realistic_Hard,minimax-text-01,0,20,0.3125 |
|
realistic_Hard,minimax-text-01,0,21,0.1484 |
|
realistic_Hard,minimax-text-01,0,22,0.1328 |
|
realistic_Hard,minimax-text-01,0,23,0.1484 |
|
realistic_Hard,minimax-text-01,0,24,0.1406 |
|
realistic_Hard,minimax-text-01,0,25,0.0938 |
|
realistic_Hard,minimax-text-01,0,26,0.0703 |
|
realistic_Hard,minimax-text-01,0,27,0.1016 |
|
realistic_Hard,minimax-text-01,0,28,0.1094 |
|
realistic_Hard,minimax-text-01,0,29,0.1562 |
|
realistic_Hard,minimax-text-01,0,30,0.0625 |
|
realistic_Hard,minimax-text-01,8000,2,0.7734 |
|
realistic_Hard,minimax-text-01,8000,4,0.6484 |
|
realistic_Hard,minimax-text-01,8000,6,0.5469 |
|
realistic_Hard,minimax-text-01,8000,8,0.4375 |
|
realistic_Hard,minimax-text-01,8000,10,0.2969 |
|
realistic_Hard,minimax-text-01,8000,12,0.1953 |
|
realistic_Hard,minimax-text-01,8000,14,0.1406 |
|
realistic_Hard,minimax-text-01,8000,16,0.1953 |
|
realistic_Hard,minimax-text-01,8000,18,0.1172 |
|
realistic_Hard,minimax-text-01,8000,20,0.0781 |
|
realistic_Hard,minimax-text-01,8000,22,0.1016 |
|
realistic_Hard,minimax-text-01,8000,24,0.0469 |
|
realistic_Hard,minimax-text-01,8000,26,0.0547 |
|
realistic_Hard,minimax-text-01,8000,28,0.0703 |
|
realistic_Hard,minimax-text-01,8000,30,0.0391 |
|
realistic_Hard,minimax-text-01,16000,2,0.6094 |
|
realistic_Hard,minimax-text-01,16000,4,0.5391 |
|
realistic_Hard,minimax-text-01,16000,6,0.4609 |
|
realistic_Hard,minimax-text-01,16000,8,0.3203 |
|
realistic_Hard,minimax-text-01,16000,10,0.1172 |
|
realistic_Hard,minimax-text-01,16000,12,0.1094 |
|
realistic_Hard,minimax-text-01,16000,14,0.0859 |
|
realistic_Hard,minimax-text-01,16000,16,0.0625 |
|
realistic_Hard,minimax-text-01,16000,18,0.0234 |
|
realistic_Hard,minimax-text-01,16000,20,0.0781 |
|
realistic_Hard,minimax-text-01,16000,22,0.0547 |
|
realistic_Hard,minimax-text-01,16000,24,0.0547 |
|
realistic_Hard,minimax-text-01,16000,26,0.0469 |
|
realistic_Hard,minimax-text-01,16000,28,0.0234 |
|
realistic_Hard,minimax-text-01,16000,30,0.0234 |
|
realistic_Hard,minimax-text-01,32000,2,0.5547 |
|
realistic_Hard,minimax-text-01,32000,4,0.4922 |
|
realistic_Hard,minimax-text-01,32000,6,0.3594 |
|
realistic_Hard,minimax-text-01,32000,8,0.1875 |
|
realistic_Hard,minimax-text-01,32000,10,0.1406 |
|
realistic_Hard,minimax-text-01,32000,12,0.0781 |
|
realistic_Hard,minimax-text-01,32000,14,0.0625 |
|
realistic_Hard,minimax-text-01,32000,16,0.0547 |
|
realistic_Hard,minimax-text-01,32000,18,0.0781 |
|
realistic_Hard,minimax-text-01,32000,20,0.0625 |
|
realistic_Hard,minimax-text-01,32000,22,0.0547 |
|
realistic_Hard,minimax-text-01,32000,24,0.0547 |
|
realistic_Hard,minimax-text-01,32000,26,0.0312 |
|
realistic_Hard,minimax-text-01,32000,28,0.0469 |
|
realistic_Hard,minimax-text-01,32000,30,0.0312 |
|
realistic_Medium,gemini-1.5-flash-002,0,2,0.595 |
|
realistic_Medium,gemini-1.5-flash-002,0,3,0.695 |
|
realistic_Medium,gemini-1.5-flash-002,0,4,0.89 |
|
realistic_Medium,gemini-1.5-flash-002,0,5,0.895 |
|
realistic_Medium,gemini-1.5-flash-002,0,6,0.94 |
|
realistic_Medium,gemini-1.5-flash-002,0,7,0.885 |
|
realistic_Medium,gemini-1.5-flash-002,0,8,0.84 |
|
realistic_Medium,gemini-1.5-flash-002,0,9,0.81 |
|
realistic_Medium,gemini-1.5-flash-002,0,10,0.63 |
|
realistic_Medium,gemini-1.5-flash-002,0,11,0.72 |
|
realistic_Medium,gemini-1.5-flash-002,0,12,0.635 |
|
realistic_Medium,gemini-1.5-flash-002,0,13,0.55 |
|
realistic_Medium,gemini-1.5-flash-002,0,14,0.54 |
|
realistic_Medium,gemini-1.5-flash-002,0,15,0.595 |
|
realistic_Medium,gemini-1.5-flash-002,0,16,0.355 |
|
realistic_Medium,gemini-1.5-flash-002,0,17,0.43 |
|
realistic_Medium,gemini-1.5-flash-002,0,18,0.505 |
|
realistic_Medium,gemini-1.5-flash-002,0,19,0.38 |
|
realistic_Medium,gemini-1.5-flash-002,0,20,0.35 |
|
realistic_Medium,gemini-1.5-flash-002,0,21,0.345 |
|
realistic_Medium,gemini-1.5-flash-002,0,22,0.38 |
|
realistic_Medium,gemini-1.5-flash-002,0,23,0.375 |
|
realistic_Medium,gemini-1.5-flash-002,0,24,0.32 |
|
realistic_Medium,gemini-1.5-flash-002,0,25,0.295 |
|
realistic_Medium,gemini-1.5-flash-002,0,26,0.32 |
|
realistic_Medium,gemini-1.5-flash-002,0,27,0.255 |
|
realistic_Medium,gemini-1.5-flash-002,0,28,0.24 |
|
realistic_Medium,gemini-1.5-flash-002,0,29,0.22 |
|
realistic_Medium,gemini-1.5-flash-002,0,30,0.19 |
|
realistic_Medium,gemini-1.5-flash-002,8000,2,0.4844 |
|
realistic_Medium,gemini-1.5-flash-002,8000,4,0.7266 |
|
realistic_Medium,gemini-1.5-flash-002,8000,6,0.75 |
|
realistic_Medium,gemini-1.5-flash-002,8000,8,0.6016 |
|
realistic_Medium,gemini-1.5-flash-002,8000,10,0.3672 |
|
realistic_Medium,gemini-1.5-flash-002,8000,12,0.3125 |
|
realistic_Medium,gemini-1.5-flash-002,8000,14,0.2969 |
|
realistic_Medium,gemini-1.5-flash-002,8000,16,0.1406 |
|
realistic_Medium,gemini-1.5-flash-002,8000,18,0.1797 |
|
realistic_Medium,gemini-1.5-flash-002,8000,20,0.0859 |
|
realistic_Medium,gemini-1.5-flash-002,8000,22,0.1562 |
|
realistic_Medium,gemini-1.5-flash-002,8000,24,0.1406 |
|
realistic_Medium,gemini-1.5-flash-002,8000,26,0.0469 |
|
realistic_Medium,gemini-1.5-flash-002,8000,28,0.1406 |
|
realistic_Medium,gemini-1.5-flash-002,8000,30,0.0938 |
|
realistic_Medium,gemini-1.5-flash-002,16000,2,0.3937 |
|
realistic_Medium,gemini-1.5-flash-002,16000,4,0.6094 |
|
realistic_Medium,gemini-1.5-flash-002,16000,6,0.6328 |
|
realistic_Medium,gemini-1.5-flash-002,16000,8,0.4724 |
|
realistic_Medium,gemini-1.5-flash-002,16000,10,0.25 |
|
realistic_Medium,gemini-1.5-flash-002,16000,12,0.25 |
|
realistic_Medium,gemini-1.5-flash-002,16000,14,0.2656 |
|
realistic_Medium,gemini-1.5-flash-002,16000,16,0.1094 |
|
realistic_Medium,gemini-1.5-flash-002,16000,18,0.0968 |
|
realistic_Medium,gemini-1.5-flash-002,16000,20,0.0391 |
|
realistic_Medium,gemini-1.5-flash-002,16000,22,0.063 |
|
realistic_Medium,gemini-1.5-flash-002,16000,24,0.0703 |
|
realistic_Medium,gemini-1.5-flash-002,16000,26,0.0234 |
|
realistic_Medium,gemini-1.5-flash-002,16000,28,0.0078 |
|
realistic_Medium,gemini-1.5-flash-002,16000,30,0.0 |
|
realistic_Medium,gemini-1.5-flash-002,32000,2,0.4524 |
|
realistic_Medium,gemini-1.5-flash-002,32000,4,0.6172 |
|
realistic_Medium,gemini-1.5-flash-002,32000,6,0.4766 |
|
realistic_Medium,gemini-1.5-flash-002,32000,8,0.3984 |
|
realistic_Medium,gemini-1.5-flash-002,32000,10,0.25 |
|
realistic_Medium,gemini-1.5-flash-002,32000,12,0.1562 |
|
realistic_Medium,gemini-1.5-flash-002,32000,14,0.125 |
|
realistic_Medium,gemini-1.5-flash-002,32000,16,0.0781 |
|
realistic_Medium,gemini-1.5-flash-002,32000,18,0.0547 |
|
realistic_Medium,gemini-1.5-flash-002,32000,20,0.0391 |
|
realistic_Medium,gemini-1.5-flash-002,32000,22,0.0234 |
|
realistic_Medium,gemini-1.5-flash-002,32000,24,0.0312 |
|
realistic_Medium,gemini-1.5-flash-002,32000,26,0.0078 |
|
realistic_Medium,gemini-1.5-flash-002,32000,28,0.0156 |
|
realistic_Medium,gemini-1.5-flash-002,32000,30,0.0078 |
|
symbolic,minimax-text-01,0,1,0.98 |
|
symbolic,minimax-text-01,0,2,1.0 |
|
symbolic,minimax-text-01,0,3,0.97 |
|
symbolic,minimax-text-01,0,4,0.96 |
|
symbolic,minimax-text-01,0,5,0.9 |
|
symbolic,minimax-text-01,0,6,0.85 |
|
symbolic,minimax-text-01,0,7,0.78 |
|
symbolic,minimax-text-01,0,8,0.83 |
|
symbolic,minimax-text-01,0,9,0.79 |
|
symbolic,minimax-text-01,0,10,0.73 |
|
symbolic,minimax-text-01,0,11,0.67 |
|
symbolic,minimax-text-01,0,21,0.28 |
|
symbolic,minimax-text-01,0,31,0.12 |
|
symbolic,minimax-text-01,0,41,0.01 |
|
symbolic,minimax-text-01,0,51,0.01 |
|
symbolic,minimax-text-01,0,61,0.0 |
|
symbolic,minimax-text-01,0,71,0.0 |
|
symbolic,minimax-text-01,8000,1,0.94 |
|
symbolic,minimax-text-01,8000,2,0.53 |
|
symbolic,minimax-text-01,8000,3,0.2 |
|
symbolic,minimax-text-01,8000,4,0.14 |
|
symbolic,minimax-text-01,8000,5,0.06 |
|
symbolic,minimax-text-01,8000,6,0.09 |
|
symbolic,minimax-text-01,8000,7,0.03 |
|
symbolic,minimax-text-01,8000,8,0.05 |
|
symbolic,minimax-text-01,8000,9,0.06 |
|
symbolic,minimax-text-01,8000,10,0.01 |
|
symbolic,minimax-text-01,16000,1,0.91 |
|
symbolic,minimax-text-01,16000,2,0.44 |
|
symbolic,minimax-text-01,16000,3,0.19 |
|
symbolic,minimax-text-01,16000,4,0.16 |
|
symbolic,minimax-text-01,16000,5,0.1 |
|
symbolic,minimax-text-01,16000,6,0.06 |
|
symbolic,minimax-text-01,16000,7,0.03 |
|
symbolic,minimax-text-01,16000,8,0.04 |
|
symbolic,minimax-text-01,16000,9,0.01 |
|
symbolic,minimax-text-01,16000,10,0.01 |
|
symbolic,minimax-text-01,32000,1,0.9 |
|
symbolic,minimax-text-01,32000,2,0.3 |
|
symbolic,minimax-text-01,32000,3,0.1 |
|
symbolic,minimax-text-01,32000,4,0.06 |
|
symbolic,minimax-text-01,32000,5,0.08 |
|
symbolic,minimax-text-01,32000,6,0.05 |
|
symbolic,minimax-text-01,32000,7,0.01 |
|
symbolic,minimax-text-01,32000,8,0.02 |
|
symbolic,minimax-text-01,32000,9,0.0 |
|
symbolic,minimax-text-01,32000,10,0.01 |
|
symbolic,jamba-1.5-large,0,1,0.97 |
|
symbolic,jamba-1.5-large,0,3,0.87 |
|
symbolic,jamba-1.5-large,0,5,0.71 |
|
symbolic,jamba-1.5-large,0,7,0.56 |
|
symbolic,jamba-1.5-large,0,9,0.46 |
|
symbolic,jamba-1.5-large,0,11,0.35 |
|
symbolic,jamba-1.5-large,0,13,0.19 |
|
symbolic,jamba-1.5-large,0,15,0.18 |
|
symbolic,jamba-1.5-large,0,17,0.15 |
|
symbolic,jamba-1.5-large,0,19,0.08 |
|
symbolic,jamba-1.5-large,0,21,0.06 |
|
symbolic,jamba-1.5-large,0,23,0.08 |
|
symbolic,jamba-1.5-large,0,25,0.04 |
|
symbolic,jamba-1.5-large,0,27,0.05 |
|
symbolic,jamba-1.5-large,0,29,0.03 |
|
realistic_Medium,llama-3.1-8b-instruct,0,2,0.65 |
|
realistic_Medium,llama-3.1-8b-instruct,0,3,0.47 |
|
realistic_Medium,llama-3.1-8b-instruct,0,4,0.78 |
|
realistic_Medium,llama-3.1-8b-instruct,0,5,0.725 |
|
realistic_Medium,llama-3.1-8b-instruct,0,6,0.735 |
|
realistic_Medium,llama-3.1-8b-instruct,0,7,0.56 |
|
realistic_Medium,llama-3.1-8b-instruct,0,8,0.57 |
|
realistic_Medium,llama-3.1-8b-instruct,0,9,0.485 |
|
realistic_Medium,llama-3.1-8b-instruct,0,10,0.365 |
|
realistic_Medium,llama-3.1-8b-instruct,0,11,0.42 |
|
realistic_Medium,llama-3.1-8b-instruct,0,12,0.335 |
|
realistic_Medium,llama-3.1-8b-instruct,0,13,0.34 |
|
realistic_Medium,llama-3.1-8b-instruct,0,14,0.285 |
|
realistic_Medium,llama-3.1-8b-instruct,0,15,0.23 |
|
realistic_Medium,llama-3.1-8b-instruct,0,16,0.135 |
|
realistic_Medium,llama-3.1-8b-instruct,0,17,0.13 |
|
realistic_Medium,llama-3.1-8b-instruct,0,18,0.155 |
|
realistic_Medium,llama-3.1-8b-instruct,0,19,0.145 |
|
realistic_Medium,llama-3.1-8b-instruct,0,20,0.11 |
|
realistic_Medium,llama-3.1-8b-instruct,0,21,0.115 |
|
realistic_Medium,llama-3.1-8b-instruct,0,22,0.06 |
|
realistic_Medium,llama-3.1-8b-instruct,0,23,0.09 |
|
realistic_Medium,llama-3.1-8b-instruct,0,24,0.065 |
|
realistic_Medium,llama-3.1-8b-instruct,0,25,0.055 |
|
realistic_Medium,llama-3.1-8b-instruct,0,26,0.045 |
|
realistic_Medium,llama-3.1-8b-instruct,0,27,0.05 |
|
realistic_Medium,llama-3.1-8b-instruct,0,28,0.035 |
|
realistic_Medium,llama-3.1-8b-instruct,0,29,0.025 |
|
realistic_Medium,llama-3.1-8b-instruct,0,30,0.05 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,2,0.24 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,4,0.15 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,6,0.235 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,8,0.15 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,10,0.04 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,12,0.03 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,14,0.05 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,16,0.015 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,18,0.03 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,20,0.03 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,22,0.005 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,24,0.015 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,26,0.03 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,28,0.02 |
|
realistic_Medium,llama-3.1-8b-instruct,8000,30,0.01 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,2,0.17 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,4,0.175 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,18,0.03 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,20,0.01 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,22,0.005 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,24,0.02 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,26,0.02 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,28,0.01 |
|
realistic_Medium,llama-3.1-8b-instruct,16000,30,0.015 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,2,0.135 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,4,0.085 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,6,0.14 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,8,0.095 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,10,0.055 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,12,0.025 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,14,0.02 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,16,0.005 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,18,0.015 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,20,0.005 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,22,0.025 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,24,0.015 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,26,0.01 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,28,0.005 |
|
realistic_Medium,llama-3.1-8b-instruct,32000,30,0.03 |
|
realistic_Hard,claude-3.5-sonnet,0,2,0.9609 |
|
realistic_Hard,claude-3.5-sonnet,0,6,0.9375 |
|
realistic_Hard,claude-3.5-sonnet,0,10,0.9375 |
|
realistic_Hard,claude-3.5-sonnet,0,14,0.8047 |
|
realistic_Hard,claude-3.5-sonnet,0,18,0.6953 |
|
realistic_Hard,claude-3.5-sonnet,0,22,0.5156 |
|
realistic_Hard,claude-3.5-sonnet,0,26,0.2891 |
|
realistic_Hard,claude-3.5-sonnet,0,30,0.2656 |
|
realistic_Hard,claude-3.5-sonnet,0,35,0.134615 |
|
realistic_Hard,claude-3.5-sonnet,0,40,0.0673077 |
|
realistic_Hard,claude-3.5-sonnet,0,45,0.0384615 |
|
realistic_Hard,claude-3.5-sonnet,0,50,0.0480769 |
|
realistic_Medium,qwq-32b-preview,0,2,0.915 |
|
realistic_Medium,qwq-32b-preview,0,3,0.91 |
|
realistic_Medium,qwq-32b-preview,0,4,0.955 |
|
realistic_Medium,qwq-32b-preview,0,5,0.965 |
|
realistic_Medium,qwq-32b-preview,0,6,0.93 |
|
realistic_Medium,qwq-32b-preview,0,7,0.945 |
|
realistic_Medium,qwq-32b-preview,0,8,0.955 |
|
realistic_Medium,qwq-32b-preview,0,9,0.935 |
|
realistic_Medium,qwq-32b-preview,0,10,0.825 |
|
realistic_Medium,qwq-32b-preview,0,11,0.89 |
|
realistic_Medium,qwq-32b-preview,0,12,0.765 |
|
realistic_Medium,qwq-32b-preview,0,13,0.815 |
|
realistic_Medium,qwq-32b-preview,0,14,0.805 |
|
realistic_Medium,qwq-32b-preview,0,15,0.845 |
|
realistic_Medium,qwq-32b-preview,0,16,0.74 |
|
realistic_Medium,qwq-32b-preview,0,17,0.705 |
|
realistic_Medium,qwq-32b-preview,0,18,0.81 |
|
realistic_Medium,qwq-32b-preview,0,19,0.715 |
|
realistic_Medium,qwq-32b-preview,0,20,0.755 |
|
realistic_Medium,qwq-32b-preview,0,21,0.76 |
|
realistic_Medium,qwq-32b-preview,0,22,0.725 |
|
realistic_Medium,qwq-32b-preview,0,23,0.74 |
|
realistic_Medium,qwq-32b-preview,0,24,0.74 |
|
realistic_Medium,qwq-32b-preview,0,25,0.605 |
|
realistic_Medium,qwq-32b-preview,0,26,0.685 |
|
realistic_Medium,qwq-32b-preview,0,27,0.665 |
|
realistic_Medium,qwq-32b-preview,0,28,0.64 |
|
realistic_Medium,qwq-32b-preview,0,29,0.625 |
|
realistic_Medium,qwq-32b-preview,0,30,0.55 |
|
realistic_Medium,qwq-32b-preview,0,35,0.48125 |
|
realistic_Medium,qwq-32b-preview,0,40,0.35 |
|
realistic_Medium,qwq-32b-preview,0,45,0.23125 |
|
realistic_Medium,qwq-32b-preview,0,50,0.15 |
|
realistic_Medium,qwq-32b-preview,0,55,0.13125 |
|
realistic_Medium,qwq-32b-preview,0,60,0.1375 |
|
realistic_Medium,qwq-32b-preview,0,65,0.09375 |
|
realistic_Medium,qwq-32b-preview,0,70,0.01875 |
|
realistic_Medium,qwq-32b-preview,0,75,0.03125 |
|
realistic_Medium,qwq-32b-preview,0,80,0.0125 |
|
realistic_Medium,qwq-32b-preview,0,85,0.0625 |
|
realistic_Medium,qwq-32b-preview,0,90,0.0 |
|
symbolic,llama-3.1-8b-instruct,0,1,0.7 |
|
symbolic,llama-3.1-8b-instruct,0,2,0.71 |
|
symbolic,llama-3.1-8b-instruct,0,3,0.67 |
|
symbolic,llama-3.1-8b-instruct,0,4,0.55 |
|
symbolic,llama-3.1-8b-instruct,0,5,0.48 |
|
symbolic,llama-3.1-8b-instruct,0,6,0.39 |
|
symbolic,llama-3.1-8b-instruct,0,7,0.31 |
|
symbolic,llama-3.1-8b-instruct,0,8,0.23 |
|
symbolic,llama-3.1-8b-instruct,0,9,0.24 |
|
symbolic,llama-3.1-8b-instruct,0,10,0.11 |
|
symbolic,llama-3.1-8b-instruct,0,11,0.15 |
|
symbolic,llama-3.1-8b-instruct,0,12,0.1 |
|
symbolic,llama-3.1-8b-instruct,0,13,0.05 |
|
symbolic,llama-3.1-8b-instruct,0,14,0.01 |
|
symbolic,llama-3.1-8b-instruct,0,15,0.07 |
|
symbolic,llama-3.1-8b-instruct,0,16,0.06 |
|
symbolic,llama-3.1-8b-instruct,0,17,0.02 |
|
symbolic,llama-3.1-8b-instruct,0,18,0.03 |
|
symbolic,llama-3.1-8b-instruct,0,19,0.02 |
|
symbolic,llama-3.1-8b-instruct,0,20,0.01 |
|
symbolic,llama-3.1-8b-instruct,0,21,0.03 |
|
symbolic,llama-3.1-8b-instruct,0,22,0.02 |
|
symbolic,llama-3.1-8b-instruct,0,23,0.0 |
|
symbolic,llama-3.1-8b-instruct,0,24,0.0 |
|
symbolic,llama-3.1-8b-instruct,0,25,0.0 |
|
symbolic,llama-3.1-8b-instruct,0,26,0.0 |
|
symbolic,llama-3.1-8b-instruct,0,27,0.0 |
|
symbolic,llama-3.1-8b-instruct,0,28,0.01 |
|
symbolic,llama-3.1-8b-instruct,0,29,0.0 |
|
symbolic,llama-3.1-8b-instruct,8000,1,0.6 |
|
symbolic,llama-3.1-8b-instruct,8000,2,0.19 |
|
symbolic,llama-3.1-8b-instruct,8000,3,0.07 |
|
symbolic,llama-3.1-8b-instruct,8000,4,0.03 |
|
symbolic,llama-3.1-8b-instruct,16000,1,0.46 |
|
symbolic,llama-3.1-8b-instruct,16000,2,0.07 |
|
symbolic,llama-3.1-8b-instruct,16000,3,0.11 |
|
symbolic,llama-3.1-8b-instruct,16000,4,0.04 |
|
symbolic,llama-3.1-8b-instruct,16000,5,0.02 |
|
symbolic,llama-3.1-8b-instruct,32000,1,0.36 |
|
symbolic,llama-3.1-8b-instruct,32000,2,0.1 |
|
symbolic,llama-3.1-8b-instruct,32000,3,0.16 |
|
symbolic,llama-3.1-8b-instruct,32000,4,0.04 |
|
symbolic,llama-3.1-8b-instruct,32000,5,0.04 |
|
symbolic,llama-3.1-8b-instruct,32000,6,0.04 |
|
symbolic,qwq-32b-preview,0,1,0.89 |
|
symbolic,qwq-32b-preview,0,6,0.95 |
|
symbolic,qwq-32b-preview,0,11,0.96 |
|
symbolic,qwq-32b-preview,0,16,0.86 |
|
symbolic,qwq-32b-preview,0,21,0.8 |
|
symbolic,qwq-32b-preview,0,26,0.64 |
|
symbolic,qwq-32b-preview,0,31,0.42 |
|
symbolic,qwq-32b-preview,0,36,0.31 |
|
symbolic,qwq-32b-preview,0,41,0.27 |
|
symbolic,qwq-32b-preview,0,46,0.35 |
|
symbolic,qwq-32b-preview,0,51,0.15 |
|
symbolic,qwq-32b-preview,0,56,0.15 |
|
symbolic,qwq-32b-preview,0,61,0.08 |
|
symbolic,qwq-32b-preview,0,66,0.08 |
|
symbolic,qwq-32b-preview,0,71,0.11 |
|
symbolic,qwq-32b-preview,0,76,0.1 |
|
symbolic,qwq-32b-preview,0,81,0.09 |
|
symbolic,qwq-32b-preview,0,86,0.05 |
|
symbolic,qwq-32b-preview,0,91,0.01 |
|
symbolic,qwq-32b-preview,0,96,0.1 |
|
symbolic,qwq-32b-preview,0,101,0.04 |
|
symbolic,qwq-32b-preview,0,106,0.03 |
|
symbolic,qwq-32b-preview,0,111,0.05 |
|
symbolic,qwq-32b-preview,0,116,0.03 |
|
symbolic,qwq-32b-preview,8000,1,0.72 |
|
symbolic,qwq-32b-preview,8000,2,0.49 |
|
symbolic,qwq-32b-preview,8000,3,0.29 |
|
symbolic,qwq-32b-preview,8000,4,0.15 |
|
symbolic,qwq-32b-preview,8000,5,0.11 |
|
symbolic,qwq-32b-preview,8000,6,0.09 |
|
symbolic,qwq-32b-preview,8000,7,0.04 |
|
symbolic,qwq-32b-preview,8000,8,0.03 |
|
symbolic,qwq-32b-preview,8000,9,0.01 |
|
symbolic,qwq-32b-preview,16000,1,0.51 |
|
symbolic,qwq-32b-preview,16000,2,0.17 |
|
symbolic,qwq-32b-preview,16000,3,0.15 |
|
symbolic,qwq-32b-preview,16000,4,0.04 |
|
symbolic,qwq-32b-preview,16000,5,0.04 |
|
symbolic,qwq-32b-preview,16000,6,0.01 |
|
symbolic,qwq-32b-preview,16000,7,0.04 |
|
symbolic,qwq-32b-preview,16000,8,0.0 |
|
symbolic,qwq-32b-preview,16000,9,0.0 |
|
symbolic,qwen-2.5-72b-instruct,0,1,1.0 |
|
symbolic,qwen-2.5-72b-instruct,0,3,1.0 |
|
symbolic,qwen-2.5-72b-instruct,0,5,0.93 |
|
symbolic,qwen-2.5-72b-instruct,0,7,0.9 |
|
symbolic,qwen-2.5-72b-instruct,0,9,0.82 |
|
symbolic,qwen-2.5-72b-instruct,0,11,0.72 |
|
symbolic,qwen-2.5-72b-instruct,0,13,0.65 |
|
symbolic,qwen-2.5-72b-instruct,0,15,0.63 |
|
symbolic,qwen-2.5-72b-instruct,0,17,0.6 |
|
symbolic,qwen-2.5-72b-instruct,0,19,0.49 |
|
symbolic,qwen-2.5-72b-instruct,0,21,0.42 |
|
symbolic,qwen-2.5-72b-instruct,0,23,0.33 |
|
symbolic,qwen-2.5-72b-instruct,0,25,0.3 |
|
symbolic,qwen-2.5-72b-instruct,0,27,0.25 |
|
symbolic,qwen-2.5-72b-instruct,0,29,0.27 |
|
symbolic,qwen-2.5-72b-instruct,0,31,0.18 |
|
symbolic,qwen-2.5-72b-instruct,0,33,0.23 |
|
symbolic,qwen-2.5-72b-instruct,0,35,0.14 |
|
symbolic,qwen-2.5-72b-instruct,0,37,0.15 |
|
symbolic,qwen-2.5-72b-instruct,0,39,0.1 |
|
symbolic,qwen-2.5-72b-instruct,0,41,0.13 |
|
symbolic,qwen-2.5-72b-instruct,0,43,0.07 |
|
symbolic,qwen-2.5-72b-instruct,0,45,0.06 |
|
symbolic,qwen-2.5-72b-instruct,0,47,0.09 |
|
symbolic,qwen-2.5-72b-instruct,0,49,0.05 |
|
symbolic,qwen-2.5-72b-instruct,0,51,0.06 |
|
symbolic,qwen-2.5-72b-instruct,0,53,0.09 |
|
symbolic,qwen-2.5-72b-instruct,0,55,0.01 |
|
symbolic,qwen-2.5-72b-instruct,0,57,0.06 |
|
symbolic,qwen-2.5-72b-instruct,0,59,0.02 |
|
symbolic,qwen-2.5-72b-instruct,8000,1,0.77 |
|
symbolic,qwen-2.5-72b-instruct,8000,2,0.46 |
|
symbolic,qwen-2.5-72b-instruct,8000,3,0.17 |
|
symbolic,qwen-2.5-72b-instruct,8000,4,0.08 |
|
symbolic,qwen-2.5-72b-instruct,8000,5,0.07 |
|
symbolic,qwen-2.5-72b-instruct,8000,6,0.01 |
|
symbolic,qwen-2.5-72b-instruct,8000,7,0.01 |
|
symbolic,qwen-2.5-72b-instruct,8000,8,0.01 |
|
symbolic,qwen-2.5-72b-instruct,8000,9,0.0 |
|
symbolic,qwen-2.5-72b-instruct,16000,1,0.71 |
|
symbolic,qwen-2.5-72b-instruct,16000,2,0.25 |
|
symbolic,qwen-2.5-72b-instruct,16000,3,0.12 |
|
symbolic,qwen-2.5-72b-instruct,16000,4,0.1 |
|
symbolic,qwen-2.5-72b-instruct,16000,5,0.06 |
|
symbolic,qwen-2.5-72b-instruct,16000,6,0.01 |
|
symbolic,qwen-2.5-72b-instruct,16000,7,0.02 |
|
symbolic,qwen-2.5-72b-instruct,16000,8,0.0 |
|
symbolic,qwen-2.5-72b-instruct,16000,9,0.0 |
|
symbolic,qwen-2.5-72b-instruct,32000,1,0.34 |
|
symbolic,qwen-2.5-72b-instruct,32000,2,0.06 |
|
symbolic,qwen-2.5-72b-instruct,32000,3,0.04 |
|
symbolic,qwen-2.5-72b-instruct,32000,4,0.04 |
|
realistic_Hard,deepseek-v3,0,2,0.88 |
|
realistic_Hard,deepseek-v3,0,3,0.915 |
|
realistic_Hard,deepseek-v3,0,4,0.905 |
|
realistic_Hard,deepseek-v3,0,5,0.9 |
|
realistic_Hard,deepseek-v3,0,6,0.905 |
|
realistic_Hard,deepseek-v3,0,7,0.815 |
|
realistic_Hard,deepseek-v3,0,8,0.865 |
|
realistic_Hard,deepseek-v3,0,9,0.885 |
|
realistic_Hard,deepseek-v3,0,10,0.785 |
|
realistic_Hard,deepseek-v3,0,11,0.7487 |
|
realistic_Hard,deepseek-v3,0,12,0.7538 |
|
realistic_Hard,deepseek-v3,0,13,0.69 |
|
realistic_Hard,deepseek-v3,0,14,0.755 |
|
realistic_Hard,deepseek-v3,0,15,0.725 |
|
realistic_Hard,deepseek-v3,0,16,0.725 |
|
realistic_Hard,deepseek-v3,0,17,0.695 |
|
realistic_Hard,deepseek-v3,0,18,0.69 |
|
realistic_Hard,deepseek-v3,0,19,0.63 |
|
realistic_Hard,deepseek-v3,0,20,0.6 |
|
realistic_Hard,deepseek-v3,0,21,0.54 |
|
realistic_Hard,deepseek-v3,0,22,0.485 |
|
realistic_Hard,deepseek-v3,0,23,0.53 |
|
realistic_Hard,deepseek-v3,0,24,0.4673 |
|
realistic_Hard,deepseek-v3,0,25,0.47 |
|
realistic_Hard,deepseek-v3,0,26,0.4 |
|
realistic_Hard,deepseek-v3,0,27,0.4 |
|
realistic_Hard,deepseek-v3,0,28,0.395 |
|
realistic_Hard,deepseek-v3,0,29,0.43 |
|
realistic_Hard,deepseek-v3,0,30,0.355 |
|
realistic_Hard,deepseek-v3,0,35,0.30625 |
|
realistic_Hard,deepseek-v3,0,40,0.1625 |
|
realistic_Hard,deepseek-v3,0,45,0.1875 |
|
realistic_Hard,deepseek-v3,0,50,0.1375 |
|
realistic_Hard,deepseek-v3,0,55,0.08125 |
|
realistic_Hard,deepseek-v3,0,60,0.0375 |
|
realistic_Hard,deepseek-v3,8000,2,0.8719 |
|
realistic_Hard,deepseek-v3,8000,4,0.84 |
|
realistic_Hard,deepseek-v3,8000,6,0.72 |
|
realistic_Hard,deepseek-v3,8000,8,0.56 |
|
realistic_Hard,deepseek-v3,8000,10,0.5 |
|
realistic_Hard,deepseek-v3,8000,12,0.48 |
|
realistic_Hard,deepseek-v3,8000,14,0.425 |
|
realistic_Hard,deepseek-v3,8000,16,0.39 |
|
realistic_Hard,deepseek-v3,8000,18,0.31 |
|
realistic_Hard,deepseek-v3,8000,20,0.3668 |
|
realistic_Hard,deepseek-v3,8000,22,0.305 |
|
realistic_Hard,deepseek-v3,8000,24,0.325 |
|
realistic_Hard,deepseek-v3,8000,26,0.365 |
|
realistic_Hard,deepseek-v3,8000,28,0.34 |
|
realistic_Hard,deepseek-v3,8000,30,0.395 |
|
realistic_Hard,deepseek-v3,16000,2,0.745 |
|
realistic_Hard,deepseek-v3,16000,4,0.72 |
|
realistic_Hard,deepseek-v3,16000,6,0.495 |
|
realistic_Hard,deepseek-v3,16000,8,0.4 |
|
realistic_Hard,deepseek-v3,16000,10,0.23 |
|
realistic_Hard,deepseek-v3,16000,12,0.17 |
|
realistic_Hard,deepseek-v3,16000,14,0.11 |
|
realistic_Hard,deepseek-v3,16000,16,0.075 |
|
realistic_Hard,deepseek-v3,16000,18,0.07 |
|
realistic_Hard,deepseek-v3,16000,20,0.055 |
|
realistic_Hard,deepseek-v3,16000,22,0.035 |
|
realistic_Hard,deepseek-v3,16000,24,0.0503 |
|
realistic_Hard,deepseek-v3,16000,26,0.025 |
|
realistic_Hard,deepseek-v3,16000,28,0.02 |
|
realistic_Hard,deepseek-v3,16000,30,0.055 |
|
realistic_Hard,deepseek-v3,32000,2,0.6475 |
|
realistic_Hard,deepseek-v3,32000,4,0.565 |
|
realistic_Hard,deepseek-v3,32000,6,0.3869 |
|
realistic_Hard,deepseek-v3,32000,8,0.19 |
|
realistic_Hard,deepseek-v3,32000,10,0.0761421 |
|
realistic_Hard,deepseek-v3,32000,12,0.0654762 |
|
realistic_Hard,deepseek-v3,32000,14,0.0494505 |
|
symbolic,gpt-4o-2024-11-20,0,1,0.97 |
|
symbolic,gpt-4o-2024-11-20,0,4,0.99 |
|
symbolic,gpt-4o-2024-11-20,0,7,0.96 |
|
symbolic,gpt-4o-2024-11-20,0,10,0.97 |
|
symbolic,gpt-4o-2024-11-20,0,13,0.76 |
|
symbolic,gpt-4o-2024-11-20,0,16,0.56 |
|
symbolic,gpt-4o-2024-11-20,0,19,0.63 |
|
symbolic,gpt-4o-2024-11-20,0,22,0.54 |
|
symbolic,gpt-4o-2024-11-20,0,25,0.45 |
|
symbolic,gpt-4o-2024-11-20,0,28,0.39 |
|
symbolic,gpt-4o-2024-11-20,0,31,0.26 |
|
symbolic,gpt-4o-2024-11-20,0,34,0.2 |
|
symbolic,gpt-4o-2024-11-20,0,37,0.22 |
|
symbolic,gpt-4o-2024-11-20,0,40,0.11 |
|
symbolic,gpt-4o-2024-11-20,0,43,0.11 |
|
symbolic,gpt-4o-2024-11-20,0,46,0.06 |
|
symbolic,gpt-4o-2024-11-20,0,49,0.06 |
|
symbolic,gpt-4o-2024-11-20,0,52,0.05 |
|
symbolic,gpt-4o-2024-11-20,0,55,0.04 |
|
symbolic,gpt-4o-2024-11-20,0,58,0.02 |
|
symbolic,gpt-4o-2024-11-20,0,61,0.01 |
|
symbolic,gpt-4o-2024-11-20,0,64,0.02 |
|
symbolic,gpt-4o-2024-11-20,0,67,0.0 |
|
symbolic,gpt-4o-2024-11-20,0,70,0.02 |
|
symbolic,gpt-4o-2024-11-20,0,73,0.01 |
|
symbolic,gpt-4o-2024-11-20,0,76,0.0 |
|
symbolic,gpt-4o-2024-11-20,0,79,0.0 |
|
symbolic,gpt-4o-2024-11-20,0,82,0.0 |
|
symbolic,gpt-4o-2024-11-20,0,85,0.0 |
|
symbolic,gpt-4o-2024-11-20,0,88,0.01 |
|
realistic_Hard,gpt-4o-2024-11-20,0,2,0.8516 |
|
realistic_Hard,gpt-4o-2024-11-20,0,6,0.8984 |
|
realistic_Hard,gpt-4o-2024-11-20,0,10,0.6172 |
|
realistic_Hard,gpt-4o-2024-11-20,0,14,0.6172 |
|
realistic_Hard,gpt-4o-2024-11-20,0,14,0.5859 |
|
realistic_Hard,gpt-4o-2024-11-20,0,18,0.5859 |
|
realistic_Hard,gpt-4o-2024-11-20,0,18,0.4609 |
|
realistic_Hard,gpt-4o-2024-11-20,0,22,0.4609 |
|
realistic_Hard,gpt-4o-2024-11-20,0,22,0.2422 |
|
realistic_Hard,gpt-4o-2024-11-20,0,26,0.1719 |
|
realistic_Hard,gpt-4o-2024-11-20,0,30,0.0781 |
|
symbolic,mistral-large-2411,0,1,0.98 |
|
symbolic,mistral-large-2411,0,4,0.94 |
|
symbolic,mistral-large-2411,0,7,0.89 |
|
symbolic,mistral-large-2411,0,10,0.96 |
|
symbolic,mistral-large-2411,0,13,0.91 |
|
symbolic,mistral-large-2411,0,16,0.73 |
|
symbolic,mistral-large-2411,0,19,0.77 |
|
symbolic,mistral-large-2411,0,22,0.53 |
|
symbolic,mistral-large-2411,0,25,0.44 |
|
symbolic,mistral-large-2411,0,28,0.29 |
|
symbolic,mistral-large-2411,0,31,0.23 |
|
symbolic,mistral-large-2411,0,34,0.17 |
|
symbolic,mistral-large-2411,0,37,0.09 |
|
symbolic,mistral-large-2411,0,40,0.1 |
|
symbolic,mistral-large-2411,0,43,0.06 |
|
symbolic,mistral-large-2411,0,46,0.07 |
|
symbolic,mistral-large-2411,0,49,0.02 |
|
symbolic,mistral-large-2411,0,52,0.04 |
|
symbolic,mistral-large-2411,0,55,0.03 |
|
symbolic,mistral-large-2411,0,58,0.03 |
|
symbolic,mistral-large-2411,8000,1,0.57 |
|
symbolic,mistral-large-2411,8000,2,0.26 |
|
symbolic,mistral-large-2411,8000,3,0.12 |
|
symbolic,mistral-large-2411,8000,4,0.04 |
|
symbolic,mistral-large-2411,8000,5,0.02 |
|
symbolic,mistral-large-2411,8000,6,0.0 |
|
symbolic,mistral-large-2411,8000,7,0.01 |
|
symbolic,mistral-large-2411,8000,8,0.01 |
|
symbolic,mistral-large-2411,8000,9,0.0 |
|
symbolic,mistral-large-2411,16000,1,0.2 |
|
symbolic,mistral-large-2411,16000,2,0.08 |
|
symbolic,mistral-large-2411,16000,3,0.06 |
|
symbolic,mistral-large-2411,16000,4,0.01 |
|
symbolic,mistral-large-2411,16000,5,0.01 |
|
symbolic,mistral-large-2411,32000,1,0.01 |
|
symbolic,mistral-large-2411,32000,2,0.0 |
|
symbolic,mistral-large-2411,32000,3,0.01 |
|
symbolic,mistral-large-2411,32000,4,0.0 |
|
realistic_Hard,gpt-4o-mini,0,2,0.6025 |
|
realistic_Hard,gpt-4o-mini,0,4,0.7944 |
|
realistic_Hard,gpt-4o-mini,0,6,0.7944 |
|
realistic_Hard,gpt-4o-mini,0,8,0.6653 |
|
realistic_Hard,gpt-4o-mini,0,10,0.504 |
|
realistic_Hard,gpt-4o-mini,0,12,0.4274 |
|
realistic_Hard,gpt-4o-mini,0,14,0.3105 |
|
realistic_Hard,gpt-4o-mini,0,14,0.3105 |
|
realistic_Hard,gpt-4o-mini,0,16,0.1976 |
|
realistic_Hard,gpt-4o-mini,0,18,0.1976 |
|
realistic_Hard,gpt-4o-mini,0,20,0.1008 |
|
realistic_Hard,gpt-4o-mini,0,22,0.0968 |
|
realistic_Hard,gpt-4o-mini,0,24,0.0484 |
|
realistic_Hard,gpt-4o-mini,0,26,0.0403 |
|
realistic_Hard,gpt-4o-mini,0,26,0.0524 |
|
realistic_Hard,gpt-4o-mini,0,28,0.0524 |
|
realistic_Hard,gpt-4o-mini,0,30,0.0645 |
|
realistic_Hard,gpt-4o-mini,8000,2,0.5938 |
|
realistic_Hard,gpt-4o-mini,8000,4,0.6484 |
|
realistic_Hard,gpt-4o-mini,8000,6,0.4844 |
|
realistic_Hard,gpt-4o-mini,8000,8,0.2812 |
|
realistic_Hard,gpt-4o-mini,8000,10,0.1719 |
|
realistic_Hard,gpt-4o-mini,8000,12,0.1562 |
|
realistic_Hard,gpt-4o-mini,8000,14,0.1016 |
|
realistic_Hard,gpt-4o-mini,8000,16,0.0625 |
|
realistic_Hard,gpt-4o-mini,8000,18,0.0781 |
|
realistic_Hard,gpt-4o-mini,8000,20,0.1016 |
|
realistic_Hard,gpt-4o-mini,16000,2,0.5391 |
|
realistic_Hard,gpt-4o-mini,16000,4,0.5781 |
|
realistic_Hard,gpt-4o-mini,16000,6,0.3984 |
|
realistic_Hard,gpt-4o-mini,16000,8,0.1797 |
|
realistic_Hard,gpt-4o-mini,16000,10,0.1016 |
|
realistic_Hard,gpt-4o-mini,16000,12,0.0625 |
|
realistic_Hard,gpt-4o-mini,16000,14,0.0625 |
|
realistic_Hard,gpt-4o-mini,16000,16,0.0391 |
|
realistic_Hard,gpt-4o-mini,16000,18,0.0781 |
|
realistic_Hard,gpt-4o-mini,16000,20,0.0938 |
|
realistic_Hard,gpt-4o-mini,32000,2,0.4766 |
|
realistic_Hard,gpt-4o-mini,32000,4,0.4531 |
|
realistic_Hard,gpt-4o-mini,32000,6,0.3281 |
|
realistic_Hard,gpt-4o-mini,32000,8,0.1562 |
|
realistic_Hard,gpt-4o-mini,32000,10,0.0703 |
|
realistic_Hard,gpt-4o-mini,32000,12,0.0859 |
|
realistic_Hard,gpt-4o-mini,32000,14,0.1094 |
|
realistic_Hard,gpt-4o-mini,32000,16,0.0391 |
|
realistic_Hard,gpt-4o-mini,32000,18,0.0547 |
|
realistic_Hard,gpt-4o-mini,32000,20,0.0469 |
|
realistic_Medium,deepseek-r1,0,10,0.7891 |
|
realistic_Medium,deepseek-r1,0,20,0.875 |
|
realistic_Medium,deepseek-r1,0,30,0.9531 |
|
realistic_Medium,deepseek-r1,0,40,0.8942 |
|
realistic_Medium,deepseek-r1,0,50,0.9135 |
|
realistic_Medium,deepseek-r1,0,60,0.9038 |
|
realistic_Medium,deepseek-r1,0,70,0.8462 |
|
realistic_Medium,deepseek-r1,0,80,0.8077 |
|
realistic_Medium,deepseek-r1,0,90,0.7308 |
|
realistic_Medium,deepseek-r1,0,100,0.5769 |
|
realistic_Medium,deepseek-r1,0,110,0.5769 |
|
realistic_Medium,deepseek-r1,0,120,0.5096 |
|
realistic_Medium,deepseek-r1,0,130,0.3654 |
|
realistic_Medium,deepseek-r1,0,140,0.2703 |
|
realistic_Medium,deepseek-r1,0,150,0.2658 |
|
realistic_Medium,claude-3.5-sonnet,0,2,0.9531 |
|
realistic_Medium,claude-3.5-sonnet,0,6,0.9844 |
|
realistic_Medium,claude-3.5-sonnet,0,10,0.8281 |
|
realistic_Medium,claude-3.5-sonnet,0,14,0.8281 |
|
realistic_Medium,claude-3.5-sonnet,0,18,0.8672 |
|
realistic_Medium,claude-3.5-sonnet,0,22,0.8359 |
|
realistic_Medium,claude-3.5-sonnet,0,26,0.6953 |
|
realistic_Medium,claude-3.5-sonnet,0,30,0.5547 |
|
realistic_Medium,claude-3.5-sonnet,0,35,0.5 |
|
realistic_Medium,claude-3.5-sonnet,0,40,0.384615 |
|
realistic_Medium,claude-3.5-sonnet,0,45,0.326923 |
|
realistic_Medium,claude-3.5-sonnet,0,50,0.0125 |
|
realistic_Medium,claude-3.5-sonnet,0,55,0.240385 |
|
realistic_Medium,claude-3.5-sonnet,0,60,0.0673077 |
|
realistic_Medium,claude-3.5-sonnet,0,65,0.0961538 |
|
realistic_Medium,claude-3.5-sonnet,0,70,0.0480769 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,2,0.685 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,3,0.67 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,4,0.82 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,5,0.82 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,6,0.775 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,7,0.705 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,8,0.655 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,9,0.605 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,10,0.465 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,11,0.43 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,12,0.355 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,13,0.35 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,14,0.295 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,15,0.235 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,16,0.18 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,17,0.17 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,18,0.15 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,19,0.155 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,20,0.155 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,21,0.075 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,22,0.055 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,23,0.085 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,24,0.075 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,25,0.06 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,26,0.015 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,27,0.035 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,28,0.065 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,29,0.055 |
|
realistic_Medium,qwen-2.5-7b-instruct,0,30,0.03 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,2,0.32 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,4,0.435 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,6,0.295 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,8,0.23 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,10,0.16 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,12,0.085 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,14,0.085 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,16,0.055 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,18,0.03 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,20,0.03 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,22,0.045 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,24,0.03 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,26,0.04 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,28,0.04 |
|
realistic_Medium,qwen-2.5-7b-instruct,8000,30,0.035 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,2,0.295 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,4,0.36 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,6,0.355 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,8,0.16 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,10,0.085 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,12,0.095 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,14,0.03 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,16,0.035 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,18,0.04 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,20,0.045 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,22,0.03 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,24,0.055 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,26,0.05 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,28,0.07 |
|
realistic_Medium,qwen-2.5-7b-instruct,16000,30,0.025 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,2,0.235 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,4,0.42 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,6,0.335 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,8,0.165 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,10,0.085 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,12,0.06 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,14,0.06 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,16,0.025 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,18,0.05 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,20,0.035 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,22,0.03 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,24,0.025 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,26,0.055 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,28,0.025 |
|
realistic_Medium,qwen-2.5-7b-instruct,32000,30,0.045 |
|
realistic_Hard,o1-mini,0,10,0.9615385 |
|
realistic_Hard,o1-mini,0,20,0.8653845 |
|
realistic_Hard,o1-mini,0,30,0.692308 |
|
realistic_Hard,o1-mini,0,40,0.548077 |
|
realistic_Hard,o1-mini,0,50,0.49035 |
|
realistic_Hard,o1-mini,0,60,0.278847 |
|
realistic_Hard,o1-mini,0,70,0.15098035 |
|
realistic_Hard,o1-mini,0,80,0.182692 |
|
realistic_Hard,o1-mini,0,90,0.098039 |
|
realistic_Medium,gemini-1.5-pro-002,0,2,0.8359 |
|
realistic_Medium,gemini-1.5-pro-002,0,6,0.9766 |
|
realistic_Medium,gemini-1.5-pro-002,0,10,0.7656 |
|
realistic_Medium,gemini-1.5-pro-002,0,14,0.75 |
|
realistic_Medium,gemini-1.5-pro-002,0,18,0.6875 |
|
realistic_Medium,gemini-1.5-pro-002,0,22,0.6406 |
|
realistic_Medium,gemini-1.5-pro-002,0,26,0.6172 |
|
realistic_Medium,gemini-1.5-pro-002,0,30,0.5312 |
|
realistic_Medium,gemini-1.5-pro-002,0,35,0.528846 |
|
realistic_Medium,gemini-1.5-pro-002,0,40,0.375 |
|
realistic_Medium,gemini-1.5-pro-002,0,45,0.307692 |
|
realistic_Medium,gemini-1.5-pro-002,0,50,0.423077 |
|
realistic_Medium,gemini-1.5-pro-002,0,55,0.317318 |
|
realistic_Medium,gemini-1.5-pro-002,0,60,0.221154 |
|
realistic_Medium,gemini-1.5-pro-002,0,65,0.240385 |
|
realistic_Medium,gemini-1.5-pro-002,0,70,0.125 |
|
realistic_Medium,gemini-1.5-pro-002,0,75,0.0865385 |
|
realistic_Medium,gemini-1.5-pro-002,0,80,0.115385 |
|
realistic_Medium,gemini-1.5-pro-002,0,85,0.125 |
|
realistic_Medium,gemini-1.5-pro-002,0,90,0.0576923 |
|
realistic_Medium,gemini-1.5-pro-002,0,95,0.0192308 |
|
realistic_Medium,gemini-1.5-pro-002,0,100,0.0288462 |
|
realistic_Medium,gemini-1.5-pro-002,8000,2,0.8516 |
|
realistic_Medium,gemini-1.5-pro-002,8000,6,0.8828 |
|
realistic_Medium,gemini-1.5-pro-002,8000,10,0.5312 |
|
realistic_Medium,gemini-1.5-pro-002,8000,14,0.4922 |
|
realistic_Medium,gemini-1.5-pro-002,8000,18,0.3828 |
|
realistic_Medium,gemini-1.5-pro-002,8000,22,0.2969 |
|
realistic_Medium,gemini-1.5-pro-002,8000,26,0.2969 |
|
realistic_Medium,gemini-1.5-pro-002,8000,30,0.2656 |
|
realistic_Medium,gemini-1.5-pro-002,16000,2,0.7969 |
|
realistic_Medium,gemini-1.5-pro-002,16000,6,0.8828 |
|
realistic_Medium,gemini-1.5-pro-002,16000,2,0.7422 |
|
realistic_Medium,gemini-1.5-pro-002,16000,6,0.8281 |
|
realistic_Medium,gemini-1.5-pro-002,16000,10,0.6172 |
|
realistic_Medium,gemini-1.5-pro-002,16000,14,0.4297 |
|
realistic_Medium,gemini-1.5-pro-002,16000,18,0.3281 |
|
realistic_Medium,gemini-1.5-pro-002,16000,22,0.1811 |
|
realistic_Medium,gemini-1.5-pro-002,16000,26,0.0781 |
|
realistic_Medium,gemini-1.5-pro-002,16000,30,0.1016 |
|
realistic_Medium,gemini-1.5-pro-002,32000,2,0.7734 |
|
realistic_Medium,gemini-1.5-pro-002,32000,6,0.7734 |
|
realistic_Medium,gemini-1.5-pro-002,32000,10,0.5547 |
|
realistic_Medium,gemini-1.5-pro-002,32000,14,0.3594 |
|
realistic_Medium,gemini-1.5-pro-002,32000,18,0.2656 |
|
realistic_Medium,gemini-1.5-pro-002,32000,22,0.2109 |
|
realistic_Medium,gemini-1.5-pro-002,32000,26,0.0703 |
|
realistic_Medium,gemini-1.5-pro-002,32000,30,0.0859 |
|
realistic_Medium,o1-mini,0,10,0.8653845 |
|
realistic_Medium,o1-mini,0,20,0.86 |
|
realistic_Medium,o1-mini,0,30,0.8942305 |
|
realistic_Medium,o1-mini,0,40,0.8653845 |
|
realistic_Medium,o1-mini,0,50,0.846154 |
|
realistic_Medium,o1-mini,0,60,0.701923 |
|
realistic_Medium,o1-mini,0,70,0.435294 |
|
realistic_Medium,o1-mini,0,80,0.4423075 |
|
realistic_Medium,o1-mini,0,90,0.221154 |
|
realistic_Medium,o1-mini,0,100,0.182692 |
|
realistic_Medium,o1-mini,0,110,0.12500005 |
|
realistic_Medium,o1-mini,0,120,0.09615405 |
|
realistic_Medium,llama-3.1-405b-instruct,0,2,0.7958 |
|
realistic_Medium,llama-3.1-405b-instruct,0,3,0.78 |
|
realistic_Medium,llama-3.1-405b-instruct,0,4,0.96 |
|
realistic_Medium,llama-3.1-405b-instruct,0,5,0.91 |
|
realistic_Medium,llama-3.1-405b-instruct,0,6,0.935 |
|
realistic_Medium,llama-3.1-405b-instruct,0,7,0.905 |
|
realistic_Medium,llama-3.1-405b-instruct,0,8,0.83 |
|
realistic_Medium,llama-3.1-405b-instruct,0,9,0.855 |
|
realistic_Medium,llama-3.1-405b-instruct,0,10,0.71 |
|
realistic_Medium,llama-3.1-405b-instruct,0,11,0.755 |
|
realistic_Medium,llama-3.1-405b-instruct,0,12,0.65 |
|
realistic_Medium,llama-3.1-405b-instruct,0,13,0.565 |
|
realistic_Medium,llama-3.1-405b-instruct,0,14,0.59 |
|
realistic_Medium,llama-3.1-405b-instruct,0,15,0.565 |
|
realistic_Medium,llama-3.1-405b-instruct,0,16,0.415 |
|
realistic_Medium,llama-3.1-405b-instruct,0,17,0.45 |
|
realistic_Medium,llama-3.1-405b-instruct,0,18,0.375 |
|
realistic_Medium,llama-3.1-405b-instruct,0,19,0.365 |
|
realistic_Medium,llama-3.1-405b-instruct,0,20,0.33 |
|
realistic_Medium,llama-3.1-405b-instruct,0,21,0.25 |
|
realistic_Medium,llama-3.1-405b-instruct,0,22,0.2 |
|
realistic_Medium,llama-3.1-405b-instruct,0,23,0.125 |
|
realistic_Medium,llama-3.1-405b-instruct,0,24,0.125 |
|
realistic_Medium,llama-3.1-405b-instruct,0,25,0.08 |
|
realistic_Medium,llama-3.1-405b-instruct,0,26,0.05 |
|
realistic_Medium,llama-3.1-405b-instruct,0,27,0.03 |
|
realistic_Medium,llama-3.1-405b-instruct,0,28,0.01 |
|
realistic_Medium,llama-3.1-405b-instruct,0,29,0.0 |
|
realistic_Medium,llama-3.1-405b-instruct,0,30,0.005 |
|
realistic_Hard,jamba-1.5-large,0,2,0.7266 |
|
realistic_Hard,jamba-1.5-large,0,4,0.5078 |
|
realistic_Hard,jamba-1.5-large,0,6,0.2812 |
|
realistic_Hard,jamba-1.5-large,0,8,0.1094 |
|
realistic_Hard,jamba-1.5-large,0,10,0.1484 |
|
realistic_Hard,jamba-1.5-large,0,12,0.1641 |
|
realistic_Hard,jamba-1.5-large,0,14,0.0781 |
|
realistic_Hard,jamba-1.5-large,0,16,0.0703 |
|
realistic_Hard,jamba-1.5-large,0,18,0.0625 |
|
realistic_Hard,jamba-1.5-large,0,20,0.125 |
|
realistic_Hard,jamba-1.5-large,0,22,0.1094 |
|
realistic_Hard,jamba-1.5-large,0,24,0.1016 |
|
realistic_Hard,jamba-1.5-large,0,26,0.0781 |
|
realistic_Hard,jamba-1.5-large,0,28,0.0859 |
|
realistic_Hard,jamba-1.5-large,0,30,0.0938 |
|
realistic_Hard,gemini-1.5-flash-002,0,2,0.765 |
|
realistic_Hard,gemini-1.5-flash-002,0,3,0.84 |
|
realistic_Hard,gemini-1.5-flash-002,0,4,0.77 |
|
realistic_Hard,gemini-1.5-flash-002,0,5,0.765 |
|
realistic_Hard,gemini-1.5-flash-002,0,6,0.765 |
|
realistic_Hard,gemini-1.5-flash-002,0,7,0.76 |
|
realistic_Hard,gemini-1.5-flash-002,0,8,0.725 |
|
realistic_Hard,gemini-1.5-flash-002,0,9,0.64 |
|
realistic_Hard,gemini-1.5-flash-002,0,10,0.63 |
|
realistic_Hard,gemini-1.5-flash-002,0,11,0.575 |
|
realistic_Hard,gemini-1.5-flash-002,0,12,0.52 |
|
realistic_Hard,gemini-1.5-flash-002,0,13,0.45 |
|
realistic_Hard,gemini-1.5-flash-002,0,14,0.77 |
|
realistic_Hard,gemini-1.5-flash-002,0,15,0.46 |
|
realistic_Hard,gemini-1.5-flash-002,0,16,0.455 |
|
realistic_Hard,gemini-1.5-flash-002,0,17,0.38 |
|
realistic_Hard,gemini-1.5-flash-002,0,18,0.36 |
|
realistic_Hard,gemini-1.5-flash-002,0,19,0.415 |
|
realistic_Hard,gemini-1.5-flash-002,0,20,0.33 |
|
realistic_Hard,gemini-1.5-flash-002,0,21,0.24 |
|
realistic_Hard,gemini-1.5-flash-002,0,22,0.19 |
|
realistic_Hard,gemini-1.5-flash-002,0,23,0.22 |
|
realistic_Hard,gemini-1.5-flash-002,0,24,0.17 |
|
realistic_Hard,gemini-1.5-flash-002,0,25,0.165 |
|
realistic_Hard,gemini-1.5-flash-002,0,26,0.125 |
|
realistic_Hard,gemini-1.5-flash-002,0,27,0.11 |
|
realistic_Hard,gemini-1.5-flash-002,0,28,0.12 |
|
realistic_Hard,gemini-1.5-flash-002,0,29,0.125 |
|
realistic_Hard,gemini-1.5-flash-002,0,30,0.095 |
|
realistic_Hard,gemini-1.5-flash-002,0,35,0.0 |
|
realistic_Hard,gemini-1.5-flash-002,8000,2,0.7031 |
|
realistic_Hard,gemini-1.5-flash-002,8000,4,0.6406 |
|
realistic_Hard,gemini-1.5-flash-002,8000,6,0.6406 |
|
realistic_Hard,gemini-1.5-flash-002,8000,8,0.5156 |
|
realistic_Hard,gemini-1.5-flash-002,8000,10,0.4141 |
|
realistic_Hard,gemini-1.5-flash-002,8000,12,0.3438 |
|
realistic_Hard,gemini-1.5-flash-002,8000,16,0.25 |
|
realistic_Hard,gemini-1.5-flash-002,8000,18,0.1736 |
|
realistic_Hard,gemini-1.5-flash-002,8000,20,0.2773 |
|
realistic_Hard,gemini-1.5-flash-002,8000,22,0.1339 |
|
realistic_Hard,gemini-1.5-flash-002,8000,24,0.1452 |
|
realistic_Hard,gemini-1.5-flash-002,8000,26,0.1172 |
|
realistic_Hard,gemini-1.5-flash-002,8000,28,0.129 |
|
realistic_Hard,gemini-1.5-flash-002,8000,30,0.0938 |
|
realistic_Hard,gemini-1.5-flash-002,16000,2,0.6094 |
|
realistic_Hard,gemini-1.5-flash-002,16000,4,0.5938 |
|
realistic_Hard,gemini-1.5-flash-002,16000,6,0.5469 |
|
realistic_Hard,gemini-1.5-flash-002,16000,8,0.4062 |
|
realistic_Hard,gemini-1.5-flash-002,16000,10,0.3125 |
|
realistic_Hard,gemini-1.5-flash-002,16000,12,0.3047 |
|
realistic_Hard,gemini-1.5-flash-002,16000,14,0.1719 |
|
realistic_Hard,gemini-1.5-flash-002,16000,16,0.0859 |
|
realistic_Hard,gemini-1.5-flash-002,16000,18,0.0781 |
|
realistic_Hard,gemini-1.5-flash-002,16000,20,0.0703 |
|
realistic_Hard,gemini-1.5-flash-002,16000,22,0.0312 |
|
realistic_Hard,gemini-1.5-flash-002,16000,24,0.0547 |
|
realistic_Hard,gemini-1.5-flash-002,16000,26,0.0391 |
|
realistic_Hard,gemini-1.5-flash-002,16000,28,0.0234 |
|
realistic_Hard,gemini-1.5-flash-002,16000,30,0.0312 |
|
realistic_Hard,gemini-1.5-flash-002,32000,2,0.6406 |
|
realistic_Hard,gemini-1.5-flash-002,32000,4,0.5 |
|
realistic_Hard,gemini-1.5-flash-002,32000,6,0.4844 |
|
realistic_Hard,gemini-1.5-flash-002,32000,8,0.3047 |
|
realistic_Hard,gemini-1.5-flash-002,32000,10,0.2109 |
|
realistic_Hard,gemini-1.5-flash-002,32000,12,0.1641 |
|
realistic_Hard,gemini-1.5-flash-002,32000,14,0.1406 |
|
realistic_Hard,gemini-1.5-flash-002,32000,16,0.1094 |
|
realistic_Hard,gemini-1.5-flash-002,32000,18,0.0391 |
|
realistic_Hard,gemini-1.5-flash-002,32000,20,0.0703 |
|
realistic_Hard,gemini-1.5-flash-002,32000,22,0.0394 |
|
realistic_Hard,gemini-1.5-flash-002,32000,24,0.0078 |
|
realistic_Hard,gemini-1.5-flash-002,32000,26,0.0078 |
|
realistic_Hard,gemini-1.5-flash-002,32000,28,0.0157 |
|
realistic_Hard,gemini-1.5-flash-002,32000,30,0.0156 |
|
realistic_Hard,qwq-32b-preview,0,2,0.655 |
|
realistic_Hard,qwq-32b-preview,0,3,0.72 |
|
realistic_Hard,qwq-32b-preview,0,4,0.775 |
|
realistic_Hard,qwq-32b-preview,0,5,0.73 |
|
realistic_Hard,qwq-32b-preview,0,6,0.655 |
|
realistic_Hard,qwq-32b-preview,0,7,0.72 |
|
realistic_Hard,qwq-32b-preview,0,8,0.715 |
|
realistic_Hard,qwq-32b-preview,0,9,0.725 |
|
realistic_Hard,qwq-32b-preview,0,10,0.64 |
|
realistic_Hard,qwq-32b-preview,0,11,0.58 |
|
realistic_Hard,qwq-32b-preview,0,12,0.565 |
|
realistic_Hard,qwq-32b-preview,0,13,0.515 |
|
realistic_Hard,qwq-32b-preview,0,14,0.615 |
|
realistic_Hard,qwq-32b-preview,0,15,0.535 |
|
realistic_Hard,qwq-32b-preview,0,16,0.57 |
|
realistic_Hard,qwq-32b-preview,0,17,0.59 |
|
realistic_Hard,qwq-32b-preview,0,18,0.615 |
|
realistic_Hard,qwq-32b-preview,0,19,0.545 |
|
realistic_Hard,qwq-32b-preview,0,20,0.595 |
|
realistic_Hard,qwq-32b-preview,0,21,0.425 |
|
realistic_Hard,qwq-32b-preview,0,22,0.41 |
|
realistic_Hard,qwq-32b-preview,0,23,0.315 |
|
realistic_Hard,qwq-32b-preview,0,24,0.385 |
|
realistic_Hard,qwq-32b-preview,0,25,0.36 |
|
realistic_Hard,qwq-32b-preview,0,26,0.37 |
|
realistic_Hard,qwq-32b-preview,0,27,0.35 |
|
realistic_Hard,qwq-32b-preview,0,28,0.35 |
|
realistic_Hard,qwq-32b-preview,0,29,0.39 |
|
realistic_Hard,qwq-32b-preview,0,30,0.255 |
|
realistic_Hard,qwq-32b-preview,0,35,0.20625 |
|
realistic_Hard,qwq-32b-preview,0,40,0.125 |
|
realistic_Hard,qwq-32b-preview,0,45,0.14375 |
|
realistic_Hard,qwq-32b-preview,0,50,0.09375 |
|
realistic_Medium,deepseek-v3,0,2,0.945 |
|
realistic_Medium,deepseek-v3,0,3,0.895 |
|
realistic_Medium,deepseek-v3,0,4,0.965 |
|
realistic_Medium,deepseek-v3,0,5,0.965 |
|
realistic_Medium,deepseek-v3,0,6,0.965 |
|
realistic_Medium,deepseek-v3,0,7,0.935 |
|
realistic_Medium,deepseek-v3,0,8,0.875 |
|
realistic_Medium,deepseek-v3,0,9,0.97 |
|
realistic_Medium,deepseek-v3,0,10,0.81 |
|
realistic_Medium,deepseek-v3,0,11,0.87 |
|
realistic_Medium,deepseek-v3,0,12,0.715 |
|
realistic_Medium,deepseek-v3,0,13,0.735 |
|
realistic_Medium,deepseek-v3,0,14,0.785 |
|
realistic_Medium,deepseek-v3,0,15,0.825 |
|
realistic_Medium,deepseek-v3,0,16,0.585 |
|
realistic_Medium,deepseek-v3,0,17,0.605 |
|
realistic_Medium,deepseek-v3,0,18,0.71 |
|
realistic_Medium,deepseek-v3,0,19,0.565 |
|
realistic_Medium,deepseek-v3,0,20,0.625 |
|
realistic_Medium,deepseek-v3,0,21,0.625 |
|
realistic_Medium,deepseek-v3,0,22,0.67 |
|
realistic_Medium,deepseek-v3,0,24,0.7 |
|
realistic_Medium,deepseek-v3,0,25,0.62 |
|
realistic_Medium,deepseek-v3,0,26,0.715 |
|
realistic_Medium,deepseek-v3,0,27,0.665 |
|
realistic_Medium,deepseek-v3,0,28,0.63 |
|
realistic_Medium,deepseek-v3,0,29,0.655 |
|
realistic_Medium,deepseek-v3,0,30,0.635 |
|
realistic_Medium,deepseek-v3,0,35,0.5625 |
|
realistic_Medium,deepseek-v3,0,40,0.5625 |
|
realistic_Medium,deepseek-v3,0,45,0.5 |
|
realistic_Medium,deepseek-v3,0,50,0.3625 |
|
realistic_Medium,deepseek-v3,0,55,0.39375 |
|
realistic_Medium,deepseek-v3,0,60,0.36875 |
|
realistic_Medium,deepseek-v3,0,65,0.2625 |
|
realistic_Medium,deepseek-v3,0,70,0.1875 |
|
realistic_Medium,deepseek-v3,0,75,0.175 |
|
realistic_Medium,deepseek-v3,0,80,0.10625 |
|
realistic_Medium,deepseek-v3,0,85,0.075 |
|
realistic_Medium,deepseek-v3,0,90,0.08125 |
|
realistic_Medium,deepseek-v3,0,95,0.03125 |
|
realistic_Medium,deepseek-v3,8000,2,0.69 |
|
realistic_Medium,deepseek-v3,8000,4,0.865 |
|
realistic_Medium,deepseek-v3,8000,6,0.72 |
|
realistic_Medium,deepseek-v3,8000,8,0.62 |
|
realistic_Medium,deepseek-v3,8000,10,0.4925 |
|
realistic_Medium,deepseek-v3,8000,12,0.44 |
|
realistic_Medium,deepseek-v3,8000,14,0.29 |
|
realistic_Medium,deepseek-v3,8000,16,0.2121 |
|
realistic_Medium,deepseek-v3,8000,18,0.205 |
|
realistic_Medium,deepseek-v3,8000,20,0.191 |
|
realistic_Medium,deepseek-v3,8000,22,0.155 |
|
realistic_Medium,deepseek-v3,8000,24,0.13 |
|
realistic_Medium,deepseek-v3,8000,26,0.1307 |
|
realistic_Medium,deepseek-v3,8000,28,0.13 |
|
realistic_Medium,deepseek-v3,8000,30,0.17 |
|
realistic_Medium,deepseek-v3,16000,2,0.637056 |
|
realistic_Medium,deepseek-v3,16000,4,0.805 |
|
realistic_Medium,deepseek-v3,16000,6,0.66 |
|
realistic_Medium,deepseek-v3,16000,8,0.405 |
|
realistic_Medium,deepseek-v3,16000,10,0.355 |
|
realistic_Medium,deepseek-v3,16000,14,0.0894 |
|
realistic_Medium,deepseek-v3,16000,16,0.1154 |
|
realistic_Medium,deepseek-v3,16000,18,0.073 |
|
realistic_Medium,deepseek-v3,16000,20,0.0636 |
|
realistic_Medium,deepseek-v3,16000,22,0.07 |
|
realistic_Medium,deepseek-v3,16000,24,0.0452 |
|
realistic_Medium,deepseek-v3,16000,26,0.035 |
|
realistic_Medium,deepseek-v3,16000,28,0.0302 |
|
realistic_Medium,deepseek-v3,16000,30,0.025 |
|
realistic_Medium,deepseek-v3,32000,2,0.565 |
|
realistic_Medium,deepseek-v3,32000,4,0.7342 |
|
realistic_Medium,deepseek-v3,32000,6,0.5333 |
|
realistic_Medium,deepseek-v3,32000,8,0.3662 |
|
realistic_Medium,deepseek-v3,32000,10,0.1029 |
|
realistic_Medium,deepseek-v3,32000,12,0.16 |
|
realistic_Medium,deepseek-v3,32000,14,0.1443 |
|
realistic_Medium,deepseek-v3,32000,16,0.0443 |
|
realistic_Medium,deepseek-v3,32000,18,0.05 |
|
realistic_Medium,deepseek-v3,32000,20,0.0452 |
|
realistic_Medium,deepseek-v3,32000,22,0.035 |
|
realistic_Medium,deepseek-v3,32000,24,0.015 |
|
realistic_Medium,deepseek-v3,32000,26,0.02 |
|
realistic_Medium,deepseek-v3,32000,28,0.05 |
|
realistic_Medium,deepseek-v3,32000,30,0.04 |
|
symbolic,deepseek-r1,0,1,1.0 |
|
symbolic,deepseek-r1,0,21,0.92 |
|
symbolic,deepseek-r1,0,41,0.68 |
|
symbolic,deepseek-r1,0,61,0.46 |
|
symbolic,deepseek-r1,0,81,0.32 |
|
symbolic,deepseek-r1,0,101,0.16 |
|
symbolic,deepseek-r1,0,121,0.16 |
|
symbolic,deepseek-r1,0,141,0.12 |
|
symbolic,deepseek-r1,0,161,0.16 |
|
symbolic,deepseek-r1,0,181,0.04 |
|
symbolic,deepseek-r1,0,201,0.04 |
|
symbolic,deepseek-r1,0,221,0.0 |
|
symbolic,deepseek-r1,0,241,0.04 |
|
symbolic,deepseek-r1,0,261,0.02 |
|
symbolic,deepseek-r1,0,281,0.04 |
|
symbolic,deepseek-r1,8000,1,1.0 |
|
symbolic,deepseek-r1,8000,3,0.92 |
|
symbolic,deepseek-r1,8000,5,0.82 |
|
symbolic,deepseek-r1,8000,7,0.6 |
|
symbolic,deepseek-r1,8000,9,0.54 |
|
symbolic,deepseek-r1,8000,11,0.32 |
|
symbolic,deepseek-r1,8000,13,0.42 |
|
symbolic,deepseek-r1,8000,15,0.22 |
|
symbolic,deepseek-r1,8000,17,0.26 |
|
symbolic,deepseek-r1,8000,19,0.16 |
|
symbolic,deepseek-r1,8000,21,0.22 |
|
symbolic,deepseek-r1,8000,23,0.06 |
|
symbolic,deepseek-r1,8000,25,0.2 |
|
symbolic,deepseek-r1,8000,27,0.02 |
|
symbolic,deepseek-r1,8000,29,0.04 |
|
symbolic,deepseek-r1,8000,31,0.12 |
|
symbolic,deepseek-r1,8000,33,0.04 |
|
symbolic,deepseek-r1,8000,35,0.06 |
|
symbolic,deepseek-r1,8000,37,0.02 |
|
symbolic,deepseek-r1,8000,39,0.04 |
|
symbolic,deepseek-r1,8000,41,0.02 |
|
symbolic,deepseek-r1,16000,1,1.0 |
|
symbolic,deepseek-r1,16000,3,0.86 |
|
symbolic,deepseek-r1,16000,5,0.54 |
|
symbolic,deepseek-r1,16000,7,0.44 |
|
symbolic,deepseek-r1,16000,9,0.4 |
|
symbolic,deepseek-r1,16000,11,0.28 |
|
symbolic,deepseek-r1,16000,13,0.16 |
|
symbolic,deepseek-r1,16000,15,0.2 |
|
symbolic,deepseek-r1,16000,17,0.06 |
|
symbolic,deepseek-r1,16000,19,0.12 |
|
symbolic,deepseek-r1,16000,21,0.14 |
|
symbolic,deepseek-r1,16000,23,0.08 |
|
symbolic,deepseek-r1,16000,25,0.04 |
|
symbolic,deepseek-r1,16000,27,0.0 |
|
symbolic,deepseek-r1,16000,29,0.02 |
|
symbolic,deepseek-r1,32000,1,1.0 |
|
symbolic,deepseek-r1,32000,3,0.68 |
|
symbolic,deepseek-r1,32000,5,0.54 |
|
symbolic,deepseek-r1,32000,7,0.24 |
|
symbolic,deepseek-r1,32000,9,0.14 |
|
symbolic,deepseek-r1,32000,11,0.16 |
|
symbolic,deepseek-r1,32000,13,0.06 |
|
symbolic,deepseek-r1,32000,15,0.06 |
|
symbolic,deepseek-r1,32000,17,0.02 |
|
symbolic,deepseek-r1,32000,19,0.04 |
|
|