Spaces:
Running
Running
model_name,coding,creative writing,current news,general culture,grammar,history,logics,math,science,technology,Average (All Topics) | |
claude-3.5-haiku-20241022,0.0032948,0.00154037,0.0016057,0.0016259,0.00151862,0.00178182,0.00171982,0.00205066,0.0015184,0.00161418,0.00182703 | |
claude-3.7-sonnet,0.02260275,0.00883012,0.00904476,0.01127775,0.00853292,0.01137565,0.0083595,0.01336142,0.0100265,0.00998204,0.01133934 | |
claude-3.7-sonnet:thinking,0.079704,0.02546725,0.02257356,0.02740737,0.02584362,0.02936273,0.06541866,0.10232284,0.02794467,0.02593431,0.0431979 | |
deepSeek-R1,0.0082304,0.0031812,0.0028937,0.00284753,0.00369708,0.00281948,0.00861731,0.01360111,0.00290555,0.00279671,0.00515901 | |
deepSeek-V3,0.00143918,0.0008705,0.00080466,0.0008034,0.00072983,0.00073238,0.00111661,0.00146937,0.00069503,0.00076635,0.00094273 | |
deepSeek-V3-0324,0.00155228,0.00063994,0.0008007,0.00070746,0.00073825,0.00071352,0.00151636,0.00165223,0.00068744,0.00120867,0.00102168 | |
gemini-2.0-flash-001,0.00077989,0.00015062,0.00026746,0.00026793,0.00027084,0.00026226,0.00037649,0.00056978,0.00029988,0.00029987,0.0003545 | |
gemini-2.5-pro-preview-03-25,0.02947544,0.00636458,0.00975805,0.01222104,0.01205163,0.01157245,0.0078375,0.0108456,0.01127743,0.01112851,0.01225322 | |
gemma-3-27b-it,0.0004438,0.00015774,0.00020777,0.00020572,0.00019867,0.00019412,0.00023203,0.00041328,0.00023526,0.00022656,0.00025149 | |
gpt-4.1-mini,0.00253364,0.00088755,0.00095741,0.000937,0.00120217,0.00097678,0.00193531,0.00309102,0.000936,0.001089,0.00145459 | |
gpt-4o-mini,0.00059473,0.00037163,0.00030421,0.00032765,0.00028369,0.00031396,0.00040208,0.00063024,0.0002965,0.00034064,0.00038653 | |
grok-2-1212,0.0130352,0.00550725,0.00660968,0.00663592,0.00646162,0.00686785,0.00951731,0.01720774,0.00614067,0.00673246,0.00847157 | |
grok-3-beta,0.026382,0.01027425,0.01239936,0.01299412,0.01412204,0.01317081,0.02142797,0.02826706,0.012175,0.01828696,0.01694996 | |
llama-3.1-Nemotron-70B-Instruct-HF,0.00054719,0.00027321,0.00032472,0.00034392,0.00034446,0.00035163,0.00042114,0.00061984,0.00031967,0.00031898,0.00038647 | |
llama-3.3-70B-Instruct,0.00051491,0.00020951,0.00031271,0.00033135,0.00028526,0.00032196,0.00040135,0.00055238,0.00030856,0.00031853,0.00035565 | |
llama-3_1-Nemotron-Ultra-253B-v1,0.00510581,0.00172625,0.00167261,0.0016562,0.0022302,0.00150722,0.00604944,0.00848181,0.0016631,0.00154232,0.0031635 | |
llama-4-Maverick-17B-128E-Instruct-FP8,0.00107845,0.00042373,0.00052103,0.00054683,0.00054633,0.0005472,0.00092127,0.00109058,0.00052545,0.00051867,0.00067195 | |
llama-4-Scout-17B-16E-Instruct,0.0008043,0.0003345,0.00041023,0.00042973,0.00037571,0.00041703,0.00054822,0.00068883,0.00037612,0.00038535,0.000477 | |
mistral-large-2411,0.0083134,0.00458883,0.00470712,0.00462658,0.00415023,0.00458323,0.00468037,0.00716394,0.00469011,0.00497415,0.0052478 | |
mistral-small-24b-instruct-2501,0.00018066,0.00010744,0.00010954,0.00010174,0.0000935,0.00009791,0.00013218,0.00017749,0.00009814,0.00010746,0.00012061 | |
nova-lite-v1,0.00024167,0.0001164,0.00011943,0.00013032,0.00012636,0.00013363,0.00017926,0.00028925,0.00012305,0.00012955,0.00015889 | |
nova-pro-v1,0.002483,0.00148737,0.00105101,0.00114887,0.00108769,0.00103572,0.00139458,0.00209969,0.00096489,0.0010052,0.0013758 | |
o3-mini-2025-01-31,0.00929378,0.00434688,0.00347006,0.00375008,0.0041121,0.00384886,0.0095952,0.01557568,0.00371201,0.00355482,0.00612595 | |
o4-mini-2025-04-16,0.01199341,0.00602218,0.00628254,0.00633765,0.00797699,0.0060602,0.00986951,0.01304156,0.00612248,0.00559202,0.00792985 | |
qwen-plus,0.00148482,0.00108252,0.00073573,0.00073057,0.00073734,0.00074188,0.00102462,0.0014788,0.0006776,0.00077935,0.00094732 | |