open-r1-eval-leaderboard
/
eval_results
/Qwen
/Qwen1.5-7B-Chat
/main
/alpaca_eval
/results_2024-05-01T07-35-15.json
{ | |
"results":{ | |
"Qwen_Qwen1.5-7B-Chat_main":{ | |
"win_rate":10.6668277226, | |
"standard_error":0.8982814823, | |
"n_wins":75, | |
"n_wins_base":727, | |
"n_draws":3, | |
"n_total":805, | |
"discrete_win_rate":9.5031055901, | |
"mode":"community", | |
"avg_length":1609, | |
"length_controlled_winrate":13.2092440356 | |
} | |
} | |
} |