open-r1-eval-leaderboard
/
eval_results
/Qwen
/Qwen1.5-0.5B-Chat
/main
/alpaca_eval
/results_2024-04-30T19-28-46.json
{ | |
"results":{ | |
"Qwen_Qwen1.5-0.5B-Chat_main":{ | |
"win_rate":0.8714440396, | |
"standard_error":0.2704964542, | |
"n_wins":5, | |
"n_wins_base":798, | |
"n_draws":2, | |
"n_total":805, | |
"discrete_win_rate":0.7453416149, | |
"mode":"community", | |
"avg_length":202, | |
"length_controlled_winrate":2.2746449742 | |
} | |
} | |
} |