# Scores recorded for the "gpt4" model, keyed by benchmark variant.
# All values are fractions between 0 and 1.
# NOTE(review): presumably accuracies; "_og" / "_g2b" look like two variants
# of the same benchmark (original vs. transformed) -- confirm against the
# script that produced these numbers.
gpt4 = {
    'b4bqa': 0.94921875,
    'medqa_og': 0.9232804232804233,
    'medqa_g2b': 0.8994708994708994,
    'medmcqa_og': 0.9166666666666666,
    'medmcqa_g2b': 0.8879310344827587,
}

# Scores recorded for the "gpt4o" model, keyed by benchmark variant.
# All values are fractions between 0 and 1.
# NOTE(review): presumably accuracies; "_og" / "_g2b" look like two variants
# of the same benchmark (original vs. transformed) -- confirm against the
# script that produced these numbers.
gpt4o = {
    'b4bqa': 0.96484375,
    'medqa_og': 0.9021164021164021,
    'medqa_g2b': 0.8835978835978836,
    'medmcqa_og': 0.9051724137931034,
    'medmcqa_g2b': 0.8649425287356322,
}

# Scores recorded for the "gpt35turbo" model, keyed by benchmark variant.
# All values are fractions between 0 and 1.
# NOTE(review): presumably accuracies; "_og" / "_g2b" look like two variants
# of the same benchmark (original vs. transformed) -- confirm against the
# script that produced these numbers.
# NOTE(review): the key order here (medmcqa_* before medqa_*) differs from the
# gpt4 / gpt4o / claude_opus dicts in this file. It is kept unchanged because
# callers may depend on insertion order -- confirm whether it should match.
# NOTE(review): the med* scores here are higher than those recorded for the
# other models in this file; verify they come from the same question set.
gpt35turbo = {
    'b4bqa': 0.9174107142857143,
    'medmcqa_og': 0.9827586206896551,
    'medmcqa_g2b': 0.9770114942528736,
    'medqa_og': 0.9629629629629629,
    'medqa_g2b': 0.9603174603174603,
}

# Scores recorded for the "claude_opus" model, keyed by benchmark variant.
# All values are fractions between 0 and 1.
# NOTE(review): presumably accuracies; "_og" / "_g2b" look like two variants
# of the same benchmark (original vs. transformed) -- confirm against the
# script that produced these numbers.
claude_opus = {
    'b4bqa': 0.921875,
    'medqa_og': 0.8571428571428571,
    'medqa_g2b': 0.8333333333333334,
    'medmcqa_og': 0.8649425287356322,
    'medmcqa_g2b': 0.7988505747126436,
}