Spaces:
Sleeping
Sleeping
Update mmlu_eval_original.py
Browse files - mmlu_eval_original.py (+2 -1)
mmlu_eval_original.py
CHANGED
@@ -45,7 +45,8 @@ def generate_answer(model, tokenizer, question, choices):
|
|
45 |
return char
|
46 |
return response[:1] # Fallback: take first character
|
47 |
|
48 |
-
|
|
|
49 |
"""
|
50 |
Evaluates the model on MMLU across all tasks.
|
51 |
"""
|
|
|
45 |
return char
|
46 |
return response[:1] # Fallback: take first character
|
47 |
|
48 |
+
@torch.no_grad()
|
49 |
+
def eval(model, tokenizer, num_questions_per_task=5, dev_df, test_df):
|
50 |
"""
|
51 |
Evaluates the model on MMLU across all tasks.
|
52 |
"""
|