H2H-eval-comparator

Sleeping

rohansampath committed on Feb 16

Commit

6cf3cc0

verified ·

1 Parent(s): cd9ff23

Update mmlu_eval_original.py

Files changed (1) hide show

mmlu_eval_original.py CHANGED Viewed

@@ -45,7 +45,8 @@ def generate_answer(model, tokenizer, question, choices):
             return char
     return response[:1]  # Fallback: take first character
-def evaluate_mmlu(model, tokenizer, num_questions_per_task=5):
     """
     Evaluates the model on MMLU across all tasks.
     """

             return char
     return response[:1]  # Fallback: take first character
+@torch.no_grad()
+def eval(model, tokenizer, num_questions_per_task=5, dev_df, test_df):
     """
     Evaluates the model on MMLU across all tasks.
     """