Spaces: Sleeping
Update mmlu_eval.py
Browse files
- mmlu_eval.py +3 -0
mmlu_eval.py
CHANGED
@@ -66,12 +66,15 @@ def evaluate_mmlu(model, tokenizer, num_questions_per_task=5):
|
|
66 |
references = []
|
67 |
|
68 |
for sample in sampled_questions:
|
|
|
69 |
question = sample["question"]
|
70 |
choices = [sample["choices"][i] for i in range(4)]
|
71 |
# Convert numeric answer to letter (0->A, 1->B, etc.)
|
72 |
correct_answer = chr(65 + sample["answer"])
|
|
|
73 |
|
74 |
model_output = generate_answer(model, tokenizer, question, choices)
|
|
|
75 |
|
76 |
predictions.append(model_output)
|
77 |
references.append(correct_answer)
|
|
|
66 |
references = []
|
67 |
|
68 |
for sample in sampled_questions:
|
69 |
+
print ("TASK", task_name, "Sample", sample)
|
70 |
question = sample["question"]
|
71 |
choices = [sample["choices"][i] for i in range(4)]
|
72 |
# Convert numeric answer to letter (0->A, 1->B, etc.)
|
73 |
correct_answer = chr(65 + sample["answer"])
|
74 |
+
print ("question:", question, "\n choices:", choices, "\n correct answer:", correct_answer)
|
75 |
|
76 |
model_output = generate_answer(model, tokenizer, question, choices)
|
77 |
+
print ("model output:", model_output)
|
78 |
|
79 |
predictions.append(model_output)
|
80 |
references.append(correct_answer)
|