Spaces:
Sleeping
Sleeping
Update mmlu_pro_eval_adapted.py
Browse files
- mmlu_pro_eval_adapted.py +2 -0
mmlu_pro_eval_adapted.py
CHANGED
|
@@ -4,6 +4,7 @@ import json
|
|
| 4 |
import argparse
|
| 5 |
import os
|
| 6 |
import torch
|
|
|
|
| 7 |
import random
|
| 8 |
import transformers
|
| 9 |
import time
|
|
@@ -192,6 +193,7 @@ def eval_cot(subject, model, tokenizer, val_df, test_df, num_shots=5):
|
|
| 192 |
|
| 193 |
return correctness, accuracy
|
| 194 |
|
|
|
|
| 195 |
def evaluate_mmlu_pro(model_name, num_subjects=-1, num_questions=10, num_shots=5):
|
| 196 |
model, tokenizer = load_model(model_name, gpu_utilization=0.8)
|
| 197 |
|
|
|
|
| 4 |
import argparse
|
| 5 |
import os
|
| 6 |
import torch
|
| 7 |
+
import spaces
|
| 8 |
import random
|
| 9 |
import transformers
|
| 10 |
import time
|
|
|
|
| 193 |
|
| 194 |
return correctness, accuracy
|
| 195 |
|
| 196 |
+
@spaces.GPU(duration=240) # Extended to 4 minutes (240 s) for larger evaluations
|
| 197 |
def evaluate_mmlu_pro(model_name, num_subjects=-1, num_questions=10, num_shots=5):
|
| 198 |
model, tokenizer = load_model(model_name, gpu_utilization=0.8)
|
| 199 |
|