xeon27 committed on
Commit
006ba57
·
1 Parent(s): a004e6b

Add base eval tasks

Browse files
Files changed (1) hide show
  1. src/about.py +16 -2
src/about.py CHANGED
@@ -12,8 +12,22 @@ class Task:
12
  # ---------------------------------------------------
13
  class Tasks(Enum):
14
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
15
- task0 = Task("anli_r1", "acc", "ANLI")
16
- task1 = Task("logiqa", "acc_norm", "LogiQA")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  NUM_FEWSHOT = 0 # Change with your few shot
19
  # ---------------------------------------------------
 
12
  # ---------------------------------------------------
13
  class Tasks(Enum):
14
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
15
+ # task0 = Task("anli_r1", "acc", "ANLI")
16
+ # task1 = Task("logiqa", "acc_norm", "LogiQA")
17
+
18
+ task0 = Task("arc_easy", "accuracy", "ARC-Easy")
19
+ task1 = Task("arc_challenge", "accuracy", "ARC-Challenge")
20
+ task2 = Task("drop", "mean", "DROP")
21
+ task3 = Task("winogrande", "accuracy", "WinoGrande")
22
+ task4 = Task("gsm8k", "accuracy", "GSM8K")
23
+ task5 = Task("hellaswag", "accuracy", "HellaSwag")
24
+ task6 = Task("humaneval", "mean", "HumanEval")
25
+ task7 = Task("ifeval", "final_acc", "IFEval")
26
+ task8 = Task("math", "accuracy", "MATH")
27
+ task9 = Task("mmlu", "accuracy", "MMLU")
28
+ task10 = Task("mmlu_pro", "accuracy", "MMLU-Pro")
29
+ task11 = Task("gpqa_diamond", "accuracy", "GPQA-Diamond")
30
+
31
 
32
  NUM_FEWSHOT = 0 # Change with your few shot
33
  # ---------------------------------------------------