malhajar committed on
Commit
7cb82bd
·
verified ·
1 Parent(s): be8be25

Update src/display/about.py

Browse files
Files changed (1) hide show
  1. src/display/about.py +2 -2
src/display/about.py CHANGED
@@ -15,7 +15,7 @@ class Tasks(Enum):
15
  task1 = Task("GPQA", "metric_name", "GPQA")
16
  task2 = Task("IFEval", "metric_name", "IFEval")
17
  task3 = Task("MUSR", "metric_name", "MUSR")
18
- task4 = Task("GSM8K", "metric_name", "GSM8K")
19
  task5 = Task("MMMLU-fr", "metric_name", "MMMLU-fr")
20
 
21
 
@@ -55,7 +55,7 @@ lm_eval --model vllm --model_args="pretrained=OpenLLM-France/Claire-7B-FR-Instru
55
  Les tâches et les paramètres de few-shot sont :
56
  - BBH : 3-shot, *Big-Bench-Hard* (`acc_norm`)
57
  - IFEval : 0-shot, *Instruction Following Evaluation* (inst_level_strict_acc,none et prompt_level_strict_acc,none)
58
- - GPQA : 0-shot, *Generalized Purpose Question Answering* (`acc_norm`)
59
  - MMLU : 5-shot, (average of all the results `acc`)
60
  - MuSR : 5-shot, *MuSR* (`acc_norm`)
61
  - GSM8k : 5-shot, *gsm8k* (`acc`)
 
15
  task1 = Task("GPQA", "metric_name", "GPQA")
16
  task2 = Task("IFEval", "metric_name", "IFEval")
17
  task3 = Task("MUSR", "metric_name", "MUSR")
18
+ task4 = Task("MATH Lvl 5", "metric_name", "MATH Lvl 5")
19
  task5 = Task("MMMLU-fr", "metric_name", "MMMLU-fr")
20
 
21
 
 
55
  Les tâches et les paramètres de few-shot sont :
56
  - BBH : 3-shot, *Big-Bench-Hard* (`acc_norm`)
57
  - IFEval : 0-shot, *Instruction Following Evaluation* (inst_level_strict_acc,none et prompt_level_strict_acc,none)
58
+ - MATH : 4-shot, *LVL 5* (exact_match,none)
59
  - MMLU : 5-shot, (average of all the results `acc`)
60
  - MuSR : 5-shot, *MuSR* (`acc_norm`)
61
  - GSM8k : 5-shot, *gsm8k* (`acc`)