gardarjuto's picture
switch to new results file format, code formatting, efficiency optimizations
9674655
raw
history blame contribute delete
786 Bytes
from dataclasses import dataclass
from enum import Enum
@dataclass(frozen=True)
class Task:
    """Immutable description of a single evaluation task.

    Instances are used as the values of ``Tasks`` enum members, so they are
    frozen: a frozen dataclass cannot be mutated after construction and is
    hashable, which keeps the shared enum constants safe from accidental
    in-place edits.

    Attributes:
        benchmark: Benchmark identifier string (presumably the key under
            which the benchmark appears in the results file; confirm
            against the code that reads results).
        metric: Name of the metric reported for the benchmark
            (e.g. ``"exact_match"``).
        col_name: Human-readable label for the task (presumably the column
            header shown for it; confirm against the display code).
    """

    benchmark: str
    metric: str
    col_name: str
class Tasks(Enum):
    """Enumeration of the evaluation tasks; every member's value is a ``Task``.

    Member names are not contiguous (there is no ``task3`` or ``task4``);
    the existing names are kept as-is because other code may refer to them.
    """

    task0 = Task(
        benchmark="icelandic_winogrande_stringmatch",
        metric="exact_match",
        col_name="WinoGrande-IS (3-shot)",
    )
    task1 = Task(
        benchmark="icelandic_sentences_ged_stringmatch",
        metric="exact_match",
        col_name="GED",
    )
    task2 = Task(
        benchmark="icelandic_inflection_all",
        metric="exact_match",
        col_name="Inflection (1-shot)",
    )
    task5 = Task(
        benchmark="icelandic_belebele",
        metric="exact_match",
        col_name="Belebele (IS)",
    )
    task6 = Task(
        benchmark="icelandic_arc_challenge",
        metric="exact_match",
        col_name="ARC-Challenge-IS",
    )
    # The only task here scored with an LLM-judge metric rather than exact match.
    task7 = Task(
        benchmark="icelandic_wiki_qa",
        metric="llm_judge_score",
        col_name="WikiQA-IS",
    )