gardarjuto committed on
Commit
80793c6
·
1 Parent(s): 117d89c

add submission instructions to about page

Browse files
Files changed (1) hide show
  1. src/about.py +5 -2
src/about.py CHANGED
@@ -12,7 +12,7 @@ class Task:
12
  # ---------------------------------------------------
13
  class Tasks(Enum):
14
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
15
- task0 = Task("icelandic_winogrande_stringmatch", "exact_match,get-answer", "WinoGrande-IS")
16
  task1 = Task("icelandic_sentences_ged_stringmatch", "exact_match,get-answer", "GED")
17
  task2 = Task("icelandic_inflection_easy", "json_metric,get-answer", "Inflection (common)")
18
  task3 = Task("icelandic_inflection_medium", "json_metric,get-answer", "Inflection (uncommon)")
@@ -33,6 +33,9 @@ INTRODUCTION_TEXT = """
33
 
34
  # Which evaluations are you running? how can people reproduce what you have?
35
  LLM_BENCHMARKS_TEXT = f"""
 
 
 
36
  ## Benchmark tasks
37
  The Icelandic LLM leaderboard evaluates models on several tasks. All of them are set up as generation tasks, where the model's output is compared to the expected output.
38
  This means that models that have not been instruction fine-tuned might perform poorly on these tasks.
@@ -42,7 +45,7 @@ The following tasks are evaluated:
42
  ### WinoGrande-IS
43
  The Icelandic WinoGrande task is a human-translated and localized version of the ~1000 test set examples in the WinoGrande task in English.
44
  Each example consists of a sentence with a blank, and two answer choices for the blank. The task is to choose the correct answer choice using coreference resolution.
45
- The benchmark is designed to test the model's ability to use knowledge and common sense reasoning in Icelandic.
46
  The Icelandic WinoGrande dataset is described in more detail in the IceBERT paper (https://aclanthology.org/2022.lrec-1.464.pdf).
47
  - Link to dataset: https://huggingface.co/datasets/mideind/icelandic-winogrande
48
 
 
12
  # ---------------------------------------------------
13
  class Tasks(Enum):
14
  # task_key in the json file, metric_key in the json file, name to display in the leaderboard
15
+ task0 = Task("icelandic_winogrande_stringmatch", "exact_match,get-answer", "WinoGrande-IS (3-shot)")
16
  task1 = Task("icelandic_sentences_ged_stringmatch", "exact_match,get-answer", "GED")
17
  task2 = Task("icelandic_inflection_easy", "json_metric,get-answer", "Inflection (common)")
18
  task3 = Task("icelandic_inflection_medium", "json_metric,get-answer", "Inflection (uncommon)")
 
33
 
34
  # Which evaluations are you running? how can people reproduce what you have?
35
  LLM_BENCHMARKS_TEXT = f"""
36
+ ## New submissions
37
+ Do you want your model to be included on the leaderboard? Open a discussion on this repository with the details of your model and we will get back to you.
38
+
39
  ## Benchmark tasks
40
  The Icelandic LLM leaderboard evaluates models on several tasks. All of them are set up as generation tasks, where the model's output is compared to the expected output.
41
  This means that models that have not been instruction fine-tuned might perform poorly on these tasks.
 
45
  ### WinoGrande-IS
46
  The Icelandic WinoGrande task is a human-translated and localized version of the ~1000 test set examples in the WinoGrande task in English.
47
  Each example consists of a sentence with a blank, and two answer choices for the blank. The task is to choose the correct answer choice using coreference resolution.
48
+ The benchmark is designed to test the model's ability to use knowledge and common sense reasoning in Icelandic. For this benchmark, we use 3-shot evaluation.
49
  The Icelandic WinoGrande dataset is described in more detail in the IceBERT paper (https://aclanthology.org/2022.lrec-1.464.pdf).
50
  - Link to dataset: https://huggingface.co/datasets/mideind/icelandic-winogrande
51