Commit
·
e5acaa3
1
Parent(s):
637c71d
style: Rename "reasoning" to "common-sense reasoning"
Browse files
app.py
CHANGED
@@ -100,7 +100,7 @@ the [MMLU](https://doi.org/10.48550/arXiv.2009.03300) and
|
|
100 |
[ARC](https://allenai.org/data/arc) datasets. We use the Matthews Correlation
|
101 |
Coefficient (MCC) as the evaluation metric.
|
102 |
|
103 |
-
### Reasoning
|
104 |
Given a scenario and multiple possible endings, choose the correct ending. As with text
|
105 |
classification, we use the probabilities of the answer letter (a, b, c or d) to choose
|
106 |
the answer. The datasets in this task are machine translated versions of the
|
@@ -164,7 +164,7 @@ class Dataset(BaseModel):
|
|
164 |
|
165 |
SUMMARISATION = Task(name="summarisation", metric="bertscore")
|
166 |
KNOWLEDGE = Task(name="knowledge", metric="mcc")
|
167 |
-
REASONING = Task(name="reasoning", metric="mcc")
|
168 |
GRAMMAR = Task(name="grammar", metric="mcc")
|
169 |
READING_COMPREHENSION = Task(name="reading comprehension", metric="em")
|
170 |
TEXT_CLASSIFICATION = Task(name="text classification", metric="mcc")
|
@@ -246,14 +246,14 @@ DATASETS = [
|
|
246 |
Dataset(name="mmlu", language=ENGLISH, task=KNOWLEDGE),
|
247 |
Dataset(name="mmlu-fr", language=FRENCH, task=KNOWLEDGE),
|
248 |
|
249 |
-
Dataset(name="hellaswag-da", language=DANISH, task=REASONING),
|
250 |
-
Dataset(name="hellaswag-no", language=NORWEGIAN, task=REASONING),
|
251 |
-
Dataset(name="hellaswag-sv", language=SWEDISH, task=REASONING),
|
252 |
-
Dataset(name="winogrande-is", language=ICELANDIC, task=REASONING),
|
253 |
-
Dataset(name="hellaswag-de", language=GERMAN, task=REASONING),
|
254 |
-
Dataset(name="hellaswag-nl", language=DUTCH, task=REASONING),
|
255 |
-
Dataset(name="hellaswag", language=ENGLISH, task=REASONING),
|
256 |
-
Dataset(name="hellaswag-fr", language=FRENCH, task=REASONING),
|
257 |
]
|
258 |
|
259 |
|
|
|
100 |
[ARC](https://allenai.org/data/arc) datasets. We use the Matthews Correlation
|
101 |
Coefficient (MCC) as the evaluation metric.
|
102 |
|
103 |
+
### Common-sense Reasoning
|
104 |
Given a scenario and multiple possible endings, choose the correct ending. As with text
|
105 |
classification, we use the probabilities of the answer letter (a, b, c or d) to choose
|
106 |
the answer. The datasets in this task are machine translated versions of the
|
|
|
164 |
|
165 |
SUMMARISATION = Task(name="summarisation", metric="bertscore")
|
166 |
KNOWLEDGE = Task(name="knowledge", metric="mcc")
|
167 |
+
COMMON_SENSE_REASONING = Task(name="common-sense reasoning", metric="mcc")
|
168 |
GRAMMAR = Task(name="grammar", metric="mcc")
|
169 |
READING_COMPREHENSION = Task(name="reading comprehension", metric="em")
|
170 |
TEXT_CLASSIFICATION = Task(name="text classification", metric="mcc")
|
|
|
246 |
Dataset(name="mmlu", language=ENGLISH, task=KNOWLEDGE),
|
247 |
Dataset(name="mmlu-fr", language=FRENCH, task=KNOWLEDGE),
|
248 |
|
249 |
+
Dataset(name="hellaswag-da", language=DANISH, task=COMMON_SENSE_REASONING),
|
250 |
+
Dataset(name="hellaswag-no", language=NORWEGIAN, task=COMMON_SENSE_REASONING),
|
251 |
+
Dataset(name="hellaswag-sv", language=SWEDISH, task=COMMON_SENSE_REASONING),
|
252 |
+
Dataset(name="winogrande-is", language=ICELANDIC, task=COMMON_SENSE_REASONING),
|
253 |
+
Dataset(name="hellaswag-de", language=GERMAN, task=COMMON_SENSE_REASONING),
|
254 |
+
Dataset(name="hellaswag-nl", language=DUTCH, task=COMMON_SENSE_REASONING),
|
255 |
+
Dataset(name="hellaswag", language=ENGLISH, task=COMMON_SENSE_REASONING),
|
256 |
+
Dataset(name="hellaswag-fr", language=FRENCH, task=COMMON_SENSE_REASONING),
|
257 |
]
|
258 |
|
259 |
|