saattrupdan committed on
Commit
fd7fab5
·
verified ·
1 Parent(s): 5c9ed9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -13
app.py CHANGED
@@ -164,7 +164,7 @@ SUMMARISATION = Task(name="summarisation", metric="bertscore")
164
  KNOWLEDGE = Task(name="knowledge", metric="mcc")
165
  REASONING = Task(name="reasoning", metric="mcc")
166
  GRAMMAR = Task(name="grammar", metric="mcc")
167
- QUESTION_ANSWERING = Task(name="question answering", metric="em")
168
  TEXT_CLASSIFICATION = Task(name="text classification", metric="mcc")
169
  INFORMATION_EXTRACTION = Task(name="information extraction", metric="micro_f1_no_misc")
170
  ALL_TASKS = [obj for obj in globals().values() if isinstance(obj, Task)]
@@ -203,13 +203,13 @@ DATASETS = [
203
  Dataset(name="scala-de", language=GERMAN, task=GRAMMAR),
204
  Dataset(name="scala-nl", language=DUTCH, task=GRAMMAR),
205
  Dataset(name="scala-en", language=ENGLISH, task=GRAMMAR),
206
- Dataset(name="scandiqa-da", language=DANISH, task=QUESTION_ANSWERING),
207
- Dataset(name="norquad", language=NORWEGIAN, task=QUESTION_ANSWERING),
208
- Dataset(name="scandiqa-sv", language=SWEDISH, task=QUESTION_ANSWERING),
209
- Dataset(name="nqii", language=ICELANDIC, task=QUESTION_ANSWERING),
210
- Dataset(name="germanquad", language=GERMAN, task=QUESTION_ANSWERING),
211
- Dataset(name="squad", language=ENGLISH, task=QUESTION_ANSWERING),
212
- Dataset(name="squad-nl", language=DUTCH, task=QUESTION_ANSWERING),
213
  Dataset(name="nordjylland-news", language=DANISH, task=SUMMARISATION),
214
  Dataset(name="mlsum", language=GERMAN, task=SUMMARISATION),
215
  Dataset(name="rrn", language=ICELANDIC, task=SUMMARISATION),
@@ -671,11 +671,6 @@ def fetch_results() -> dict[Language, pd.DataFrame]:
671
  for record in records:
672
  model_name = record["model"]
673
 
674
- # Manual fix for OpenAI models: Only keep the validation split results
675
- if "gpt-3.5" in model_name or "gpt-4" in model_name:
676
- if not record.get("validation_split", False):
677
- continue
678
-
679
  dataset_name = record["dataset"]
680
  if dataset_name in possible_dataset_names:
681
  dataset = next(
 
164
  KNOWLEDGE = Task(name="knowledge", metric="mcc")
165
  REASONING = Task(name="reasoning", metric="mcc")
166
  GRAMMAR = Task(name="grammar", metric="mcc")
167
+ READING_COMPREHENSION = Task(name="reading comprehension", metric="em")
168
  TEXT_CLASSIFICATION = Task(name="text classification", metric="mcc")
169
  INFORMATION_EXTRACTION = Task(name="information extraction", metric="micro_f1_no_misc")
170
  ALL_TASKS = [obj for obj in globals().values() if isinstance(obj, Task)]
 
203
  Dataset(name="scala-de", language=GERMAN, task=GRAMMAR),
204
  Dataset(name="scala-nl", language=DUTCH, task=GRAMMAR),
205
  Dataset(name="scala-en", language=ENGLISH, task=GRAMMAR),
206
+ Dataset(name="scandiqa-da", language=DANISH, task=READING_COMPREHENSION),
207
+ Dataset(name="norquad", language=NORWEGIAN, task=READING_COMPREHENSION),
208
+ Dataset(name="scandiqa-sv", language=SWEDISH, task=READING_COMPREHENSION),
209
+ Dataset(name="nqii", language=ICELANDIC, task=READING_COMPREHENSION),
210
+ Dataset(name="germanquad", language=GERMAN, task=READING_COMPREHENSION),
211
+ Dataset(name="squad", language=ENGLISH, task=READING_COMPREHENSION),
212
+ Dataset(name="squad-nl", language=DUTCH, task=READING_COMPREHENSION),
213
  Dataset(name="nordjylland-news", language=DANISH, task=SUMMARISATION),
214
  Dataset(name="mlsum", language=GERMAN, task=SUMMARISATION),
215
  Dataset(name="rrn", language=ICELANDIC, task=SUMMARISATION),
 
671
  for record in records:
672
  model_name = record["model"]
673
 
 
 
 
 
 
674
  dataset_name = record["dataset"]
675
  if dataset_name in possible_dataset_names:
676
  dataset = next(