xeon27 committed
Commit ba14348 · 1 Parent(s): 6410971

Add task link in description

Files changed (1)
  1. src/about.py +7 -6
src/about.py CHANGED
@@ -42,20 +42,21 @@ NUM_FEWSHOT = 0 # Change with your few shot
  # Your leaderboard name
  TITLE = """<h1 align="center" id="space-title">LLM Evaluation Leaderboard</h1>"""

+ BASE_TASK_NAMES = ", ".join([f"[{task.value.col_name}]({task.value.source})" for task in Tasks if task.value.type == "base"])
+ AGENTIC_TASK_NAMES = ", ".join([f"[{task.value.col_name}]({task.value.source})" for task in Tasks if task.value.type == "agentic"])
+
  # What does your leaderboard evaluate?
- INTRODUCTION_TEXT = """
- This leaderboard presents the performance of selected LLM models on a set of tasks. The tasks are divided into two categories: base and agentic. The base tasks are:
- """ + ", ".join([f"[{task.value.col_name}]({task.value.source})" for task in Tasks if task.value.type == "base"]) + """. The agentic tasks are:
- """ + ", ".join([f"[{task.value.col_name}]({task.value.source})" for task in Tasks if task.value.type == "agentic"]) + """."""
+ INTRODUCTION_TEXT = f"""
+ This leaderboard presents the performance of selected LLM models on a set of tasks. The tasks are divided into two categories: base and agentic. The base tasks are: {BASE_TASK_NAMES}. The agentic tasks are: {AGENTIC_TASK_NAMES}."""

  # Which evaluations are you running? how can people reproduce what you have?
  LLM_BENCHMARKS_TEXT = f"""
  ## How it works
  The following benchmarks are included:

- Base: [ARC-Easy](https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/arc), ARC-Challenge, DROP, WinoGrande, GSM8K, HellaSwag, HumanEval, IFEval, MATH, MMLU, MMLU-Pro, GPQA-Diamond
+ Base: {BASE_TASK_NAMES}

- Agentic: GAIA, GDM-InterCode-CTF
+ Agentic: {AGENTIC_TASK_NAMES}

  ## Reproducibility
  To reproduce our results, here is the commands you can run:
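For context, the BASE_TASK_NAMES / AGENTIC_TASK_NAMES comprehensions assume a Tasks enum whose member values expose col_name, source, and type attributes. Below is a minimal sketch of what such definitions might look like; the Task field set, the example entries, and the GAIA URL are assumptions for illustration only (only the ARC-Easy link appears in this diff), and the real definitions live elsewhere in src/about.py.

```python
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # internal key for the eval results (assumed field)
    metric: str     # metric shown on the leaderboard (assumed field)
    col_name: str   # display name, used as the markdown link text
    type: str       # "base" or "agentic"
    source: str     # URL that the display name links to

class Tasks(Enum):
    # Hypothetical entries for illustration; the ARC URL is taken from the diff,
    # the GAIA URL is an assumption.
    arc_easy = Task(
        "arc_easy", "accuracy", "ARC-Easy", "base",
        "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/arc",
    )
    gaia = Task(
        "gaia", "accuracy", "GAIA", "agentic",
        "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/gaia",
    )

# Same join as in the commit: each base task becomes a markdown link.
BASE_TASK_NAMES = ", ".join(
    f"[{task.value.col_name}]({task.value.source})"
    for task in Tasks
    if task.value.type == "base"
)
# With the sketch above this yields:
# "[ARC-Easy](https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/arc)"
```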