Added categories to `tasks_metadata.json`
- .gitignore +21 -0
- app.py +2 -2
- compare_significance.py +1 -1
- server.py +2 -3
- tasks_metadata.json +280 -202
.gitignore
ADDED
@@ -0,0 +1,21 @@
+.eric6project/
+.eric7project/
+.ropeproject/
+.jedi/
+.directory/
+*.pyc
+*.pyo
+*.orig
+*.bak
+*.rej
+*~
+cur/
+tmp/
+__pycache__/
+__pypackages__
+*.DS_Store
+.pytest_cache/
+venv/
+.venv/
+env/
+.env/
app.py
CHANGED
@@ -8,7 +8,7 @@ from gradio_modal import Modal
 from content import (HEADER_MARKDOWN, LEADERBOARD_TAB_TITLE_MARKDOWN, SUBMISSION_TAB_TITLE_MARKDOWN,
                      MODAL_SUBMIT_MARKDOWN,
                      SUBMISSION_DETAILS_MARKDOWN, RANKING_AFTER_SUBMISSION_MARKDOWN, MORE_DETAILS_MARKDOWN)
-from server import LeaderboardServer, categories
+from server import LeaderboardServer
 
 leaderboard_server = LeaderboardServer()
 
@@ -157,7 +157,7 @@ with (gr.Blocks(theme=gr.themes.Soft(text_size=text_md), css=custom_css) as main
     with gr.Tab("Overall"):
         results_table = gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False, label=None,
                                      visible=True, elem_classes="leaderboard-table")
-    for c in categories:
+    for c in leaderboard_server.tasks_categories:
         with gr.Tab(c):
             results_table = gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False, label=None,
                                          visible=True, elem_classes="leaderboard-table")
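For context, a minimal sketch of the tab layout this change produces, assuming a `LeaderboardServer`-like object exposing the new `tasks_categories` set and a `get_leaderboard()` DataFrame; the `FakeServer` stub below is hypothetical, only there to make the sketch self-contained:

```python
# Sketch of the per-category tabs added by this commit, with a stub
# server so the snippet runs on its own.
import gradio as gr
import pandas as pd


class FakeServer:  # hypothetical stand-in for server.LeaderboardServer
    tasks_categories = {"NLI", "Sentiment", "Knowledge"}

    def get_leaderboard(self):
        return pd.DataFrame({"model": ["A", "B"], "score": [0.9, 0.8]})


leaderboard_server = FakeServer()

with gr.Blocks() as demo:
    with gr.Tab("Overall"):
        gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False)
    # One extra tab per category. Sets are unordered, so sorted() gives a
    # stable tab order; the commit itself iterates the raw set.
    for c in sorted(leaderboard_server.tasks_categories):
        with gr.Tab(c):
            gr.DataFrame(leaderboard_server.get_leaderboard(), interactive=False)

demo.launch()
```

Note that each category tab still renders the same overall leaderboard in this commit; per-category filtering of the results is not part of this change.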
compare_significance.py
CHANGED
@@ -176,7 +176,7 @@ def read_json(file_path):
     with open(METADATA_FILE, "r") as f:
         metadata = json.load(f)
 
-    all_tasks = list(metadata
+    all_tasks = list(metadata.keys())
     all_missing_tasks = []
     for task in all_tasks:
         if task not in data:
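A minimal sketch of the fixed check, with a hypothetical `data` dict standing in for the parsed results file the real script reads:

```python
# With the flat metadata layout, task IDs are the top-level keys of
# tasks_metadata.json, so list(metadata.keys()) enumerates them directly.
import json

METADATA_FILE = "./tasks_metadata.json"

with open(METADATA_FILE, "r") as f:
    metadata = json.load(f)

data = {"benczechmark_summarization": {"acc": 0.42}}  # hypothetical results

all_tasks = list(metadata.keys())
all_missing_tasks = [task for task in all_tasks if task not in data]
print(f"missing {len(all_missing_tasks)} of {len(all_tasks)} tasks")
```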
server.py
CHANGED
@@ -18,8 +18,6 @@ REPO = f"{ORG}/LLM_benchmark_data"
 HF_TOKEN = os.environ.get("HF_TOKEN")
 TASKS_METADATA_PATH = "./tasks_metadata.json"
 
-categories = ['Czech Math Reasoning', 'General Reasoning', 'Historical', 'Knowledge', 'Language Modeling', 'NER', 'NLI', 'Sentiment', 'Summarization', 'Syntactical Reasoning', 'Topic Classification']
-
 class LeaderboardServer:
     def __init__(self):
         self.server_address = REPO
@@ -27,7 +25,8 @@ class LeaderboardServer:
         self.local_leaderboard = snapshot_download(self.server_address, repo_type=self.repo_type, token=HF_TOKEN,
                                                    local_dir="./")
         self.submisssion_id_to_file = {}  # Map submission ids to file paths
-        self.tasks_metadata = json.load(open(TASKS_METADATA_PATH))
+        self.tasks_metadata = json.load(open(TASKS_METADATA_PATH))
+        self.tasks_categories = {self.tasks_metadata[task]["category"] for task in self.tasks_metadata}
         self.submission_ids = set()
         self.fetch_existing_models()
         self.tournament_results = self.load_tournament_results()
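The derived set replaces the removed hard-coded `categories` list; notably, 'Historical' and 'Topic Classification' from that list have no tasks in the new metadata, so deriving the set from the file keeps the categories in sync with the tasks that actually exist. A minimal sketch of the derivation, assuming only that every entry in `tasks_metadata.json` carries a `"category"` field (which the new file below does guarantee):

```python
# Sketch of the new category derivation; mirrors the line added to
# LeaderboardServer.__init__, shown outside the class for brevity.
import json

TASKS_METADATA_PATH = "./tasks_metadata.json"

with open(TASKS_METADATA_PATH) as f:
    tasks_metadata = json.load(f)

# The set comprehension deduplicates per-task categories, so each
# category appears once no matter how many tasks share it.
tasks_categories = {meta["category"] for meta in tasks_metadata.values()}
print(sorted(tasks_categories))
```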
tasks_metadata.json
CHANGED
@@ -1,204 +1,282 @@
 {
+    "benczechmark_propaganda_argumentace": {
+        "name": "Propaganda - Argumentace",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_argumentace",
+        "short_name": "P-Argumentace",
+        "category": "NLI",
+        "abbreviation": "P-ARG"
+    },
+    "benczechmark_propaganda_fabulace": {
+        "name": "Propaganda - Fabulace",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_fabulace",
+        "short_name": "P-Fabulace",
+        "category": "NLI",
+        "abbreviation": "P-FAB"
+    },
+    "benczechmark_propaganda_nazor": {
+        "name": "Propaganda - Nazor",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_nazor",
+        "short_name": "P-Názor",
+        "category": "NLI",
+        "abbreviation": "P-NAZOR"
+    },
+    "benczechmark_propaganda_strach": {
+        "name": "Propaganda - Strach",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_strach",
+        "short_name": "P-Strach",
+        "category": "NLI",
+        "abbreviation": "P-STCH"
+    },
+    "benczechmark_propaganda_zamereni": {
+        "name": "Propaganda - Zamereni",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_zamereni",
+        "short_name": "P-Zaměření",
+        "category": "NLI",
+        "abbreviation": "P-MER"
+    },
+    "benczechmark_propaganda_demonizace": {
+        "name": "Propaganda - Demonizace",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_demonizace",
+        "short_name": "P-Demonizace",
+        "category": "NLI",
+        "abbreviation": "P-DEMON"
+    },
+    "benczechmark_propaganda_lokace": {
+        "name": "Propaganda - Lokace",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_lokace",
+        "short_name": "P-Lokace",
+        "category": "NLI",
+        "abbreviation": "P-LOK"
+    },
+    "benczechmark_propaganda_relativizace": {
+        "name": "Propaganda - Relativizace",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_relativizace",
+        "short_name": "P-Relativizace",
+        "category": "NLI",
+        "abbreviation": "P-REL"
+    },
+    "benczechmark_propaganda_vina": {
+        "name": "Propaganda - Vina",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_vina",
+        "short_name": "P-Vina",
+        "category": "NLI",
+        "abbreviation": "P-VINA"
+    },
+    "benczechmark_propaganda_zanr": {
+        "name": "Propaganda - Zanr",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_zanr",
+        "short_name": "P-Žánr",
+        "category": "NLI",
+        "abbreviation": "P-ZANR"
+    },
+    "benczechmark_propaganda_emoce": {
+        "name": "Propaganda - Emoce",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_emoce",
+        "short_name": "P-Emoce",
+        "category": "NLI",
+        "abbreviation": "P-EMOCE"
+    },
+    "benczechmark_propaganda_nalepkovani": {
+        "name": "Propaganda - Nalepkovani",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_nalepkovani",
+        "short_name": "P-Nalepkování",
+        "category": "NLI",
+        "abbreviation": "P-LEP"
+    },
+    "benczechmark_propaganda_rusko": {
+        "name": "Propaganda - Rusko",
+        "source_url": "https://huggingface.co/datasets/CZLC/propaganda_rusko",
+        "short_name": "P-Rusko",
+        "category": "NLI",
+        "abbreviation": "P-RUS"
+    },
+    "benczechmark_sentiment_mall": {
+        "name": "CzechSentiment MALL",
+        "source_url": "https://huggingface.co/datasets/CZLC/mall_sentiment_balanced",
+        "short_name": "S-Mall",
+        "category": "Sentiment",
+        "abbreviation": "S-MALL"
+    },
+    "benczechmark_sentiment_fb": {
+        "name": "CzechSentiment FB",
+        "source_url": "https://huggingface.co/datasets/CZLC/fb_sentiment_balanced",
+        "short_name": "S-FB",
+        "category": "Sentiment",
+        "abbreviation": "S-FB"
+    },
+    "benczechmark_sentiment_csfd": {
+        "name": "CzechSentiment CSFD",
+        "source_url": "https://huggingface.co/datasets/CZLC/csfd_sentiment_balanced",
+        "short_name": "S-CSFD",
+        "category": "Sentiment",
+        "abbreviation": "S-CSFD"
+    },
+    "benczechmark_summarization": {
+        "name": "SUMECZECH",
+        "source_url": "https://huggingface.co/datasets/CZLC/sumeczech_downsampled",
+        "short_name": "Summarization",
+        "category": "Summarization",
+        "abbreviation": "SUM"
+    },
+    "benczechmark_grammarerrorcorrection": {
+        "name": "GrammarErrorCorrection",
+        "source_url": "https://huggingface.co/datasets/CZLC/cs_gec",
+        "short_name": "Grammar Error Correction",
+        "category": "Syntactical Reasoning",
+        "abbreviation": "GEC"
+    },
+    "benczechmark_cs_naturalquestions": {
+        "name": "NaturalQuestions-CZ",
+        "source_url": "https://huggingface.co/datasets/CZLC/cs_naturalquestions",
+        "short_name": "CS Natural Questions",
+        "category": "Knowledge",
+        "abbreviation": "NQ"
+    },
+    "benczechmark_cs_sqad32": {
+        "name": "SQAD3.2",
+        "source_url": "https://huggingface.co/datasets/CZLC/SQAD_3.2",
+        "short_name": "CS SQAD 3.2",
+        "category": "Knowledge",
+        "abbreviation": "SQAD32"
+    },
+    "benczechmark_cs_triviaQA": {
+        "name": "TriviaQA-CZ",
+        "source_url": "https://huggingface.co/datasets/CZLC/cs_triviaqa",
+        "short_name": "CS TriviaQA",
+        "category": "Knowledge",
+        "abbreviation": "TQA"
+    },
+    "benczechmark_csfever_nli": {
+        "name": "CSFEVER",
+        "source_url": "https://huggingface.co/datasets/CZLC/ctu-aic/csfever_nli",
+        "short_name": "CSFever NLI",
+        "category": "NLI",
+        "abbreviation": "CFR"
+    },
+    "benczechmark_ctkfacts_nli": {
+        "name": "CTKFACTS",
+        "source_url": "https://huggingface.co/datasets/CZLC/ctu-aic/ctkfacts_nli",
+        "short_name": "CTKFacts NLI",
+        "category": "NLI",
+        "abbreviation": "CTK"
+    },
+    "benczechmark_cs_ner": {
+        "name": "CZECH NER CORPUS 2.0",
+        "source_url": "https://huggingface.co/datasets/CZLC/fewshot-goes-multilingual/cs_czech-named-entity-corpus_2.0",
+        "short_name": "CS NER",
+        "category": "NER",
+        "abbreviation": "CZNERC"
+    },
+    "benczechmark_hellaswag": {
+        "name": "HellaSwag-CZ",
+        "source_url": "https://huggingface.co/datasets/CZLC/cs_hellaswag",
+        "short_name": "HellaSwag",
+        "category": "Language Modeling",
+        "abbreviation": "HASG"
+    },
+    "benczechmark_histcorpus": {
+        "name": "Historical Corpus",
+        "source_url": "https://huggingface.co/datasets/CZLC/benczechmark_histcorpus",
+        "short_name": "HistCorpus",
+        "category": "Language Modeling",
+        "abbreviation": "HIST"
+    },
+    "benczechmark_klokan_qa": {
+        "name": "Klokan QA",
+        "source_url": "https://huggingface.co/datasets/hynky/klokan-qa",
+        "short_name": "Klokan QA",
+        "category": "Czech Math Reasoning",
+        "abbreviation": "KQA"
+    },
+    "benczechmark_cs_court_decisions_ner": {
+        "name": "Czech Court Decisions",
+        "source_url": "https://huggingface.co/datasets/CZLC/fewshot-goes-multilingual/cs_czech-court-decisions-ner",
+        "short_name": "CS Court Decisions NER",
+        "category": "NER",
+        "abbreviation": "CCDNER"
+    },
+    "benczechmark_umimeto_biology": {
+        "name": "Umimeto.cz - Biology",
+        "source_url": "https://huggingface.co/datasets/CZLC/umimeto-biology",
+        "short_name": "Umimeto.cz - Biology",
+        "category": "General Reasoning",
+        "abbreviation": "UT-BIO"
+    },
+    "benczechmark_umimeto_chemistry": {
+        "name": "Umimeto.cz - Chemistry",
+        "source_url": "https://huggingface.co/datasets/CZLC/umimeto-chemistry",
+        "short_name": "Umimeto.cz - Chemistry",
+        "category": "General Reasoning",
+        "abbreviation": "UT-CHEM"
+    },
+    "benczechmark_umimeto_czech": {
+        "name": "Umimeto.cz - Czech Language",
+        "source_url": "https://huggingface.co/datasets/CZLC/umimeto-czech",
+        "short_name": "Umimeto.cz - Czech",
+        "category": "General Reasoning",
+        "abbreviation": "UT-CZEL"
+    },
+    "benczechmark_umimeto_history": {
+        "name": "Umimeto.cz - History",
+        "source_url": "https://huggingface.co/datasets/CZLC/umimeto-history",
+        "short_name": "Umimeto.cz - History",
+        "category": "General Reasoning",
+        "abbreviation": "UT-HIST"
+    },
+    "benczechmark_umimeto_informatics": {
+        "name": "Umimeto.cz - Informatics",
+        "source_url": "https://huggingface.co/datasets/CZLC/umimeto-informatics",
+        "short_name": "Umimeto.cz - Informatics",
+        "category": "General Reasoning",
+        "abbreviation": "UT-IT"
+    },
+    "benczechmark_umimeto_math": {
+        "name": "Umimeto.cz - Math",
+        "source_url": "https://huggingface.co/datasets/CZLC/umimeto-math",
+        "short_name": "Umimeto.cz - Math",
+        "category": "Czech Math Reasoning",
+        "abbreviation": "UT-MATH"
+    },
+    "benczechmark_umimeto_physics": {
+        "name": "Umimeto.cz - Physics",
+        "source_url": "https://huggingface.co/datasets/CZLC/umimeto-physics",
+        "short_name": "Umimeto.cz - Physics",
+        "category": "General Reasoning",
+        "abbreviation": "UT-PHYS"
+    },
+    "benczechmark_cermat_czmath_mc": {
+        "name": "CERMAT - Czech Math - MC",
+        "source_url": "https://huggingface.co/datasets/CZLC/cermat_math_mc",
+        "short_name": "Cermat Czech Math MC",
+        "category": "Czech Math Reasoning",
+        "abbreviation": "CCM-MC"
+    },
+    "benczechmark_cermat_czmath_open": {
+        "name": "CERMAT - Czech Math - OPEN",
+        "source_url": "https://huggingface.co/datasets/CZLC/cermat_math_open",
+        "short_name": "Cermat Czech Math Open",
+        "category": "Czech Math Reasoning",
+        "abbreviation": "CCM-OPEN"
+    },
+    "benczechmark_cermat_czech_tf": {
+        "name": "CERMAT - Czech Language - TF",
+        "source_url": "https://huggingface.co/datasets/CZLC/cermat_czech_tf",
+        "short_name": "Cermat Czech Language TF",
+        "category": "General Reasoning",
+        "abbreviation": "CCL-TF"
+    },
+    "benczechmark_cermat_czech_mc": {
+        "name": "CERMAT - Czech Language - MC",
+        "source_url": "https://huggingface.co/datasets/CZLC/cermat_czech_mc",
+        "short_name": "Cermat Czech Language MC",
+        "category": "General Reasoning",
+        "abbreviation": "CCL-MC"
+    },
+    "benczechmark_cermat_czech_open": {
+        "name": "CERMAT - Czech Language - OPEN",
+        "source_url": "https://huggingface.co/datasets/CZLC/cermat_czech_open",
+        "short_name": "Cermat Czech Language Open",
+        "category": "General Reasoning",
+        "abbreviation": "CCL-OPEN"
     }
-}
+}
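As a usage note, a hypothetical helper (not part of this commit) showing how the new `category` field can group task IDs, e.g. for per-category leaderboard filtering later:

```python
# Hypothetical helper: bucket task IDs by their "category" field.
import json
from collections import defaultdict

with open("tasks_metadata.json") as f:
    tasks_metadata = json.load(f)

tasks_by_category = defaultdict(list)
for task_id, meta in tasks_metadata.items():
    tasks_by_category[meta["category"]].append(task_id)

for category, task_ids in sorted(tasks_by_category.items()):
    print(f"{category}: {len(task_ids)} tasks")
```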