Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
5d4059c
1
Parent(s):
238bffb
[Add] filter gated models
Browse files
- app.py +5 -3
- src/dataloading.py +13 -7
app.py
CHANGED
@@ -87,7 +87,7 @@ def update_datasets_based_on_models(selected_models, current_dataset):
|
|
87 |
return gr.update(choices=[], value=None)
|
88 |
|
89 |
links_markdown = """
|
90 |
-
[📄 Paper](https://arxiv.org/
|
91 |
[☯ Homepage](https://model-similarity.github.io/) |
|
92 |
[🐱 Code](https://github.com/model-similarity/lm-similarity) |
|
93 |
[🐍 pip install lm-sim](https://pypi.org/project/lm-sim/) |
|
@@ -103,6 +103,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
103 |
dataset_dropdown = gr.Dropdown(
|
104 |
choices=get_leaderboard_datasets(None),
|
105 |
label="Select Dataset",
|
|
|
106 |
filterable=True,
|
107 |
interactive=True,
|
108 |
allow_custom_value=False,
|
@@ -117,6 +118,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
117 |
model_dropdown = gr.Dropdown(
|
118 |
choices=get_leaderboard_models_cached(),
|
119 |
label="Select Models",
|
|
|
120 |
multiselect=True,
|
121 |
filterable=True,
|
122 |
allow_custom_value=False,
|
@@ -154,8 +156,8 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
154 |
- **Datasets**: [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/) benchmark datasets \n
|
155 |
- Some datasets are not multiple-choice - for these, the metrics are not applicable. \n
|
156 |
- **Models**: Open LLM Leaderboard models \n
|
157 |
-
- Every model is gated on Hugging Face and access has to be requested. \n
|
158 |
-
- We requested access
|
159 |
- **Metrics**: Kappa_p (probabilistic), Kappa_p (deterministic), Error Consistency""")
|
160 |
|
161 |
if __name__ == "__main__":
|
|
|
87 |
return gr.update(choices=[], value=None)
|
88 |
|
89 |
links_markdown = """
|
90 |
+
[📄 Paper](https://arxiv.org/abs/6181841) |
|
91 |
[☯ Homepage](https://model-similarity.github.io/) |
|
92 |
[🐱 Code](https://github.com/model-similarity/lm-similarity) |
|
93 |
[🐍 pip install lm-sim](https://pypi.org/project/lm-sim/) |
|
|
|
103 |
dataset_dropdown = gr.Dropdown(
|
104 |
choices=get_leaderboard_datasets(None),
|
105 |
label="Select Dataset",
|
106 |
+
value="mmlu_pro",
|
107 |
filterable=True,
|
108 |
interactive=True,
|
109 |
allow_custom_value=False,
|
|
|
118 |
model_dropdown = gr.Dropdown(
|
119 |
choices=get_leaderboard_models_cached(),
|
120 |
label="Select Models",
|
121 |
+
value=["Qwen/Qwen2.5-"],
|
122 |
multiselect=True,
|
123 |
filterable=True,
|
124 |
allow_custom_value=False,
|
|
|
156 |
- **Datasets**: [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/) benchmark datasets \n
|
157 |
- Some datasets are not multiple-choice - for these, the metrics are not applicable. \n
|
158 |
- **Models**: Open LLM Leaderboard models \n
|
159 |
+
- Every model evaluation is gated on Hugging Face and access has to be requested. \n
|
160 |
+
- We requested access for the most popular models, but some may be missing. \n
|
161 |
- **Metrics**: Kappa_p (probabilistic), Kappa_p (deterministic), Error Consistency""")
|
162 |
|
163 |
if __name__ == "__main__":
|
src/dataloading.py
CHANGED
@@ -14,13 +14,19 @@ def get_leaderboard_models():
|
|
14 |
models = []
|
15 |
for dataset in datasets:
|
16 |
if dataset.id.endswith("-details"):
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
return sorted(models)
|
26 |
|
|
|
14 |
models = []
|
15 |
for dataset in datasets:
|
16 |
if dataset.id.endswith("-details"):
|
17 |
+
dataset_id = dataset.id
|
18 |
+
try:
|
19 |
+
# Check if the dataset can be loaded
|
20 |
+
check_gated = datasets.get_dataset_config_names(dataset_id)
|
21 |
+
# Format: "open-llm-leaderboard/<provider>__<model_name>-details"
|
22 |
+
model_part = dataset.id.split("/")[-1].replace("-details", "")
|
23 |
+
if "__" in model_part:
|
24 |
+
provider, model = model_part.split("__", 1)
|
25 |
+
models.append(f"{provider}/{model}")
|
26 |
+
else:
|
27 |
+
models.append(model_part)
|
28 |
+
except Exception as e:
|
29 |
+
pass
|
30 |
|
31 |
return sorted(models)
|
32 |
|