Joschka Strueber committed
Commit 5d4059c · 1 Parent(s): 238bffb

[Add] filter gated models

Files changed (2)
  1. app.py +5 -3
  2. src/dataloading.py +13 -7
app.py CHANGED

```diff
@@ -87,7 +87,7 @@ def update_datasets_based_on_models(selected_models, current_dataset):
     return gr.update(choices=[], value=None)
 
 links_markdown = """
-[📄 Paper](https://arxiv.org/pdf/%3CARXIV%20PAPER%20ID%3E.pdf)   |  
+[📄 Paper](https://arxiv.org/abs/6181841)   |  
 [☯ Homepage](https://model-similarity.github.io/)   |  
 [🐱 Code](https://github.com/model-similarity/lm-similarity)   |  
 [🐍 pip install lm-sim](https://pypi.org/project/lm-sim/)   |  
@@ -103,6 +103,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     dataset_dropdown = gr.Dropdown(
         choices=get_leaderboard_datasets(None),
         label="Select Dataset",
+        value="mmlu_pro",
         filterable=True,
         interactive=True,
         allow_custom_value=False,
@@ -117,6 +118,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     model_dropdown = gr.Dropdown(
         choices=get_leaderboard_models_cached(),
         label="Select Models",
+        value=["Qwen/Qwen2.5-"],
         multiselect=True,
         filterable=True,
         allow_custom_value=False,
@@ -154,8 +156,8 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
     - **Datasets**: [Open LLM Leaderboard v2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/) benchmark datasets \n
     - Some datasets are not multiple-choice - for these, the metrics are not applicable. \n
     - **Models**: Open LLM Leaderboard models \n
-    - Every model is gated on Hugging Face and access has to be requested. \n
-    - We requested access to the most popular models, but some may be missing. \n
+    - Every model evaluation is gated on Hugging Face and access has to be requested. \n
+    - We requested access for the most popular models, but some may be missing. \n
     - **Metrics**: Kappa_p (probabilistic), Kappa_p (deterministic), Error Consistency""")
 
 if __name__ == "__main__":
```
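For orientation: the two `value=` additions preselect defaults when the Space loads, presumably so the demo shows something before the user picks anything. A minimal sketch of that pattern, with hypothetical placeholder choices standing in for the real leaderboard data:

```python
import gradio as gr

# Minimal sketch of the default-value pattern above. The choices here are
# hypothetical placeholders; the real app fills them from the Open LLM
# Leaderboard via get_leaderboard_datasets() / get_leaderboard_models_cached().
with gr.Blocks(title="LLM Similarity Analyzer") as demo:
    dataset_dropdown = gr.Dropdown(
        choices=["mmlu_pro", "bbh", "gpqa"],  # placeholder dataset names
        value="mmlu_pro",                     # single-select default: a scalar
        label="Select Dataset",
        filterable=True,
        interactive=True,
        allow_custom_value=False,
    )
    model_dropdown = gr.Dropdown(
        choices=["Qwen/Qwen2.5-7B", "meta-llama/Llama-3.1-8B"],  # placeholders
        value=["Qwen/Qwen2.5-7B"],            # multiselect default: a list
        label="Select Models",
        multiselect=True,
        filterable=True,
        allow_custom_value=False,
    )

if __name__ == "__main__":
    demo.launch()
```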
src/dataloading.py CHANGED

```diff
@@ -14,13 +14,19 @@ def get_leaderboard_models():
     models = []
     for dataset in datasets:
         if dataset.id.endswith("-details"):
-            # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
-            model_part = dataset.id.split("/")[-1].replace("-details", "")
-            if "__" in model_part:
-                provider, model = model_part.split("__", 1)
-                models.append(f"{provider}/{model}")
-            else:
-                models.append(model_part)
+            dataset_id = dataset.id
+            try:
+                # Check if the dataset can be loaded
+                check_gated = datasets.get_dataset_config_names(dataset_id)
+                # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
+                model_part = dataset.id.split("/")[-1].replace("-details", "")
+                if "__" in model_part:
+                    provider, model = model_part.split("__", 1)
+                    models.append(f"{provider}/{model}")
+                else:
+                    models.append(model_part)
+            except Exception as e:
+                pass
 
     return sorted(models)
```
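The filter hinges on the try/except: `get_dataset_config_names` raises for gated repos the current token has no access to, and the except swallows the error so that model is simply dropped from the list. A self-contained sketch of the same idea (the `author=` filter and the function name are assumptions here, since how the `datasets` iterable is built lies outside the hunk shown above):

```python
from huggingface_hub import HfApi
from datasets import get_dataset_config_names


def get_accessible_leaderboard_models() -> list[str]:
    """Sketch of the gating filter above: keep only models whose
    "-details" dataset the current token can actually read."""
    api = HfApi()
    # Assumption: the details repos live under the open-llm-leaderboard org.
    details_repos = api.list_datasets(author="open-llm-leaderboard")

    models = []
    for ds in details_repos:
        if not ds.id.endswith("-details"):
            continue
        try:
            # Metadata-only call that raises for gated repos we were not
            # granted access to, so those models are silently skipped.
            get_dataset_config_names(ds.id)
        except Exception:
            continue
        # Format: "open-llm-leaderboard/<provider>__<model_name>-details"
        model_part = ds.id.split("/")[-1].replace("-details", "")
        if "__" in model_part:
            provider, model = model_part.split("__", 1)
            models.append(f"{provider}/{model}")
        else:
            models.append(model_part)
    return sorted(models)
```

Note that this costs one network round-trip per `-details` repo, so the first run is slow, which is presumably why app.py consumes the result through `get_leaderboard_models_cached()`.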