Joschka Strueber committed on
Commit
d2471f2
·
1 Parent(s): 1072829

[Fix, Add] fix bug with metric names

Browse files
Files changed (2) hide show
  1. app.py +2 -1
  2. src/similarity.py +3 -3
app.py CHANGED
@@ -118,7 +118,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
118
  model_dropdown = gr.Dropdown(
119
  choices=get_leaderboard_models_cached(),
120
  label="Select Models",
121
- value=["HuggingFaceTB/SmolLM2-1.7B-Instruct", "meta_llama/Llama-3.2-3B-Instruct", "Qwen/Qwen2.5-72B-Instruct"],
122
  multiselect=True,
123
  filterable=True,
124
  allow_custom_value=False,
@@ -158,6 +158,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
158
  - **Models**: Open LLM Leaderboard models \n
159
  - Every model evaluation is gated on Hugging Face and access has to be requested. \n
160
  - We requested access for the most popular models, but some may be missing. \n
 
161
  - **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
162
 
163
  if __name__ == "__main__":
 
118
  model_dropdown = gr.Dropdown(
119
  choices=get_leaderboard_models_cached(),
120
  label="Select Models",
121
+ value=["HuggingFaceTB/SmolLM2-1.7B-Instruct", "tiiuae/Falcon3-7B-Instruct", "google/gemma-2-27b-it", "Qwen/Qwen2.5-72B-Instruct"],
122
  multiselect=True,
123
  filterable=True,
124
  allow_custom_value=False,
 
158
  - **Models**: Open LLM Leaderboard models \n
159
  - Every model evaluation is gated on Hugging Face and access has to be requested. \n
160
  - We requested access for the most popular models, but some may be missing. \n
161
+ - Notably, loading data is not possible for many meta-llama and gemma models.
162
  - **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
163
 
164
  if __name__ == "__main__":
src/similarity.py CHANGED
@@ -31,9 +31,9 @@ def compute_similarity(metric: Metrics, outputs_a: list[np.array], outputs_b: li
31
 
32
  def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
33
  # Select chosen metric
34
- if metric_name == "Kappa_p (prob.)":
35
  metric = CAPA()
36
- elif metric_name == "Kappa_p (det.)":
37
  metric = CAPA(prob=False)
38
  # Convert probabilities to one-hot
39
  probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
@@ -51,7 +51,7 @@ def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]],
51
  gt_b = gts[j]
52
 
53
  # Format softmax outputs
54
- if metric_name == "Kappa_p (prob.)":
55
  outputs_a = [softmax(logits) for logits in outputs_a]
56
  outputs_b = [softmax(logits) for logits in outputs_b]
57
 
 
31
 
32
  def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
33
  # Select chosen metric
34
+ if metric_name == "CAPA":
35
  metric = CAPA()
36
+ elif metric_name == "CAPA (det.)":
37
  metric = CAPA(prob=False)
38
  # Convert probabilities to one-hot
39
  probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
 
51
  gt_b = gts[j]
52
 
53
  # Format softmax outputs
54
+ if metric_name == "CAPA":
55
  outputs_a = [softmax(logits) for logits in outputs_a]
56
  outputs_b = [softmax(logits) for logits in outputs_b]
57