Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
d2471f2
1
Parent(s):
1072829
[Fix, Add] fix bug with metric names
Browse files
- app.py +2 -1
- src/similarity.py +3 -3
app.py
CHANGED
@@ -118,7 +118,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
118 |
model_dropdown = gr.Dropdown(
|
119 |
choices=get_leaderboard_models_cached(),
|
120 |
label="Select Models",
|
121 |
-
value=["HuggingFaceTB/SmolLM2-1.7B-Instruct", "
|
122 |
multiselect=True,
|
123 |
filterable=True,
|
124 |
allow_custom_value=False,
|
@@ -158,6 +158,7 @@ with gr.Blocks(title="LLM Similarity Analyzer") as demo:
|
|
158 |
- **Models**: Open LLM Leaderboard models \n
|
159 |
- Every model evaluation is gated on Hugging Face and access has to be requested. \n
|
160 |
- We requested access for the most popular models, but some may be missing. \n
|
|
|
161 |
- **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
|
162 |
|
163 |
if __name__ == "__main__":
|
|
|
118 |
model_dropdown = gr.Dropdown(
|
119 |
choices=get_leaderboard_models_cached(),
|
120 |
label="Select Models",
|
121 |
+
value=["HuggingFaceTB/SmolLM2-1.7B-Instruct", "tiiuae/Falcon3-7B-Instruct", "google/gemma-2-27b-it", "Qwen/Qwen2.5-72B-Instruct"],
|
122 |
multiselect=True,
|
123 |
filterable=True,
|
124 |
allow_custom_value=False,
|
|
|
158 |
- **Models**: Open LLM Leaderboard models \n
|
159 |
- Every model evaluation is gated on Hugging Face and access has to be requested. \n
|
160 |
- We requested access for the most popular models, but some may be missing. \n
|
161 |
+
- Notably, loading data is not possible for many meta-llama and gemma models.
|
162 |
- **Metrics**: CAPA (probabilistic), CAPA (deterministic), Error Consistency""")
|
163 |
|
164 |
if __name__ == "__main__":
|
src/similarity.py
CHANGED
@@ -31,9 +31,9 @@ def compute_similarity(metric: Metrics, outputs_a: list[np.array], outputs_b: li
|
|
31 |
|
32 |
def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
|
33 |
# Select chosen metric
|
34 |
-
if metric_name == "
|
35 |
metric = CAPA()
|
36 |
-
elif metric_name == "
|
37 |
metric = CAPA(prob=False)
|
38 |
# Convert probabilities to one-hot
|
39 |
probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
|
@@ -51,7 +51,7 @@ def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]],
|
|
51 |
gt_b = gts[j]
|
52 |
|
53 |
# Format softmax outputs
|
54 |
-
if metric_name == "
|
55 |
outputs_a = [softmax(logits) for logits in outputs_a]
|
56 |
outputs_b = [softmax(logits) for logits in outputs_b]
|
57 |
|
|
|
31 |
|
32 |
def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
|
33 |
# Select chosen metric
|
34 |
+
if metric_name == "CAPA":
|
35 |
metric = CAPA()
|
36 |
+
elif metric_name == "CAPA (det.)":
|
37 |
metric = CAPA(prob=False)
|
38 |
# Convert probabilities to one-hot
|
39 |
probs = [[one_hot(p) for p in model_probs] for model_probs in probs]
|
|
|
51 |
gt_b = gts[j]
|
52 |
|
53 |
# Format softmax outputs
|
54 |
+
if metric_name == "CAPA":
|
55 |
outputs_a = [softmax(logits) for logits in outputs_a]
|
56 |
outputs_b = [softmax(logits) for logits in outputs_b]
|
57 |
|