Spaces:
Running
Running
Joschka Strueber
committed on
Commit
·
00b5438
1
Parent(s):
8851661
[Fix] convert logits to softmax for kappa_p
Browse files
- src/similarity.py +30 -22
- src/utils.py +11 -0
src/similarity.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
import numpy as np
|
2 |
|
3 |
-
from src.dataloading import load_run_data
|
4 |
from lmsim.metrics import Metrics, Kappa_p, EC
|
5 |
|
|
|
|
|
6 |
|
7 |
def load_data_and_compute_similarities(models: list[str], dataset: str, metric_name: str) -> np.array:
|
8 |
# Load data
|
@@ -18,22 +19,12 @@ def load_data_and_compute_similarities(models: list[str], dataset: str, metric_n
|
|
18 |
return similarities
|
19 |
|
20 |
|
21 |
-
def compute_similarity(metric: Metrics,
|
22 |
# Check that the models have the same number of responses
|
23 |
-
assert len(
|
24 |
-
|
25 |
-
#
|
26 |
-
|
27 |
-
output_b = []
|
28 |
-
gt = []
|
29 |
-
for i in range(len(probs_a)):
|
30 |
-
if gt_a[i] == gt_b[i]:
|
31 |
-
output_a.append(probs_a[i])
|
32 |
-
output_b.append(probs_b[i])
|
33 |
-
gt.append(gt_a[i])
|
34 |
-
|
35 |
-
# Placeholder similarity value
|
36 |
-
similarity = metric.compute_k(output_a, output_b, gt)
|
37 |
|
38 |
return similarity
|
39 |
|
@@ -54,11 +45,28 @@ def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]],
|
|
54 |
similarities = np.zeros((len(probs), len(probs)))
|
55 |
for i in range(len(probs)):
|
56 |
for j in range(i, len(probs)):
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
similarities[j, i] = similarities[i, j]
|
59 |
return similarities
|
60 |
-
|
61 |
-
def one_hot(probs: np.array) -> np.array:
|
62 |
-
one_hot = np.zeros_like(probs)
|
63 |
-
one_hot[np.argmax(probs)] = 1
|
64 |
-
return one_hot
|
|
|
1 |
import numpy as np
|
2 |
|
|
|
3 |
from lmsim.metrics import Metrics, Kappa_p, EC
|
4 |
|
5 |
+
from src.dataloading import load_run_data
|
6 |
+
from src.utils import softmax, one_hot
|
7 |
|
8 |
def load_data_and_compute_similarities(models: list[str], dataset: str, metric_name: str) -> np.array:
|
9 |
# Load data
|
|
|
19 |
return similarities
|
20 |
|
21 |
|
22 |
+
def compute_similarity(
    metric: Metrics,
    outputs_a: list[np.array],
    outputs_b: list[np.array],
    gt: list[int],
) -> float:
    """Compute the similarity of two models' outputs with the given metric.

    Args:
        metric: Metric object; its ``compute_k`` method is called with the
            two output lists and the ground truth.
        outputs_a: Per-sample outputs of the first model.
        outputs_b: Per-sample outputs of the second model.
        gt: Ground-truth entries, one per sample.

    Returns:
        Whatever ``metric.compute_k`` returns for these inputs (annotated as
        a float).

    Raises:
        AssertionError: If the three sequences differ in length.
    """
    n_a, n_b, n_gt = len(outputs_a), len(outputs_b), len(gt)

    # All three sequences are indexed per sample, so they must line up.
    assert n_a == n_b == n_gt, f"Models must have the same number of responses: {n_a} != {n_b} != {n_gt}"

    return metric.compute_k(outputs_a, outputs_b, gt)
|
30 |
|
|
|
45 |
similarities = np.zeros((len(probs), len(probs)))
|
46 |
for i in range(len(probs)):
|
47 |
for j in range(i, len(probs)):
|
48 |
+
outputs_a = probs[i]
|
49 |
+
outputs_b = probs[j]
|
50 |
+
gt_a = gts[i]
|
51 |
+
gt_b = gts[j]
|
52 |
+
|
53 |
+
# Format softmax outputs
|
54 |
+
if metric_name == "Kappa_p (prob.)":
|
55 |
+
outputs_a = [softmax(logits) for logits in outputs_a]
|
56 |
+
outputs_b = [softmax(logits) for logits in outputs_b]
|
57 |
+
|
58 |
+
# Assert that the ground truth index is the same
|
59 |
+
indices_to_remove = []
|
60 |
+
if gt_a != gt_b:
|
61 |
+
for idx, (a, b) in enumerate(zip(gt_a, gt_b)):
|
62 |
+
if a != b:
|
63 |
+
indices_to_remove.append(idx)
|
64 |
+
for idx in sorted(indices_to_remove, reverse=True):
|
65 |
+
del outputs_a[idx]
|
66 |
+
del outputs_b[idx]
|
67 |
+
del gt_a[idx]
|
68 |
+
del gt_b[idx]
|
69 |
+
|
70 |
+
similarities[i, j] = compute_similarity(metric, outputs_a, outputs_b, gt_a)
|
71 |
similarities[j, i] = similarities[i, j]
|
72 |
return similarities
|
|
|
|
|
|
|
|
|
|
src/utils.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
|
4 |
+
def softmax(logits: np.ndarray) -> np.ndarray:
    """Turn logits into normalised exponential weights.

    The global maximum is subtracted before exponentiating so that large
    logits do not overflow; the result is then divided by its sum along
    axis 0.

    Args:
        logits: Array of raw scores.

    Returns:
        Array of the same shape whose entries sum to 1 along axis 0.
    """
    # Shifting by the max leaves the ratios unchanged but keeps exp() finite.
    shifted = logits - np.max(logits)
    weights = np.exp(shifted)
    normaliser = weights.sum(axis=0)
    return weights / normaliser
|
7 |
+
|
8 |
+
def one_hot(probs: np.array) -> np.array:
    """Return an array shaped like ``probs`` with a single 1 at its maximum.

    All other entries are 0. On ties, the first maximum (in flattened
    order) is chosen, as done by ``np.argmax``.

    Args:
        probs: Array of scores or probabilities.

    Returns:
        Array with the same shape and dtype as ``probs`` containing exactly
        one non-zero entry.
    """
    encoded = np.zeros_like(probs)
    # np.argmax without an axis yields an index into the flattened array, so
    # write through .flat; plain indexing would address the first axis and
    # mark a whole row (or fail) for multi-dimensional input.
    encoded.flat[np.argmax(probs)] = 1
    return encoded
|