Joschka Strueber committed
Commit 00b5438 · 1 Parent(s): 8851661

[Fix] convert logits to softmax for kappa_p

Files changed (2):
  1. src/similarity.py +30 -22
  2. src/utils.py +11 -0
src/similarity.py CHANGED
@@ -1,8 +1,9 @@
 import numpy as np
 
-from src.dataloading import load_run_data
 from lmsim.metrics import Metrics, Kappa_p, EC
 
+from src.dataloading import load_run_data
+from src.utils import softmax, one_hot
 
 def load_data_and_compute_similarities(models: list[str], dataset: str, metric_name: str) -> np.array:
     # Load data
@@ -18,22 +19,12 @@ def load_data_and_compute_similarities(models: list[str], dataset: str, metric_n
     return similarities
 
 
-def compute_similarity(metric: Metrics, probs_a: list[np.array], gt_a: list[int], probs_b: list[np.array], gt_b: list[int]) -> float:
+def compute_similarity(metric: Metrics, outputs_a: list[np.array], outputs_b: list[np.array], gt: list[int],) -> float:
     # Check that the models have the same number of responses
-    assert len(probs_a) == len(probs_b), f"Models must have the same number of responses: {len(probs_a)} != {len(probs_b)}"
-
-    # Only keep responses where the ground truth is the same
-    output_a = []
-    output_b = []
-    gt = []
-    for i in range(len(probs_a)):
-        if gt_a[i] == gt_b[i]:
-            output_a.append(probs_a[i])
-            output_b.append(probs_b[i])
-            gt.append(gt_a[i])
-
-    # Placeholder similarity value
-    similarity = metric.compute_k(output_a, output_b, gt)
+    assert len(outputs_a) == len(outputs_b) == len(gt), f"Models must have the same number of responses: {len(outputs_a)} != {len(outputs_b)} != {len(gt)}"
+
+    # Compute similarity values
+    similarity = metric.compute_k(outputs_a, outputs_b, gt)
 
     return similarity
 
@@ -54,11 +45,28 @@ def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]],
     similarities = np.zeros((len(probs), len(probs)))
     for i in range(len(probs)):
         for j in range(i, len(probs)):
-            similarities[i, j] = compute_similarity(metric, probs[i], gts[i], probs[j], gts[j])
+            outputs_a = probs[i]
+            outputs_b = probs[j]
+            gt_a = gts[i]
+            gt_b = gts[j]
+
+            # Format softmax outputs
+            if metric_name == "Kappa_p (prob.)":
+                outputs_a = [softmax(logits) for logits in outputs_a]
+                outputs_b = [softmax(logits) for logits in outputs_b]
+
+            # Assert that the ground truth index is the same
+            indices_to_remove = []
+            if gt_a != gt_b:
+                for idx, (a, b) in enumerate(zip(gt_a, gt_b)):
+                    if a != b:
+                        indices_to_remove.append(idx)
+                for idx in sorted(indices_to_remove, reverse=True):
+                    del outputs_a[idx]
+                    del outputs_b[idx]
+                    del gt_a[idx]
+                    del gt_b[idx]
+
+            similarities[i, j] = compute_similarity(metric, outputs_a, outputs_b, gt_a)
             similarities[j, i] = similarities[i, j]
     return similarities
-
-def one_hot(probs: np.array) -> np.array:
-    one_hot = np.zeros_like(probs)
-    one_hot[np.argmax(probs)] = 1
-    return one_hot
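For context, here is a minimal sketch of the preprocessing this commit adds to `compute_pairwise_similarities`: when the selected metric is "Kappa_p (prob.)", each model's raw logits are normalized with a softmax, and any question whose ground-truth index differs between the two runs is dropped before the aligned outputs reach `metric.compute_k`. The toy logits and ground-truth labels are made up for illustration, the inline `softmax` copies the helper added in `src/utils.py`, and the filtering uses an equivalent keep-list instead of the commit's reverse `del` loop.

```python
import numpy as np

def softmax(logits: np.ndarray) -> np.ndarray:
    # Numerically stable softmax, as added in src/utils.py
    exp_logits = np.exp(logits - np.max(logits))
    return exp_logits / exp_logits.sum(axis=0)

# Toy example: two models answering three 4-way multiple-choice questions.
# Raw logits per question (illustrative values only).
outputs_a = [np.array([2.0, 0.5, -1.0, 0.1]),
             np.array([0.2, 3.1, 0.0, -0.5]),
             np.array([1.0, 1.0, 0.9, 0.8])]
outputs_b = [np.array([1.5, 0.0, -0.2, 0.3]),
             np.array([0.1, 2.2, 0.4, -0.1]),
             np.array([0.0, 0.5, 2.0, 0.1])]
gt_a = [0, 1, 2]
gt_b = [0, 1, 3]   # last ground-truth index disagrees between the two runs

# "Kappa_p (prob.)" expects probability distributions, not raw logits.
outputs_a = [softmax(logits) for logits in outputs_a]
outputs_b = [softmax(logits) for logits in outputs_b]
assert all(np.isclose(p.sum(), 1.0) for p in outputs_a + outputs_b)

# Keep only questions whose ground-truth index matches in both runs,
# mirroring the index-removal loop in compute_pairwise_similarities.
keep = [idx for idx, (a, b) in enumerate(zip(gt_a, gt_b)) if a == b]
outputs_a = [outputs_a[idx] for idx in keep]
outputs_b = [outputs_b[idx] for idx in keep]
gt = [gt_a[idx] for idx in keep]

# The aligned outputs and shared ground truth are what compute_similarity
# forwards to metric.compute_k(outputs_a, outputs_b, gt).
print(len(outputs_a), len(outputs_b), gt)   # 2 2 [0, 1]
```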
 
 
 
 
 
src/utils.py ADDED
@@ -0,0 +1,11 @@
+import numpy as np
+
+
+def softmax(logits: np.ndarray) -> np.ndarray:
+    exp_logits = np.exp(logits - np.max(logits))
+    return exp_logits / exp_logits.sum(axis=0)
+
+def one_hot(probs: np.array) -> np.array:
+    one_hot = np.zeros_like(probs)
+    one_hot[np.argmax(probs)] = 1
+    return one_hot
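A short usage sketch of the two new helpers, assuming it is run from the repository root so that `src.utils` is importable; the input logits are made-up values. `softmax` turns raw logits into a valid probability distribution, which is what the probabilistic Kappa_p variant consumes, while `one_hot` collapses a distribution to its argmax, a discrete top-1 answer (both are imported in `src/similarity.py`, though only `softmax` is exercised in this commit's hunks).

```python
import numpy as np

from src.utils import softmax, one_hot

# Made-up logits for a single 4-way multiple-choice question.
logits = np.array([2.0, 0.5, -1.0, 0.1])

probs = softmax(logits)
# A valid probability distribution: non-negative entries that sum to 1.
assert np.all(probs >= 0) and np.isclose(probs.sum(), 1.0)

# one_hot keeps only the argmax, i.e. the model's discrete top-1 answer.
choice = one_hot(probs)
assert choice.tolist() == [1.0, 0.0, 0.0, 0.0]

print(probs, choice)
```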