Spaces:
Running
Running
File size: 1,956 Bytes
a48b15f 2535891 a48b15f f3cd231 a48b15f f3cd231 a48b15f f3cd231 a48b15f 874e761 a48b15f f3cd231 a48b15f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import numpy as np
from src.dataloading import load_run_data
from lmsim.metrics import Metric, Kappa_p, EC
def load_data_and_compute_similarities(models, dataset, metric_name):
    """Load every model's run on ``dataset`` and compare the models pairwise.

    Args:
        models: Iterable of model identifiers; each is passed to
            ``load_run_data`` together with ``dataset``.
        dataset: Dataset identifier forwarded to ``load_run_data``.
        metric_name: Name of the similarity metric, forwarded to
            ``compute_pairwise_similarities``.

    Returns:
        The matrix returned by ``compute_pairwise_similarities``, with one
        row and one column per entry of ``models``.
    """
    probs = []
    gts = []
    for model in models:
        # load_run_data yields a (outputs, ground truths) pair for one model.
        model_probs, model_gt = load_run_data(model, dataset)
        probs.append(model_probs)
        gts.append(model_gt)

    # Pass by keyword: compute_pairwise_similarities takes the metric name
    # first, so a positional (probs, gts, metric_name) call would bind the
    # outputs to ``metric_name`` and always be rejected as an invalid metric.
    return compute_pairwise_similarities(metric_name=metric_name, probs=probs, gts=gts)
def compute_similarity(metric: Metric, probs_a: list[np.ndarray], gt_a: list[int], probs_b: list[np.ndarray], gt_b: list[int]) -> float:
    """Score two models on the responses whose ground-truth labels agree.

    Args:
        metric: Object exposing ``compute_k(outputs_a, outputs_b, gt)``.
        probs_a: Per-response outputs of the first model.
        gt_a: Ground-truth label of each response of the first model.
        probs_b: Per-response outputs of the second model.
        gt_b: Ground-truth label of each response of the second model.

    Returns:
        The value of ``metric.compute_k`` evaluated on the retained responses.

    Raises:
        AssertionError: If ``probs_a`` and ``probs_b`` differ in length.
    """
    # Check that the models have the same number of responses
    assert len(probs_a) == len(probs_b), f"Models must have the same number of responses: {len(probs_a)} != {len(probs_b)}"

    # Only keep responses where the ground truth is the same
    output_a = []
    output_b = []
    gt = []
    for i, (response_a, response_b) in enumerate(zip(probs_a, probs_b)):
        # Compare the labels of this response only. Comparing the whole
        # lists would keep either every response or none at all.
        if gt_a[i] == gt_b[i]:
            output_a.append(response_a)
            output_b.append(response_b)
            gt.append(gt_a[i])

    return metric.compute_k(output_a, output_b, gt)
def compute_pairwise_similarities(metric_name: str, probs: list[list[np.array]], gts: list[list[int]]) -> np.array:
    """Build the symmetric model-by-model similarity matrix for one metric.

    Args:
        metric_name: One of ``"Kappa_p (prob.)"``, ``"Kappa_p (det.)"`` or
            ``"Error Consistency"``.
        probs: One entry per model, holding that model's per-response outputs.
        gts: One entry per model, holding that model's ground-truth labels.

    Returns:
        A ``len(probs) x len(probs)`` array whose ``[i, j]`` and ``[j, i]``
        cells both hold the similarity of models ``i`` and ``j``.

    Raises:
        ValueError: If ``metric_name`` is not one of the supported names.
    """
    # NOTE(review): both Kappa_p options construct the metric identically, so
    # "(prob.)" and "(det.)" currently yield the same metric object. Confirm
    # whether the deterministic variant needs a constructor argument.
    metric_classes = {
        "Kappa_p (prob.)": Kappa_p,
        "Kappa_p (det.)": Kappa_p,
        "Error Consistency": EC,
    }
    if metric_name not in metric_classes:
        raise ValueError(f"Invalid metric: {metric_name}")
    metric = metric_classes[metric_name]()

    n_models = len(probs)
    similarities = np.zeros((n_models, n_models))
    for row in range(n_models):
        # Only the upper triangle (diagonal included) is computed; each score
        # is mirrored into the lower triangle.
        for col in range(row, n_models):
            score = compute_similarity(metric, probs[row], gts[row], probs[col], gts[col])
            similarities[row, col] = score
            similarities[col, row] = score
    return similarities