# NOTE(review): removed page chrome ("Spaces: / Running / Running") left over
# from scraping a Hugging Face Space listing — it was not Python code.
import torch
from mteb.evaluation.evaluators import RetrievalEvaluator
class CustomEvaluator:
    """Retrieval evaluator supporting two scoring modes.

    - bi-encoder: one embedding per query/passage, scored by dot product.
    - multi-vector (ColBERT-style): one embedding per token, scored by
      late interaction (max over passage tokens, summed over query tokens).

    Metric computation is delegated to mteb's ``RetrievalEvaluator``.
    """

    def __init__(self, is_multi_vector=False):
        # True -> ColBERT late-interaction scoring; False -> single-vector dot product.
        self.is_multi_vector = is_multi_vector
        self.mteb_evaluator = RetrievalEvaluator()

    def evaluate(self, qs, ps):
        """Score every query against every passage.

        Args:
            qs: list of query embeddings (1-D tensors for bi-encoder mode,
                2-D ``(tokens, dim)`` tensors for multi-vector mode).
            ps: list of passage embeddings, same convention as ``qs``.

        Returns:
            A float32 numpy array of shape ``(len(qs), len(ps))``.

        Side effect: prints a top-1 accuracy sanity check that assumes
        ``qs[i]`` is the positive match for ``ps[i]``.
        """
        if self.is_multi_vector:
            scores = self.evaluate_colbert(qs, ps)
        else:
            scores = self.evaluate_biencoder(qs, ps)

        # Explicit validation instead of `assert` (asserts are stripped under -O).
        if scores.shape[0] != len(qs):
            raise ValueError(f"Expected {len(qs)} rows of scores, got {scores.shape[0]}")

        arg_score = scores.argmax(dim=1)
        # Sanity check: query i is expected to retrieve passage i (diagonal match).
        accuracy = (arg_score == torch.arange(scores.shape[0], device=scores.device)).sum().item() / scores.shape[0]
        print(arg_score)
        print(f"Top 1 Accuracy (verif): {accuracy}")
        # Cast to float32 first: half/bfloat16 tensors are not numpy-convertible.
        scores = scores.to(torch.float32).cpu().numpy()
        return scores

    def compute_metrics(self, relevant_docs, results, **kwargs):
        """Compute retrieval metrics (nDCG, MAP, recall, precision, MRR, nAUC)
        by wrapping the mteb evaluator, and flatten them into a single dict
        keyed like ``ndcg_at_10``.
        """
        ndcg, _map, recall, precision, naucs = self.mteb_evaluator.evaluate(
            relevant_docs,
            results,
            self.mteb_evaluator.k_values,
            ignore_identical_ids=kwargs.get("ignore_identical_ids", True),
        )
        mrr = self.mteb_evaluator.evaluate_custom(relevant_docs, results, self.mteb_evaluator.k_values, "mrr")
        # mteb returns keys like "NDCG@10"; normalize them to "ndcg_at_10".
        scores = {
            **{f"ndcg_at_{k.split('@')[1]}": v for (k, v) in ndcg.items()},
            **{f"map_at_{k.split('@')[1]}": v for (k, v) in _map.items()},
            **{f"recall_at_{k.split('@')[1]}": v for (k, v) in recall.items()},
            **{f"precision_at_{k.split('@')[1]}": v for (k, v) in precision.items()},
            **{f"mrr_at_{k.split('@')[1]}": v for (k, v) in mrr[0].items()},
            **{f"naucs_at_{k.split('@')[1]}": v for (k, v) in naucs.items()},
        }
        return scores

    def evaluate_colbert(self, qs, ps, batch_size=128) -> torch.Tensor:
        """Late-interaction (ColBERT) scoring, batched over both queries and
        passages to bound memory.

        For each query/passage pair: for every query token take the max
        similarity over passage tokens, then sum over query tokens.
        Sequences are zero-padded per batch; padding contributes 0 similarity.
        """
        scores = []
        for i in range(0, len(qs), batch_size):
            scores_batch = []
            qs_batch = torch.nn.utils.rnn.pad_sequence(qs[i : i + batch_size], batch_first=True, padding_value=0).to(
                "cpu"
            )
            for j in range(0, len(ps), batch_size):
                ps_batch = torch.nn.utils.rnn.pad_sequence(
                    ps[j : j + batch_size], batch_first=True, padding_value=0
                ).to("cpu")
                # (b, n, d) x (c, s, d) -> (b, c, n, s): max over passage tokens s,
                # then sum over query tokens n.
                scores_batch.append(torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2))
            if scores_batch:  # guard against an empty passage list
                scores_batch = torch.cat(scores_batch, dim=1).cpu()
                scores.append(scores_batch)
        if scores:  # guard against an empty query list
            scores = torch.cat(scores, dim=0)
        else:
            scores = torch.tensor([])  # empty tensor when there is nothing to score
        return scores

    def evaluate_biencoder(self, qs, ps) -> torch.Tensor:
        """Single-vector scoring: pairwise dot products between all query and
        passage embeddings, returning a ``(len(qs), len(ps))`` tensor.
        """
        qs = torch.stack(qs)
        ps = torch.stack(ps)
        scores = torch.einsum("bd,cd->bc", qs, ps)
        return scores