HUANG-Stephanie committed on
Commit
a59e0f0
·
verified ·
1 Parent(s): dcdab14

Update colpali-main/colpali_engine/trainer/retrieval_evaluator.py

Browse files
colpali-main/colpali_engine/trainer/retrieval_evaluator.py CHANGED
@@ -47,32 +47,25 @@ class CustomEvaluator:
47
  return scores
48
 
49
  def evaluate_colbert(self, qs, ps, batch_size=128) -> torch.Tensor:
50
- scores = []
51
- for i in range(0, len(qs), batch_size):
52
- scores_batch = []
53
- qs_batch = torch.nn.utils.rnn.pad_sequence(qs[i : i + batch_size], batch_first=True, padding_value=0).to(
54
- "cpu"
55
- )
56
- print(f"qs_batch shape: {qs_batch.shape}") # Debug print
57
- for j in range(0, len(ps), batch_size):
58
- ps_batch = torch.nn.utils.rnn.pad_sequence(
59
- ps[j : j + batch_size], batch_first=True, padding_value=0
60
- ).to("cpu")
61
- print(f"ps_batch shape: {ps_batch.shape}") # Debug print
62
- scores_batch.append(torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2))
63
- if scores_batch: # Check that scores_batch is not empty
64
- print(f"scores_batch shapes before concat: {[s.shape for s in scores_batch]}") # Debug print
65
- scores_batch = torch.cat(scores_batch, dim=1).cpu()
66
- scores.append(scores_batch)
67
- else:
68
- print("scores_batch is empty!") # Debug print
69
- if scores: # Check that scores is not empty
70
- print(f"scores shapes before final concat: {[s.shape for s in scores]}") # Debug print
71
- scores = torch.cat(scores, dim=0)
72
- else:
73
- print("scores is empty!") # Debug print
74
- scores = torch.tensor([]) # Return an empty tensor if scores is empty
75
- return scores
76
 
77
  def evaluate_biencoder(self, qs, ps) -> torch.Tensor:
78
 
 
47
  return scores
48
 
49
  def evaluate_colbert(self, qs, ps, batch_size=128) -> torch.Tensor:
50
+ scores = []
51
+ for i in range(0, len(qs), batch_size):
52
+ scores_batch = []
53
+ qs_batch = torch.nn.utils.rnn.pad_sequence(qs[i : i + batch_size], batch_first=True, padding_value=0).to(
54
+ "cpu"
55
+ )
56
+ for j in range(0, len(ps), batch_size):
57
+ ps_batch = torch.nn.utils.rnn.pad_sequence(
58
+ ps[j : j + batch_size], batch_first=True, padding_value=0
59
+ ).to("cpu")
60
+ scores_batch.append(torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2))
61
+ if scores_batch: # Vérification si scores_batch n'est pas vide
62
+ scores_batch = torch.cat(scores_batch, dim=1).cpu()
63
+ scores.append(scores_batch)
64
+ if scores: # Vérification si scores n'est pas vide
65
+ scores = torch.cat(scores, dim=0)
66
+ else:
67
+ scores = torch.tensor([]) # Retourne un tensor vide si scores est vide
68
+ return scores
 
 
 
 
 
 
 
69
 
70
  def evaluate_biencoder(self, qs, ps) -> torch.Tensor:
71