Spaces:
Restarting
Restarting
Update eval_utils.py
Browse files
- eval_utils.py +5 -2
eval_utils.py
CHANGED
|
@@ -65,16 +65,19 @@ def evaluate_cjpe(gold_data, pred_data):
|
|
| 65 |
R = []
|
| 66 |
B = []
|
| 67 |
rl_evaluator = rouge.Rouge(metrics=['rouge-l'], max_n=2, limit_length=False, apply_avg=True)
|
| 68 |
-
for x in
|
| 69 |
gold_explanations = []
|
| 70 |
pred_explanations = []
|
| 71 |
for k,v in gold_data['explanation'].items():
|
| 72 |
gold_explanations.append(v[f'expert_{x}'])
|
| 73 |
pred_explanations.append(pred_data['explanation'][k])
|
|
|
|
| 74 |
rougex = rl_evaluator.get_scores(pred_explanations, gold_explanations)['rouge-l']['f']
|
| 75 |
bleux = get_BLEU_score(gold_explanations, pred_explanations)
|
| 76 |
R.append(rougex)
|
| 77 |
B.append(bleux)
|
|
|
|
|
|
|
| 78 |
|
| 79 |
rouge_score = sum(R)/len(R)
|
| 80 |
bleu_score = sum(B)/len(B)
|
|
@@ -214,7 +217,7 @@ def evaluate_pcr(gold_data, pred_data):
|
|
| 214 |
f1_scores = []
|
| 215 |
for k in range(1, 21):
|
| 216 |
correct, gold_total, pred_total = 0, 0, 0
|
| 217 |
-
for id, gold_candidates in gold_data.items():
|
| 218 |
pred_candidates = pred_data.get(id, [])
|
| 219 |
gold_candidates = [c for c in gold_candidates if c != id]
|
| 220 |
pred_candidates = [c for c in pred_candidates if c != id]
|
|
|
|
| 65 |
R = []
|
| 66 |
B = []
|
| 67 |
rl_evaluator = rouge.Rouge(metrics=['rouge-l'], max_n=2, limit_length=False, apply_avg=True)
|
| 68 |
+
for x in range(1, 6):
|
| 69 |
gold_explanations = []
|
| 70 |
pred_explanations = []
|
| 71 |
for k,v in gold_data['explanation'].items():
|
| 72 |
gold_explanations.append(v[f'expert_{x}'])
|
| 73 |
pred_explanations.append(pred_data['explanation'][k])
|
| 74 |
+
print("Metrics for expert", x, "...", end=' ')
|
| 75 |
rougex = rl_evaluator.get_scores(pred_explanations, gold_explanations)['rouge-l']['f']
|
| 76 |
bleux = get_BLEU_score(gold_explanations, pred_explanations)
|
| 77 |
R.append(rougex)
|
| 78 |
B.append(bleux)
|
| 79 |
+
print("Done.")
|
| 80 |
+
|
| 81 |
|
| 82 |
rouge_score = sum(R)/len(R)
|
| 83 |
bleu_score = sum(B)/len(B)
|
|
|
|
| 217 |
f1_scores = []
|
| 218 |
for k in range(1, 21):
|
| 219 |
correct, gold_total, pred_total = 0, 0, 0
|
| 220 |
+
for id, gold_candidates in tqdm(gold_data.items(), desc="pcr"):
|
| 221 |
pred_candidates = pred_data.get(id, [])
|
| 222 |
gold_candidates = [c for c in gold_candidates if c != id]
|
| 223 |
pred_candidates = [c for c in pred_candidates if c != id]
|