Update compute_score.py
compute_score.py (CHANGED, +12 -12)
@@ -46,17 +46,17 @@ def recall_score(prediction, ground_truth):
     recall = 1.0 * num_same / len(ground_truth_tokens)
     return recall
 
-def f1_score(prediction, ground_truth):
-    prediction_tokens = normalize_answer(prediction).split()
-    ground_truth_tokens = normalize_answer(ground_truth).split()
-    common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
-    num_same = sum(common.values())
-    if num_same == 0:
-        return 0
-    precision = 1.0 * num_same / len(prediction_tokens)
-    recall = 1.0 * num_same / len(ground_truth_tokens)
-    f1 = (2 * precision * recall) / (precision + recall)
-    return f1
+# def f1_score(prediction, ground_truth):
+#     prediction_tokens = normalize_answer(prediction).split()
+#     ground_truth_tokens = normalize_answer(ground_truth).split()
+#     common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
+#     num_same = sum(common.values())
+#     if num_same == 0:
+#         return 0
+#     precision = 1.0 * num_same / len(prediction_tokens)
+#     recall = 1.0 * num_same / len(ground_truth_tokens)
+#     f1 = (2 * precision * recall) / (precision + recall)
+#     return f1
 
 
 def exact_match_score(prediction, ground_truth):
@@ -84,9 +84,9 @@ def compute_score(dataset, predictions):
             ground_truths = list(map(lambda x: x["text"], qa["answers"]))
             prediction = predictions[qa["id"]]
             exact_match += metric_max_over_ground_truths(exact_match_score, prediction, ground_truths)
-            f1 += metric_max_over_ground_truths(f1_score, prediction, ground_truths)
             precision += metric_max_over_ground_truths(precision_score, prediction, ground_truths)
             recall += metric_max_over_ground_truths(recall_score, prediction, ground_truths)
+            f1 += (2 * precision * recall) / (precision + recall)
     exact_match = 100.0 * exact_match / total
     f1 = 100.0 * f1 / total
     recall = 100.0 * recall / total
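For reference, the helper this commit comments out computes token-level F1 as the harmonic mean of precision and recall over the multiset of tokens shared between prediction and reference. A minimal, self-contained sketch of that calculation follows; token_f1 and the example token lists are illustrative names, not part of the commit, and the script's own version first runs each string through normalize_answer before splitting:

    from collections import Counter

    def token_f1(prediction_tokens, ground_truth_tokens):
        # Multiset overlap between predicted and reference tokens.
        common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
        num_same = sum(common.values())
        if num_same == 0:
            return 0.0
        precision = num_same / len(prediction_tokens)
        recall = num_same / len(ground_truth_tokens)
        # F1 is the harmonic mean of precision and recall.
        return (2 * precision * recall) / (precision + recall)

    # Worked example: 2 of 3 predicted tokens appear in the 4-token reference,
    # so precision = 2/3, recall = 2/4, and F1 = (2 * 2/3 * 1/2) / (2/3 + 1/2) ~= 0.571.
    print(token_f1(["the", "eiffel", "tower"], ["eiffel", "tower", "in", "paris"]))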
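One consequence of the new accumulation is worth noting: on the added line 89, precision and recall are the running totals over every question processed so far, not the current question's scores, so the added term is not that question's F1; the expression also raises ZeroDivisionError when both totals are zero. If a per-question F1 were intended, a hypothetical variant (not part of this commit) could hold the current question's scores in locals before accumulating; the snippet below is a fragment for the qa loop body, reusing metric_max_over_ground_truths, precision_score, and recall_score from compute_score.py above:

    # Inside the qa loop of compute_score (names from compute_score.py above).
    p = metric_max_over_ground_truths(precision_score, prediction, ground_truths)
    r = metric_max_over_ground_truths(recall_score, prediction, ground_truths)
    precision += p
    recall += r
    # Harmonic mean of this question's precision and recall, guarding p == r == 0.
    f1 += (2 * p * r) / (p + r) if (p + r) > 0 else 0.0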