# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mean average precision metric"""
import evaluate
import datasets
import json
from ranx import Qrels, Run
from ranx import evaluate as ran_evaluate
_CITATION = """\
@inproceedings{ranx,
author = {Elias Bassani},
title = {ranx: {A} Blazing-Fast Python Library for Ranking Evaluation and Comparison},
booktitle = {{ECIR} {(2)}},
series = {Lecture Notes in Computer Science},
volume = {13186},
pages = {259--264},
publisher = {Springer},
year = {2022},
doi = {10.1007/978-3-030-99739-7\_30}
}
"""
_DESCRIPTION = """\
This is the mean average precision (map) metric for retrieval systems.
It is the average of the precision scores computer after each relevant document is got. You can refer to [here](https://amenra.github.io/ranx/metrics/#mean-average-precision)
"""
_KWARGS_DESCRIPTION = """
Args:
    predictions: list of JSON-encoded dictionaries, one per query, each mapping the query id to the
        document relevance scores produced by the model, e.g. '{"q_1": {"d_1": 0.8, "d_2": 0.9}}'.
    references: list of JSON-encoded dictionaries, one per query, each mapping the query id to its
        relevant documents and their relevance grades, e.g. '{"q_1": {"d_1": 1, "d_2": 2}}'.
    k: `int`, optional, defaults to `None`. If given, map@k is computed instead, i.e. only the
        top k retrieved documents of each query are considered.
Returns:
    map (`float`): mean average precision score. Minimum possible value is 0. Maximum possible value is 1.0.
Examples:
    >>> import json
    >>> my_new_module = evaluate.load("map")
    >>> references = [json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),
    ...               json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}})]
    >>> predictions = [json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),
    ...                json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}})]
    >>> results = my_new_module.compute(references=references, predictions=predictions)
    >>> print(results)
    {'map': 0.9027777777777778}
"""

@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class map(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference.
            # `k` is intentionally not a feature: it is a per-call parameter that
            # `compute()` forwards to `_compute` as a keyword argument.
            features=datasets.Features({
                'predictions': datasets.Value("string"),
                'references': datasets.Value("string"),
            }),
            # Homepage of the module for documentation
            reference_urls=["https://amenra.github.io/ranx/"]
        )

    def _compute(self, predictions, references, k=None):
        """Returns the mean average precision score."""
        # Each prediction / reference is a JSON-encoded {query_id: {doc_id: score}}
        # mapping; merge them into a single run / qrels dictionary.
        preds = {}
        refs = {}
        for pred in predictions:
            preds.update(json.loads(pred))
        for ref in references:
            refs.update(json.loads(ref))
        run = Run(preds)
        qrels = Qrels(refs)
        metric = "map" if k is None else f"map@{k}"
        map_score = ran_evaluate(qrels, run, metric)
        return {
            "map": map_score,
        }
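

if __name__ == "__main__":
    # Minimal local-usage sketch (an illustration only, not part of the metric):
    # it instantiates the `map` class defined above directly instead of going
    # through evaluate.load(), and assumes `evaluate`, `datasets` and `ranx`
    # are installed.
    references = [
        json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),
        json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}}),
    ]
    predictions = [
        json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),
        json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}}),
    ]
    metric = map()  # the metric class above, not the built-in map()
    print(metric.compute(predictions=predictions, references=references))
    # Expected output (see the worked example above): {'map': 0.9027777777777778}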