# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mean average precision metric"""

import evaluate
import datasets
import json
from ranx import Qrels, Run
from ranx import evaluate as ran_evaluate


_CITATION = """\
@inproceedings{ranx,
  author       = {Elias Bassani},
  title        = {ranx: {A} Blazing-Fast Python Library for Ranking Evaluation and Comparison},
  booktitle    = {{ECIR} {(2)}},
  series       = {Lecture Notes in Computer Science},
  volume       = {13186},
  pages        = {259--264},
  publisher    = {Springer},
  year         = {2022},
  doi          = {10.1007/978-3-030-99739-7\\_30}
}
"""

_DESCRIPTION = """\
This is the mean average precision (MAP) metric for retrieval systems.
It is the mean over all queries of the average precision, i.e. the average of the precision values computed after each relevant document is retrieved.
See [the ranx documentation](https://amenra.github.io/ranx/metrics/#mean-average-precision) for details.
"""


_KWARGS_DESCRIPTION = """
Args:
    predictions: list of JSON-encoded strings, one per query, each decoding to a dictionary that maps the query id
        to a dictionary of document ids and the relevance scores produced by the model for that query.
    references: list of JSON-encoded strings, one per query, each decoding to a dictionary that maps the query id
        to a dictionary of the relevant document ids and their relevance grades.
    k: `int`, optional, defaults to None. If given, map@k is computed instead of map.
Returns:
    map (`float`): mean average precision score. Minimum possible value is 0. Maximum possible value is 1.0.
Examples:

    >>> my_new_module = evaluate.load("map")
    >>> references = [json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),
    ...               json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}})]
    >>> predictions = [json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),
    ...                json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}})]
    >>> results = my_new_module.compute(references=references, predictions=predictions)
    >>> print(results)
    {'map': 0.9027777777777778}
"""

@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class map(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                'predictions':  datasets.Value("string"),
                'references':  datasets.Value("string")
            }),
            # Homepage of the module for documentation
            reference_urls=["https://amenra.github.io/ranx/"]
        )

    def _compute(self, predictions, references, k=None):
        """Returns the MAP (or MAP@k, when k is given) score."""
        preds = {}
        refs = {}
        # Each prediction/reference is a JSON-encoded {query_id: {doc_id: score}} dictionary;
        # merge them into a single run / qrels mapping across all queries.
        for pred in predictions:
            preds.update(json.loads(pred))
        for ref in references:
            refs.update(json.loads(ref))

        run = Run(preds)
        qrels = Qrels(refs)
        metric = "map" if k is None else f"map@{k}"
        map_score = ran_evaluate(qrels, run, metric)
        return {
            "map": map_score,
        }