Implementation added

Files changed:
- mrr.py +48 -45
- requirements.txt +2 -1
- tests.py +0 -17

mrr.py
CHANGED

@@ -11,58 +11,59 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""TODO: Add a description here."""
+"""Mean reciprocal rank (mrr) metric."""
 
 import evaluate
 import datasets
+import json
+from ranx import Qrels, Run
+from ranx import evaluate as ran_evaluate
 
 
-# TODO: Add BibTeX citation
 _CITATION = """\
-@InProceedings{huggingface:module,
-title = {A great new module},
-authors={huggingface, Inc.},
-year={2020}
+@inproceedings{ranx,
+  author       = {Elias Bassani},
+  title        = {ranx: {A} Blazing-Fast Python Library for Ranking Evaluation and Comparison},
+  booktitle    = {{ECIR} {(2)}},
+  series       = {Lecture Notes in Computer Science},
+  volume       = {13186},
+  pages        = {259--264},
+  publisher    = {Springer},
+  year         = {2022},
+  doi          = {10.1007/978-3-030-99739-7\_30}
 }
 """
 
-# TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+This is the mean reciprocal rank (mrr) metric for retrieval systems.
+It is the multiplicative inverse of the rank of the first retrieved relevant document: 1 for first place, 1/2 for second place, 1/3 for third place, and so on. See https://amenra.github.io/ranx/metrics/#mean-reciprocal-rank for details.
 """
 
 
-# TODO: Add description of the arguments of the module here
 _KWARGS_DESCRIPTION = """
-Calculates how good are predictions given some references, using certain scores
 Args:
-    predictions: list of predictions to score. Each predictions
-        should be a string with tokens separated by spaces.
-    references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
+    predictions: list of JSON-encoded strings, one per query. Each string encodes
+        a dictionary mapping the query id to the relevance scores the model
+        produced for each document.
+    references: list of JSON-encoded strings, one per query, in the same query
+        order as predictions. Each string encodes a dictionary mapping the
+        query id to its relevant documents and their relevance grades.
+    k: `int`, optional, defaults to None. If given, mrr@k is computed instead of mrr.
 Returns:
-    accuracy: description of the first score,
-    another_score: description of the second score,
+    mrr (`float`): mean reciprocal rank. Minimum possible value is 0. Maximum possible value is 1.0.
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
-
-    >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+    >>> import json
+    >>> my_new_module = evaluate.load("mrr")
+    >>> references = [json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),
+    ...               json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}})]
+    >>> predictions = [json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),
+    ...                json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}})]
+    >>> results = my_new_module.compute(references=references, predictions=predictions)
     >>> print(results)
-    {'accuracy': 1.0}
+    {'mrr': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class mrr(evaluate.Metric):
-    """TODO: Short description of my evaluation module."""
-
     def _info(self):
-        # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.MetricInfo(
             # This is the description that will appear on the modules page.
             module_type="metric",
@@ -71,25 +72,27 @@ class mrr(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
+                # Both inputs arrive as JSON-encoded strings. k is not declared
+                # here: it is an optional keyword argument of compute(), and
+                # datasets.Value accepts neither an "int" dtype nor a default.
+                'predictions': datasets.Value("string"),
+                'references': datasets.Value("string"),
             }),
             # Homepage of the module for documentation
-            homepage="http://module.homepage",
-            # Additional links to the codebase or references
-            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"]
+            reference_urls=["https://amenra.github.io/ranx/"]
         )
 
-    def _download_and_prepare(self, dl_manager):
-        """Optional: download external resources useful to compute the scores"""
-        # TODO: Download external resources if needed
-        pass
-
-    def _compute(self, predictions, references):
+    def _compute(self, predictions, references, k=None):
         """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+        preds = {}
+        refs = {}
+        # Merge the per-query JSON dictionaries into single query->scores
+        # mappings (dict | dict requires Python 3.9+).
+        for pred in predictions:
+            preds = preds | json.loads(pred)
+        for ref in references:
+            refs = refs | json.loads(ref)
+
+        run = Run(preds)
+        qrels = Qrels(refs)
+        metric = "mrr" if k is None else f"mrr@{k}"
+        mrr_score = ran_evaluate(qrels, run, metric)
         return {
-            "accuracy": accuracy,
+            "mrr": mrr_score,
         }
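For context, here is a minimal end-to-end sketch of how the rewritten module is meant to be called. It assumes this repository loads as "mrr" (locally or from the Hub) and that ranx is installed; the inputs mirror the docstring example, one JSON-encoded string per query on each side.

    # Minimal usage sketch; assumes the module loads as "mrr" and ranx is installed.
    import json
    import evaluate

    mrr_metric = evaluate.load("mrr")  # or a local path to this module

    references = [
        json.dumps({"q_1": {"d_1": 1, "d_2": 2}}),            # relevance grades per query
        json.dumps({"q_2": {"d_2": 1, "d_3": 2, "d_5": 3}}),
    ]
    predictions = [
        json.dumps({"q_1": {"d_1": 0.8, "d_2": 0.9}}),        # model scores per query
        json.dumps({"q_2": {"d_2": 0.9, "d_1": 0.8, "d_5": 0.7, "d_3": 0.3}}),
    ]

    print(mrr_metric.compute(references=references, predictions=predictions))
    # {'mrr': 1.0} -- the top-ranked document is relevant for both queries

    print(mrr_metric.compute(references=references, predictions=predictions, k=1))
    # {'mrr': 1.0} -- k is forwarded to _compute as a keyword argument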
    	
requirements.txt
CHANGED

@@ -1 +1,2 @@
 git+https://github.com/huggingface/evaluate@main
+ranx==0.3.19
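The newly pinned ranx dependency is what actually computes the score; the module is a thin wrapper around its Qrels/Run/evaluate API. A standalone sketch of that underlying call, assuming ranx==0.3.19:

    from ranx import Qrels, Run
    from ranx import evaluate as ran_evaluate

    qrels = Qrels({"q_1": {"d_1": 1, "d_2": 2}})   # graded relevance judgments
    run = Run({"q_1": {"d_2": 0.4, "d_1": 0.9}})   # model scores; d_1 ranks first
    print(ran_evaluate(qrels, run, "mrr"))         # 1.0 -- first-ranked doc is relevant
    print(ran_evaluate(qrels, run, "mrr@1"))       # cutoff form used when k is passed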
    	
tests.py
DELETED

@@ -1,17 +0,0 @@
-test_cases = [
-    {
-        "predictions": [0, 0],
-        "references": [1, 1],
-        "result": {"metric_score": 0}
-    },
-    {
-        "predictions": [1, 1],
-        "references": [1, 1],
-        "result": {"metric_score": 1}
-    },
-    {
-        "predictions": [1, 0],
-        "references": [1, 1],
-        "result": {"metric_score": 0.5}
-    }
-]
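The deleted tests targeted the template's original list-of-integers interface, which no longer matches the JSON-string inputs the module now expects; the commit removes them without replacements. Purely as a hypothetical sketch (not part of this commit), equivalent cases for the new interface could look like:

    import json

    # Hypothetical test cases for the new JSON-string interface (not in the commit).
    test_cases = [
        {
            # top-ranked document (d_1) is relevant -> reciprocal rank 1.0
            "predictions": [json.dumps({"q_1": {"d_1": 0.9, "d_2": 0.1}})],
            "references": [json.dumps({"q_1": {"d_1": 1}})],
            "result": {"mrr": 1.0},
        },
        {
            # relevant document (d_2) ranked second -> reciprocal rank 0.5
            "predictions": [json.dumps({"q_1": {"d_1": 0.9, "d_2": 0.1}})],
            "references": [json.dumps({"q_1": {"d_2": 1}})],
            "result": {"mrr": 0.5},
        },
    ]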
         