Spaces:

CZLC
/

rouge_raw

Runtime error

File size: 8,331 Bytes

732e363

# -*- coding: UTF-8 -*-
r"""
Created on 02.02.24
Module for raw ROUGE score calculation from:
@inproceedings{straka-etal-2018-sumeczech,
    title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
    author = "Straka, Milan  and
      Mediankin, Nikita  and
      Kocmi, Tom  and
      {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k  and
      Hude{\v{c}}ek, Vojt{\v{e}}ch  and
      Haji{\v{c}}, Jan",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Cieri, Christopher  and
      Declerck, Thierry  and
      Goggi, Sara  and
      Hasida, Koiti  and
      Isahara, Hitoshi  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Mazo, H{\'e}l{\`e}ne  and
      Moreno, Asuncion  and
      Odijk, Jan  and
      Piperidis, Stelios  and
      Tokunaga, Takenobu",
    booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
    month = may,
    year = "2018",
    address = "Miyazaki, Japan",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L18-1551",
}


:author:     Martin Dočekal
"""

import re
from typing import Sequence

import datasets
import evaluate


class RougeRaw:
    """
    Original implementation of the ROUGERaw metric.

    Computes RougeRAW-1, RougeRAW-2 and RougeRAW-L (precision, recall, F1)
    over lower-cased tokens produced by :meth:`_tokenize`.
    """

    # Compiled once and reused by _tokenize. The flags go to re.compile, so
    # they cannot be mistaken for the positional ``count`` argument of re.sub.
    _WORD_BOUNDARY = re.compile(r"\b", re.UNICODE)
    _WHITESPACE = re.compile(r"\s+", re.UNICODE)

    class FScore:
        """Precision (``p``), recall (``r``) and F1 (``f``) built from match counts.

        The attributes are plain floats and are mutated in place by
        :meth:`RougeRaw.corpus` when averaging over documents.
        """

        def __init__(self, correct, gold, system):
            """
            :param correct: number of matched units
            :param gold: number of units in the reference
            :param system: number of units in the evaluated output
            """
            # Every ratio falls back to 0.0 when its denominator is zero.
            self.p = correct / system if system else 0.
            self.r = correct / gold if gold else 0.
            self.f = 2 * correct / (system + gold) if system + gold else 0.

    def _rouge_n(self, n, gold_words, system_words):
        """Compute Rouge-n for given words.

        :param n: n-gram order
        :param gold_words: reference tokens
        :param system_words: evaluated tokens
        :return: FScore based on the clipped n-gram overlap
        """

        def n_grams(words):
            # Tuples are used as keys so that n-grams can never collide,
            # whatever characters the tokens contain.
            counts = {}
            for start in range(len(words) - n + 1):
                ngram = tuple(words[start:start + n])
                counts[ngram] = counts.get(ngram, 0) + 1
            return counts, sum(counts.values())

        gold_ngrams, gold_total = n_grams(gold_words)
        system_ngrams, system_total = n_grams(system_words)

        # Clipped overlap: an n-gram is credited at most as many times as it
        # occurs in the reference.
        intersection = sum(
            min(count, gold_ngrams.get(ngram, 0))
            for ngram, count in system_ngrams.items()
        )

        return self.FScore(intersection, gold_total, system_total)

    def _rouge_l(self, gold_words, system_words):
        """Compute Rouge-L for given words.

        :param gold_words: reference tokens
        :param system_words: evaluated tokens
        :return: FScore based on the longest common subsequence length;
            all zeros when either sequence is empty
        """
        rows, cols = len(gold_words), len(system_words)

        # The table carries an extra zero row and column, so no boundary
        # checks are needed and empty inputs yield an LCS length of 0
        # instead of an IndexError.
        lcs = [[0] * (cols + 1) for _ in range(rows + 1)]
        for r, gold_word in enumerate(gold_words, start=1):
            for s, system_word in enumerate(system_words, start=1):
                if gold_word == system_word:
                    lcs[r][s] = lcs[r - 1][s - 1] + 1
                else:
                    lcs[r][s] = max(lcs[r - 1][s], lcs[r][s - 1])

        return self.FScore(lcs[rows][cols], rows, cols)

    def _tokenize(self, text):
        """Tokenize given text.

        A space is inserted at every word boundary, whitespace runs are
        collapsed to a single space, and the result is split on spaces.
        Punctuation therefore becomes separate tokens. An empty or
        whitespace-only text yields ``[""]``.

        :param text: text to tokenize
        :return: list of tokens
        """
        spaced = self._WORD_BOUNDARY.sub(" ", text)
        return self._WHITESPACE.sub(" ", spaced).strip().split(" ")

    def document(self, gold, system):
        """Compute RougeRAW-1, RougeRAW-2, RougeRAW-L for given documents.
        Each document should be a string.

        :param gold: reference document
        :param system: evaluated document
        :return: dict with keys "1", "2" and "L" mapping to FScore objects
        :raises AssertionError: when an argument is not a string
        """

        assert isinstance(gold, str) and isinstance(system, str), "Expected string arguments"

        # Matching is case-insensitive: both sides are lower-cased first.
        lc_gold_words = [word.lower() for word in self._tokenize(gold)]
        lc_system_words = [word.lower() for word in self._tokenize(system)]

        return {
            "1": self._rouge_n(1, lc_gold_words, lc_system_words),
            "2": self._rouge_n(2, lc_gold_words, lc_system_words),
            "L": self._rouge_l(lc_gold_words, lc_system_words),
        }

    def corpus(self, gold, system):
        """Compute RougeRAW-1, RougeRAW-2, RougeRAW-L for given corpora.
        Each corpus should be a collection of documents, each document a string.

        The result is the per-document average of precision, recall and F1.
        Empty corpora yield all zeros.

        :param gold: list of reference documents
        :param system: list of evaluated documents, aligned with ``gold``
        :return: dict with keys "1", "2" and "L" mapping to averaged FScore objects
        :raises AssertionError: when the arguments are not lists of equal length
        """

        assert isinstance(gold, list) and isinstance(system, list), "Expected list arguments"
        assert len(gold) == len(system), "Given corpora should be of the same length"

        # FScore(0, 0, 0) gives zeroed accumulators.
        rouge = {key: self.FScore(0, 0, 0) for key in ["1", "2", "L"]}

        if gold:
            for gold_document, system_document in zip(gold, system):
                for key, value in self.document(gold_document, system_document).items():
                    rouge[key].p += value.p
                    rouge[key].r += value.r
                    rouge[key].f += value.f

            documents = len(gold)
            for score in rouge.values():
                score.p /= documents
                score.r /= documents
                score.f /= documents

        return rouge


# BibTeX entry of the paper that introduced the metric.
# This must be a raw string: the entry contains LaTeX accent commands such as
# \v and \', which a normal string literal would interpret as escape sequences
# (\v is a vertical tab) and thereby corrupt the citation.
_CITATION = r"""@inproceedings{straka-etal-2018-sumeczech,
    title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset",
    author = "Straka, Milan  and
      Mediankin, Nikita  and
      Kocmi, Tom  and
      {\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k  and
      Hude{\v{c}}ek, Vojt{\v{e}}ch  and
      Haji{\v{c}}, Jan",
    editor = "Calzolari, Nicoletta  and
      Choukri, Khalid  and
      Cieri, Christopher  and
      Declerck, Thierry  and
      Goggi, Sara  and
      Hasida, Koiti  and
      Isahara, Hitoshi  and
      Maegaard, Bente  and
      Mariani, Joseph  and
      Mazo, H{\'e}l{\`e}ne  and
      Moreno, Asuncion  and
      Odijk, Jan  and
      Piperidis, Stelios  and
      Tokunaga, Takenobu",
    booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)",
    month = may,
    year = "2018",
    address = "Miyazaki, Japan",
    publisher = "European Language Resources Association (ELRA)",
    url = "https://aclanthology.org/L18-1551",
}
"""

# Short description of the metric.
_DESCRIPTION = """\
ROUGE RAW is a language-agnostic variant of ROUGE without stemmer, stop words and synonyms.
This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script.
"""

# Description of the inputs and outputs of the metric, with a usage example.
_KWARGS_DESCRIPTION = """
ROUGE RAW metric for a list of predictions and references.
Args:
    predictions: list of predictions to evaluate. Each prediction should be a string with tokens separated by spaces.
    references: list of references, one for each prediction. Each reference should be a string with tokens separated by spaces.
Returns:
    rougeraw1_precision
    rougeraw1_recall
    rougeraw1_fmeasure
    rougeraw2_precision
    rougeraw2_recall
    rougeraw2_fmeasure
    rougerawl_precision
    rougerawl_recall
    rougerawl_fmeasure
Examples:
    >>> rougeraw = evaluate.load('CZLC/rouge_raw')
    >>> predictions = ["the cat is on the mat", "hello there"]
    >>> references = ["the cat is on the mat", "hello there"]
    >>> results = rougeraw.compute(predictions=predictions, references=references)
    >>> print(results)
    {'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0}
"""


# Metric wrapper that exposes RougeRaw through the `evaluate` metric interface.
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Rouge(evaluate.Metric):
    def _info(self):
        """Return the metric metadata: description, citation and input features."""
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=[
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            reference_urls=[
                "http://hdl.handle.net/11234/1-2615",
            ],
        )

    def _compute(self, predictions: Sequence[str], references: Sequence[str]):
        """Compute the corpus-level RougeRAW scores.

        :param predictions: evaluated texts
        :param references: reference texts, aligned with ``predictions``
        :return: dict with precision, recall and F-measure for RougeRAW-1,
            RougeRAW-2 and RougeRAW-L
        """
        # RougeRaw.corpus asserts that it receives real lists, while this
        # method accepts any sequence; materialise the inputs so that tuples
        # or other sequence types do not trip that assertion.
        res = RougeRaw().corpus(list(references), list(predictions))
        return {
            "rougeraw1_precision": res["1"].p,
            "rougeraw1_recall": res["1"].r,
            "rougeraw1_fmeasure": res["1"].f,
            "rougeraw2_precision": res["2"].p,
            "rougeraw2_recall": res["2"].r,
            "rougeraw2_fmeasure": res["2"].f,
            "rougerawl_precision": res["L"].p,
            "rougerawl_recall": res["L"].r,
            "rougerawl_fmeasure": res["L"].f,
        }