# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""SignWriting Similarity metric from the signwriting-evaluation package"""
import evaluate
import datasets
from signwriting_evaluation.metrics.similarity import SignWritingSimilarityMetric
_CITATION = """\
@misc{moryossef2024signwritingevaluationeffectivesignlanguage,
      title={signwriting-evaluation: Effective Sign Language Evaluation via SignWriting},
      author={Amit Moryossef and Rotem Zilberman and Ohad Langer},
      year={2024},
      eprint={2410.13668},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2410.13668},
}
"""

_DESCRIPTION = """\
SignWriting Similarity metric from the signwriting-evaluation package.
Scores SignWriting (FSW) hypotheses against reference transcriptions, producing a similarity score between 0 and 1.
"""

_KWARGS_DESCRIPTION = """
Produces similarity scores for hypotheses given reference translations.

Args:
    predictions (list of str):
        The predicted sentences, as SignWriting FSW strings.
    references (list of list of str):
        The references. There should be one reference sub-list for each prediction sentence.

Returns:
    score (float): The similarity score, between 0 and 1.

Examples:

    Example 1 -- basic similarity score:
        >>> predictions = ["M530x538S37602508x462S15a11493x494S20e00488x510S22f03469x517"]
        >>> references = [["M519x534S37900497x466S3770b497x485S15a51491x501S22f03481x513"]]
        >>> metric = evaluate.load("signwriting_similarity")
        >>> results = metric.compute(predictions=predictions, references=references)
        >>> print(results)
        {'score': 0.5509574768254414}

    Example 2 -- the same sign with its symbols in a different order:
        >>> predictions = ["M530x538S37602508x462S15a11493x494S20e00488x510S22f03469x517"]
        >>> references = [["M530x538S22f03469x517S37602508x462S20e00488x510S15a11493x494"]]
        >>> metric = evaluate.load("signwriting_similarity")
        >>> results = metric.compute(predictions=predictions, references=references)
        >>> print(results)
        {'score': 1.0}

    Example 3 -- slightly different symbols:
        >>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"]
        >>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"]]
        >>> metric = evaluate.load("signwriting_similarity")
        >>> results = metric.compute(predictions=predictions, references=references)
        >>> print(results)
        {'score': 0.8326259781509948}

    Example 4 -- multiple references for one prediction, one good and one bad:
        >>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"]
        >>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517", "M530x538S17600508x462"]]
        >>> metric = evaluate.load("signwriting_similarity")
        >>> results = metric.compute(predictions=predictions, references=references)
        >>> print(results)
        {'score': 0.8326259781509948}

    Example 5 -- multiple signs in the hypothesis:
        >>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517 M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"]
        >>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"]]
        >>> metric = evaluate.load("signwriting_similarity")
        >>> results = metric.compute(predictions=predictions, references=references)
        >>> print(results)
        {'score': 0.4163129890754974}

    Example 6 -- sign order does not affect similarity:
        >>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517 M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"]
        >>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517 M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"]]
        >>> metric = evaluate.load("signwriting_similarity")
        >>> results = metric.compute(predictions=predictions, references=references)
        >>> print(results)
        {'score': 1.0}

    Example 7 -- invalid FSW input results in a score of 0:
        >>> predictions = ["M<s><s>M<s>p483"]
        >>> references = [["M<s><s>M<s>p483"]]
        >>> metric = evaluate.load("signwriting_similarity")
        >>> results = metric.compute(predictions=predictions, references=references)
        >>> print(results)
        {'score': 0.0}
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class SignWritingSimilarity(evaluate.Metric):
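    # Shared scorer from the signwriting-evaluation package, reused by
    # `_compute` for every call.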
    metric = SignWritingSimilarityMetric()

    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            homepage="https://github.com/sign-language-processing/signwriting-evaluation",
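            # Two accepted input schemas: one sub-list of reference strings per
            # prediction, or a single reference string per prediction.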
            features=[
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
                    }
                ),
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            codebase_urls=["https://github.com/sign-language-processing/signwriting-evaluation"],
            reference_urls=[
                "https://github.com/sign-language-processing/signwriting-evaluation",
            ],
        )

    def _compute(self, predictions, references):
        # Accept a single reference string per prediction (the second feature
        # schema above) by normalizing to the list-of-lists format.
        if references and isinstance(references[0], str):
            references = [[reference] for reference in references]
        # `evaluate` passes one sub-list of references per prediction, while
        # `corpus_score` expects the transposed nesting: the inner array is as
        # long as the predictions, and the outer one is for multiple references.
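        # e.g. [[r1a, r1b], [r2a, r2b]] -> [(r1a, r2a), (r1b, r2b)]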
        references = list(zip(*references))
        score = self.metric.corpus_score(predictions, references)
        return {"score": score}
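

# A minimal usage sketch (illustration only, not part of the original module):
# instantiate the metric class directly rather than going through
# `evaluate.load`, and score the hypothesis/reference pair from Example 1
# of the docstring above.
if __name__ == "__main__":
    metric = SignWritingSimilarity()
    predictions = ["M530x538S37602508x462S15a11493x494S20e00488x510S22f03469x517"]
    references = [["M519x534S37900497x466S3770b497x485S15a51491x501S22f03481x513"]]
    print(metric.compute(predictions=predictions, references=references))
    # Expected, per Example 1: {'score': 0.5509574768254414}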