Spaces:
Sleeping
Sleeping
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""SignWriting Similarity metric from the signwriting-evaluation package""" | |
import evaluate | |
import datasets | |
from signwriting_evaluation.metrics.similarity import SignWritingSimilarityMetric | |
_CITATION = """\ | |
@misc{moryossef2024signwritingevaluationeffectivesignlanguage, | |
title={signwriting-evaluation: Effective Sign Language Evaluation via SignWriting}, | |
author={Amit Moryossef and Rotem Zilberman and Ohad Langer}, | |
year={2024}, | |
eprint={2410.13668}, | |
archivePrefix={arXiv}, | |
primaryClass={cs.CL}, | |
url={https://arxiv.org/abs/2410.13668}, | |
} | |
""" | |
_DESCRIPTION = """\ | |
SignWriting Similarity metric from the signwriting-evaluation package | |
""" | |
_KWARGS_DESCRIPTION = """ | |
Produces similarity scores for hypotheses given reference translations. | |
Args: | |
predictions (list of str): | |
The predicted sentences. | |
references (list of list of str): | |
The references. There should be one reference sub-list for each prediction sentence. | |
Returns: | |
score (float): The similarity score between 0 and 1 | |
Examples: | |
Example 1 -- basic similarity score: | |
>>> predictions = ["M530x538S37602508x462S15a11493x494S20e00488x510S22f03469x517"] | |
>>> references = [["M519x534S37900497x466S3770b497x485S15a51491x501S22f03481x513"]] | |
>>> metric = evaluate.load("signwriting_similarity") | |
>>> results = metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'score': 0.5509574768254414} | |
Example 2 -- identical signs in different order: | |
>>> predictions = ["M530x538S37602508x462S15a11493x494S20e00488x510S22f03469x517"] | |
>>> references = [["M530x538S22f03469x517S37602508x462S20e00488x510S15a11493x494"]] | |
>>> metric = evaluate.load("signwriting_similarity") | |
>>> results = metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'score': 1.0} | |
Example 3 -- slightly different symbols: | |
>>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"] | |
>>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"]] | |
>>> metric = evaluate.load("signwriting_similarity") | |
>>> results = metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'score': 0.8326259781509948} | |
Example 4 -- multiple references, one good and one bad: | |
>>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"] | |
>>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"], ["M530x538S17600508x462"]] | |
>>> metric = evaluate.load("signwriting_similarity") | |
>>> results = metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'score': 0.8326259781509948} | |
Example 5 -- multiple signs in hypothesis: | |
>>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517 M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"] | |
>>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"]] | |
>>> metric = evaluate.load("signwriting_similarity") | |
>>> results = metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'score': 0.4163129890754974} | |
Example 6 -- sign order does not affect similarity: | |
>>> predictions = ["M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517 M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517"] | |
>>> references = [["M530x538S17600508x462S12a11493x494S20e00488x510S22f13469x517 M530x538S17600508x462S15a11493x494S20e00488x510S22f03469x517"]] | |
>>> metric = evaluate.load("signwriting_similarity") | |
>>> results = metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'score': 1.0} | |
Example 7 -- invalid FSW input should result in 0 score: | |
>>> predictions = ["M<s><s>M<s>p483"] | |
>>> references = [["M<s><s>M<s>p483"]] | |
>>> metric = evaluate.load("signwriting_similarity") | |
>>> results = metric.compute(predictions=predictions, references=references) | |
>>> print(results) | |
{'score': 0.0} | |
""" | |
class SignWritingSimilarity(evaluate.Metric):
    """Hugging Face `evaluate` wrapper around `SignWritingSimilarityMetric`.

    `compute()` forwards the predictions and references to the wrapped
    metric's `corpus_score` and reports the result under the key ``"score"``.
    """

    # Scorer shared by all instances; it is built once, when the class body runs.
    metric = SignWritingSimilarityMetric()

    def _info(self):
        """Return the metric's metadata and the input schemas it accepts.

        Two schemas are declared: `references` as a sequence of strings per
        prediction (multiple references), or as a single string per prediction.
        """
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            homepage="https://github.com/sign-language-processing/signwriting-evaluation",
            features=[
                # Multiple references per prediction.
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
                    }
                ),
                # Exactly one reference per prediction, given as a plain string.
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            codebase_urls=["https://github.com/sign-language-processing/signwriting-evaluation"],
            reference_urls=[
                "https://github.com/sign-language-processing/signwriting-evaluation",
            ],
        )

    def _compute(self, predictions, references):
        """Score `predictions` against `references` with the wrapped metric.

        Args:
            predictions: One predicted string per example.
            references: For each prediction, either a list of reference
                strings or a single reference string (the two schemas
                declared in `_info`).

        Returns:
            A dict ``{"score": <value returned by corpus_score>}``.

        Raises:
            ValueError: If the predictions do not all have the same number of
                references. Transposing such input with `zip` would silently
                drop the surplus references.
        """
        # Single-string schema: wrap each reference so both schemas share one
        # code path. Without this, zip(*references) would iterate over the
        # characters of each string instead of over references.
        if references and isinstance(references[0], str):
            references = [[reference] for reference in references]

        if len({len(refs) for refs in references}) > 1:
            raise ValueError(
                "SignWritingSimilarity requires the same number of references for each prediction"
            )

        # `evaluate` groups references per prediction; transpose them so that
        # the inner sequences are as long as `predictions` and the outer list
        # enumerates the alternative references.
        references = list(zip(*references))
        score = self.metric.corpus_score(predictions, references)
        return {"score": score}