Spaces:

ginic
/

phone_errors

Sleeping

File size: 5,675 Bytes

# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Edit distances between Unicode International Phonetic Alphabet strings. 
This is basically a Hugging Face wrapper around the panphon library's distance module.
"""

import evaluate
import datasets
import numpy as np
import panphon.distance


# BibTeX entry for the PanPhon paper (Mortensen et al., COLING 2016).
# Passed as the `citation` field of the metric's `evaluate.MetricInfo`.
_CITATION = """\
@inproceedings{Mortensen-et-al:2016,
  author    = {David R. Mortensen and
               Patrick Littell and
               Akash Bharadwaj and
               Kartik Goyal and
               Chris Dyer and
               Lori S. Levin},
  title     = {PanPhon: {A} Resource for Mapping {IPA} Segments to Articulatory Feature Vectors},
  booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
  pages     = {3475--3484},
  publisher = {{ACL}},
  year      = {2016}
}
"""

# Short summary of the metric; used as the `description` field of the
# metric's `evaluate.MetricInfo` and prepended to the metric class docstring.
_DESCRIPTION = """\
Error rates between predicted and reference transcriptions written in the
Unicode International Phonetic Alphabet (IPA), computed with the panphon
library's distance module. For every prediction/reference pair the metric
reports the phoneme error rate, the phone feature error rate (Hamming feature
edit distance) and the feature error rate, together with the mean of each
measure over all pairs.
"""


# Description of the metric's inputs, keyword arguments and outputs; used as
# the `inputs_description` field of the metric's `evaluate.MetricInfo`.
_KWARGS_DESCRIPTION = """
Calculates error rates between predicted and reference Unicode IPA strings.
Args:
    predictions: list of predicted transcriptions to score. Each prediction
        should be a Unicode IPA string.
    references: list of reference transcriptions, one for each prediction.
        Each reference should be a Unicode IPA string.
    feature_set: feature set to use in the feature model, see the panphon
        documentation for details. Defaults to "spe+".
    feature_model: panphon.distance.Distance feature parsing model to use,
        one of "strict", "permissive" or "segment". Defaults to "segment".
    is_normalize_max_length: set to True to normalize phone feature error
        rates by maximum length (the measure won't be a true metric).
        Defaults to False.
Returns:
    phoneme_error_rates: list with the phoneme error rate of each
        prediction/reference pair,
    mean_phoneme_error_rate: mean of phoneme_error_rates,
    phone_feature_error_rates: list with the Hamming feature edit distance of
        each prediction/reference pair,
    mean_phone_feature_error_rates: mean of phone_feature_error_rates,
    feature_error_rates: list with the feature error rate of each
        prediction/reference pair,
    mean_feature_error_rates: mean of feature_error_rates,
Examples:
    >>> phone_distance = evaluate.load("ginic/phone_distance")
    >>> results = phone_distance.compute(predictions=["bob", "ði"], references=["pop", "ðə"])
"""

# TODO: Define external resources urls if needed
# BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class PhoneDistance(evaluate.Metric):
    """Metric computing panphon-based error rates between Unicode IPA strings.

    Each prediction is compared with the reference at the same position using
    a `panphon.distance.Distance` instance; per-pair values and their means
    are returned by `_compute`.
    """

    def _info(self):
        """Build the `evaluate.MetricInfo` describing this metric.

        Returns:
            evaluate.MetricInfo: description, citation, input documentation,
            the input schema (one string prediction and one string reference
            per example) and links to the codebase and references.
        """
        return evaluate.MetricInfo(
            module_type="metric",
            # This is the description that will appear on the modules page.
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                'predictions': datasets.Value('string', id="sequence"),
                'references': datasets.Value('string', id="sequence"),
            }),
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/dmort27/panphon", "https://huggingface.co/spaces/ginic/phone_distance/tree/main"],
            reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
        )

    def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_set:str="spe+", feature_model:str="segment", is_normalize_max_length:bool=False) -> dict:
        """Computes phoneme error rates, phone feature error rates (Hamming feature edit distance) and feature error rates between prediction and reference strings

        Args:
            predictions (list[str], optional): Predicted transcriptions. Must be provided together with references; the None default only mirrors the keyword-style call. Defaults to None.
            references (list[str], optional): Reference transcriptions, aligned one-to-one with predictions. Defaults to None.
            feature_set (str, optional): Feature set to use in the feature model, see panphon documentation for details. Defaults to "spe+".
            feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
            is_normalize_max_length (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.

        Returns:
            dict: Per-pair lists and their means, keyed as follows:
                "phoneme_error_rates" / "mean_phoneme_error_rate": phoneme error rate of each pair and the mean over pairs.
                "phone_feature_error_rates" / "mean_phone_feature_error_rates": Hamming feature edit distance of each pair (the max-length-normalized variant when is_normalize_max_length is True) and the mean over pairs.
                "feature_error_rates" / "mean_feature_error_rates": feature error rate of each pair and the mean over pairs.

        Raises:
            ValueError: If predictions and references have different lengths.
        """
        distance_computer = panphon.distance.Distance(feature_set=feature_set, feature_model=feature_model)

        # The normalization flag does not change between pairs, so pick the
        # Hamming distance variant once instead of re-testing it per pair.
        if is_normalize_max_length:
            hamming_distance = distance_computer.hamming_feature_edit_distance_div_maxlen
        else:
            hamming_distance = distance_computer.hamming_feature_edit_distance

        phoneme_error_rates = []
        feature_error_rates = []
        hamming_distances = []
        # strict=True: a length mismatch is an error rather than a silently
        # truncated (and therefore misleading) score.
        for prediction, reference in zip(predictions, references, strict=True):
            hamming_distances.append(hamming_distance(prediction, reference))
            # panphon's Distance names this method `phoneme_error_rate`
            # (hypothesis first, reference second).
            phoneme_error_rates.append(distance_computer.phoneme_error_rate(prediction, reference))
            feature_error_rates.append(distance_computer.feature_error_rate(prediction, reference))

        return {
            "phoneme_error_rates": phoneme_error_rates,
            "mean_phoneme_error_rate": np.mean(phoneme_error_rates),
            "phone_feature_error_rates": hamming_distances,
            "mean_phone_feature_error_rates": np.mean(hamming_distances),
            "feature_error_rates": feature_error_rates,
            "mean_feature_error_rates": np.mean(feature_error_rates)
        }