Spaces:

ginic
/

phone_errors

Running

App Files Files Community

ginic committed on Mar 21, 2024

Commit

f79937a

1 Parent(s): 0d76904

Docstrings for metric

Browse files

Files changed (2) hide show

README.md +3 -1
phone_distance.py +44 -26

README.md CHANGED Viewed

@@ -3,7 +3,9 @@ title: phone_distance
 tags:
 - evaluate
 - metric
-description: "TODO: add a description here"
 sdk: gradio
 sdk_version: 3.19.1
 app_file: app.py

 tags:
 - evaluate
 - metric
+description: "Measures of distance in terms of articulatory phonological features can help understand differences
+between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
+This is useful when evaluating speech recognition or orthographic to IPA conversion tasks."
 sdk: gradio
 sdk_version: 3.19.1
 app_file: app.py

phone_distance.py CHANGED Viewed

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Edit distances between Unicode International Phonetic Alphabet strings.
-This is basically a Hugging Face wrapper around the panphone library's distance module.
 """
 import evaluate
@@ -37,32 +37,50 @@ _CITATION = """\
 }
 """
-_DESCRIPTION = """\
-TODO
 """
-# TODO: Add description of the arguments of the module here
 _KWARGS_DESCRIPTION = """
-TODO
-Calculates how good are predictions given some references, using certain scores
 Args:
     predictions: list of predictions to score. Each predictions
-        should be a string with tokens separated by spaces.
     references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
 Returns:
-    accuracy: description of the first score,
-    another_score: description of the second score,
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
-    >>> my_new_module = evaluate.load("ginic/phone_distance")
-"""
-# TODO: Define external resources urls if needed
-# BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
@@ -87,37 +105,37 @@ class PhoneDistance(evaluate.Metric):
             reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
         )
-    def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_set:str="spe+", feature_model:str="segment", is_normalize_max_length:bool=False):
         """Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
         Args:
             predictions (list[str], optional): Predicted transcriptions. Defaults to None.
             references (list[str], optional): Reference transcriptions. Defaults to None.
-            feature_set (str, optional): Feature set to use in the feature model, see panphone documentation for details. Defaults to "spe+".
             feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
-            is_normalize_max_length (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.
         Returns:
-            _type_: _description_
         """
-        distance_computer = panphon.distance.Distance(feature_set=feature_set, feature_model=feature_model)
-        phoneme_error_rates = []
         feature_error_rates = []
         hamming_distances = []
         for p, r in zip(predictions, references):
-            if is_normalize_max_length:
                 hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
             else:
                 hd = distance_computer.hamming_feature_edit_distance(p, r)
             hamming_distances.append(hd)
             per = distance_computer.phoneme_error_rate(p, r)
-            phoneme_error_rates.append(per)
             fer = distance_computer.feature_error_rate(p, r)
             feature_error_rates.append(fer)
         return {
-            "phoneme_error_rates": phoneme_error_rates,
-            "mean_phoneme_error_rate": np.mean(phoneme_error_rates),
             "phone_feature_error_rates": hamming_distances,
             "mean_phone_feature_error_rates": np.mean(hamming_distances),
             "feature_error_rates": feature_error_rates,

 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Edit distances between Unicode International Phonetic Alphabet strings.
+This is a Hugging Face wrapper around the panphon library's distance module.
 """
 import evaluate
 }
 """
+_DESCRIPTION = """
+Measures of distance in terms of articulatory phonological features can help understand differences
+between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
+This is useful when evaluating speech recognition or orthographic to IPA conversion tasks.
 """
 _KWARGS_DESCRIPTION = """
+Calculates the following measures of difference that rely on phonetic features:
+ - Phone error rate (PER) gives edit distance in terms of phones, rather than Unicode characters, since phones can consist of \
+multiple characters. It is normalized by the number of phones of the reference string.
+ - Phone feature error rate (PFER) is Levenshtein distance between strings where distance between individual phones \
+is computed using Hamming distance between phonetic features. By default it is a metric that obeys the triangle \
+inequality, but can also be normalized by number of phones.
+ - Feature error rate (FER) is the edit distance in terms of articulatory features normalized by the number of phones in the reference.
+Each measure is given for each prediction, reference pair along with the mean value across all pairs.
 Args:
     predictions: list of predictions to score. Each predictions
+        should be a string of unicode characters.
     references: list of reference for each prediction. Each
+        reference should be a string of unicode characters.
+    is_normalize_pfer: bool, set to True to normalize PFER by the largest number of phones in the prediction, reference pair
 Returns:
+    phone_error_rates: list of floats giving PER for each prediction, reference pair
+    mean_phone_error_rate: float, average PER across all examples
+    phone_feature_error_rates: list of floats giving PFER for each prediction, reference pair
+    mean_phone_feature_error_rates: float, average PFER across all examples
+    feature_error_rates: list of floats giving FER for each prediction, reference pair
+    mean_feature_error_rates: float, average FER across all examples
 Examples:
+    Compare articulatory differences in voicing in "bob" vs. "pop" and different pronunciations of "the":
+>>> phone_distance = evaluate.load("ginic/phone_distance")
+>>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"])
+{'phone_error_rates': [0.6666666666666666, 0.5], 'mean_phone_error_rate': 0.5833333333333333, 'phone_feature_error_rates': [0.08333333333333333, 0.125], 'mean_phone_feature_error_rates': 0.10416666666666666, 'feature_error_rates': [0.027777777777777776, 0.0625], 'mean_feature_error_rates': 0.04513888888888889}
+    Normalize PFER by the length of string with largest number of phones:
+>>> phone_distance = evaluate.load("ginic/phone_distance")
+>>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"], is_normalize_pfer=True)
+"""
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
             reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
         )
+    def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_model:str="segment", is_normalize_pfer:bool=False):
         """Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
         Args:
             predictions (list[str], optional): Predicted transcriptions. Defaults to None.
             references (list[str], optional): Reference transcriptions. Defaults to None.
             feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
+            is_normalize_pfer (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.
         Returns:
+            dict:  {"phone_error_rates": list[float], "mean_phone_error_rate": float, "phone_feature_error_rates": list[float], "mean_phone_feature_error_rates": float,
+                    "feature_error_rates": list[float], "mean_feature_error_rates": float}
         """
+        distance_computer = panphon.distance.Distance(feature_model=feature_model)
+        phone_error_rates = []
         feature_error_rates = []
         hamming_distances = []
         for p, r in zip(predictions, references):
+            if is_normalize_pfer:
                 hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
             else:
                 hd = distance_computer.hamming_feature_edit_distance(p, r)
             hamming_distances.append(hd)
             per = distance_computer.phoneme_error_rate(p, r)
+            phone_error_rates.append(per)
             fer = distance_computer.feature_error_rate(p, r)
             feature_error_rates.append(fer)
         return {
+            "phone_error_rates": phone_error_rates,
+            "mean_phone_error_rate": np.mean(phone_error_rates),
             "phone_feature_error_rates": hamming_distances,
             "mean_phone_feature_error_rates": np.mean(hamming_distances),
             "feature_error_rates": feature_error_rates,