Spaces:
Sleeping
Sleeping
Docstrings for metric
Browse files- README.md +3 -1
- phone_distance.py +44 -26
README.md
CHANGED
|
@@ -3,7 +3,9 @@ title: phone_distance
|
|
| 3 |
tags:
|
| 4 |
- evaluate
|
| 5 |
- metric
|
| 6 |
-
description: "
|
|
|
|
|
|
|
| 7 |
sdk: gradio
|
| 8 |
sdk_version: 3.19.1
|
| 9 |
app_file: app.py
|
|
|
|
| 3 |
tags:
|
| 4 |
- evaluate
|
| 5 |
- metric
|
| 6 |
+
description: "Measures of distance in terms of articulatory phonological features can help understand differences
|
| 7 |
+
between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
|
| 8 |
+
This is useful when evaluating speech recognition or orthographic to IPA conversion tasks."
|
| 9 |
sdk: gradio
|
| 10 |
sdk_version: 3.19.1
|
| 11 |
app_file: app.py
|
phone_distance.py
CHANGED
|
@@ -12,7 +12,7 @@
|
|
| 12 |
# See the License for the specific language governing permissions and
|
| 13 |
# limitations under the License.
|
| 14 |
"""Edit distances between Unicode International Phonetic Alphabet strings.
|
| 15 |
-
This is
|
| 16 |
"""
|
| 17 |
|
| 18 |
import evaluate
|
|
@@ -37,32 +37,50 @@ _CITATION = """\
|
|
| 37 |
}
|
| 38 |
"""
|
| 39 |
|
| 40 |
-
_DESCRIPTION = """
|
| 41 |
-
|
|
|
|
|
|
|
| 42 |
"""
|
| 43 |
|
| 44 |
|
| 45 |
-
# TODO: Add description of the arguments of the module here
|
| 46 |
_KWARGS_DESCRIPTION = """
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
Args:
|
| 50 |
predictions: list of predictions to score. Each prediction
|
| 51 |
-
should be a string
|
| 52 |
references: list of reference for each prediction. Each
|
| 53 |
-
reference should be a string with
|
|
|
|
| 54 |
Returns:
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
Examples:
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
| 60 |
|
| 61 |
-
|
| 62 |
-
""
|
|
|
|
| 63 |
|
| 64 |
-
|
| 65 |
-
# BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
| 66 |
|
| 67 |
|
| 68 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
|
@@ -87,37 +105,37 @@ class PhoneDistance(evaluate.Metric):
|
|
| 87 |
reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
|
| 88 |
)
|
| 89 |
|
| 90 |
-
def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None,
|
| 91 |
"""Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
|
| 92 |
|
| 93 |
Args:
|
| 94 |
predictions (list[str], optional): Predicted transcriptions. Defaults to None.
|
| 95 |
references (list[str], optional): Reference transcriptions. Defaults to None.
|
| 96 |
-
feature_set (str, optional): Feature set to use in the feature model, see panphone documentation for details. Defaults to "spe+".
|
| 97 |
feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
|
| 98 |
-
|
| 99 |
|
| 100 |
Returns:
|
| 101 |
-
|
|
|
|
| 102 |
"""
|
| 103 |
-
distance_computer = panphon.distance.Distance(
|
| 104 |
-
|
| 105 |
feature_error_rates = []
|
| 106 |
hamming_distances = []
|
| 107 |
for p, r in zip(predictions, references):
|
| 108 |
-
if
|
| 109 |
hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
|
| 110 |
else:
|
| 111 |
hd = distance_computer.hamming_feature_edit_distance(p, r)
|
| 112 |
hamming_distances.append(hd)
|
| 113 |
per = distance_computer.phoneme_error_rate(p, r)
|
| 114 |
-
|
| 115 |
fer = distance_computer.feature_error_rate(p, r)
|
| 116 |
feature_error_rates.append(fer)
|
| 117 |
|
| 118 |
return {
|
| 119 |
-
"
|
| 120 |
-
"
|
| 121 |
"phone_feature_error_rates": hamming_distances,
|
| 122 |
"mean_phone_feature_error_rates": np.mean(hamming_distances),
|
| 123 |
"feature_error_rates": feature_error_rates,
|
|
|
|
| 12 |
# See the License for the specific language governing permissions and
|
| 13 |
# limitations under the License.
|
| 14 |
"""Edit distances between Unicode International Phonetic Alphabet strings.
|
| 15 |
+
This is a Hugging Face wrapper around the panphon library's distance module.
|
| 16 |
"""
|
| 17 |
|
| 18 |
import evaluate
|
|
|
|
| 37 |
}
|
| 38 |
"""
|
| 39 |
|
| 40 |
+
_DESCRIPTION = """
|
| 41 |
+
Measures of distance in terms of articulatory phonological features can help understand differences
|
| 42 |
+
between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
|
| 43 |
+
This is useful when evaluating speech recognition or orthographic to IPA conversion tasks.
|
| 44 |
"""
|
| 45 |
|
| 46 |
|
|
|
|
| 47 |
_KWARGS_DESCRIPTION = """
|
| 48 |
+
Calculates the following measures of difference that rely on phonetic features:
|
| 49 |
+
- Phone error rate (PER) gives edit distance in terms of phones, rather than Unicode characters, since phones can consist of\
|
| 50 |
+
multiple characters. It is normalized by the number of phones of the reference string.
|
| 51 |
+
- Phone feature error rate (PFER) is Levenshtein distance between strings where distance between individual phones\
|
| 52 |
+
is computed using Hamming distance between phonetic features. By default it is a metric that obeys the triangle\
|
| 53 |
+
inequality, but can also be normalized by number of phones.
|
| 54 |
+
- Feature error rate (FER) is the edit distance in terms of articulatory features normalized by the number of phones in the reference.
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
Each measure is given for each prediction, reference pair along with the mean value across all pairs.
|
| 58 |
+
|
| 59 |
Args:
|
| 60 |
predictions: list of predictions to score. Each prediction
|
| 61 |
+
should be a string of unicode characters.
|
| 62 |
references: list of reference for each prediction. Each
|
| 63 |
+
reference should be a string of unicode characters.
|
| 64 |
+
is_normalize_pfer: bool, set to True to normalize PFER by the largest number of phones in the prediction, reference pair
|
| 65 |
Returns:
|
| 66 |
+
phone_error_rates: list of floats giving PER for each prediction, reference pair
|
| 67 |
+
mean_phone_error_rate: float, average PER across all examples
|
| 68 |
+
phone_feature_error_rates: list of floats giving PFER for each prediction, reference pair
|
| 69 |
+
mean_phone_feature_error_rates: float, average PFER across all examples
|
| 70 |
+
feature_error_rates: list of floats giving FER for each prediction, reference pair
|
| 71 |
+
mean_feature_error_rates: float, average FER across all examples
|
| 72 |
+
|
| 73 |
Examples:
|
| 74 |
+
Compare articulatory differences in voicing in "bob" vs. "pop" and different pronunciations of "the":
|
| 75 |
+
>>> phone_distance = evaluate.load("ginic/phone_distance")
|
| 76 |
+
>>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"])
|
| 77 |
+
{'phone_error_rates': [0.6666666666666666, 0.5], 'mean_phone_error_rate': 0.5833333333333333, 'phone_feature_error_rates': [0.08333333333333333, 0.125], 'mean_phone_feature_error_rates': 0.10416666666666666, 'feature_error_rates': [0.027777777777777776, 0.0625], 'mean_feature_error_rates': 0.04513888888888889}
|
| 78 |
|
| 79 |
+
Normalize PFER by the length of string with largest number of phones:
|
| 80 |
+
>>> phone_distance = evaluate.load("ginic/phone_distance")
|
| 81 |
+
>>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"], is_normalize_pfer=True)
|
| 82 |
|
| 83 |
+
"""
|
|
|
|
| 84 |
|
| 85 |
|
| 86 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
|
|
|
| 105 |
reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
|
| 106 |
)
|
| 107 |
|
| 108 |
+
def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_model:str="segment", is_normalize_pfer:bool=False):
|
| 109 |
"""Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
|
| 110 |
|
| 111 |
Args:
|
| 112 |
predictions (list[str], optional): Predicted transcriptions. Defaults to None.
|
| 113 |
references (list[str], optional): Reference transcriptions. Defaults to None.
|
|
|
|
| 114 |
feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
|
| 115 |
+
is_normalize_pfer (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.
|
| 116 |
|
| 117 |
Returns:
|
| 118 |
+
dict: {"phone_error_rates": list[float], "mean_phone_error_rate": float, "phone_feature_error_rates": list[float], "mean_phone_feature_error_rates": float,
|
| 119 |
+
"feature_error_rates": list[float], "mean_feature_error_rates": float}
|
| 120 |
"""
|
| 121 |
+
distance_computer = panphon.distance.Distance(feature_model=feature_model)
|
| 122 |
+
phone_error_rates = []
|
| 123 |
feature_error_rates = []
|
| 124 |
hamming_distances = []
|
| 125 |
for p, r in zip(predictions, references):
|
| 126 |
+
if is_normalize_pfer:
|
| 127 |
hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
|
| 128 |
else:
|
| 129 |
hd = distance_computer.hamming_feature_edit_distance(p, r)
|
| 130 |
hamming_distances.append(hd)
|
| 131 |
per = distance_computer.phoneme_error_rate(p, r)
|
| 132 |
+
phone_error_rates.append(per)
|
| 133 |
fer = distance_computer.feature_error_rate(p, r)
|
| 134 |
feature_error_rates.append(fer)
|
| 135 |
|
| 136 |
return {
|
| 137 |
+
"phone_error_rates": phone_error_rates,
|
| 138 |
+
"mean_phone_error_rate": np.mean(phone_error_rates),
|
| 139 |
"phone_feature_error_rates": hamming_distances,
|
| 140 |
"mean_phone_feature_error_rates": np.mean(hamming_distances),
|
| 141 |
"feature_error_rates": feature_error_rates,
|