Spaces:
Sleeping
Sleeping
Docstrings for metric
Browse files- README.md +3 -1
- phone_distance.py +44 -26
README.md
CHANGED
@@ -3,7 +3,9 @@ title: phone_distance
|
|
3 |
tags:
|
4 |
- evaluate
|
5 |
- metric
|
6 |
-
description: "
|
|
|
|
|
7 |
sdk: gradio
|
8 |
sdk_version: 3.19.1
|
9 |
app_file: app.py
|
|
|
3 |
tags:
|
4 |
- evaluate
|
5 |
- metric
|
6 |
+
description: "Measures of distance in terms of articulatory phonological features can help understand differences
|
7 |
+
between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
|
8 |
+
This is useful when evaluating speech recognition or orthographic to IPA conversion tasks."
|
9 |
sdk: gradio
|
10 |
sdk_version: 3.19.1
|
11 |
app_file: app.py
|
phone_distance.py
CHANGED
@@ -12,7 +12,7 @@
|
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
"""Edit distances between Unicode International Phonetic Alphabet strings.
|
15 |
-
This is
|
16 |
"""
|
17 |
|
18 |
import evaluate
|
@@ -37,32 +37,50 @@ _CITATION = """\
|
|
37 |
}
|
38 |
"""
|
39 |
|
40 |
-
_DESCRIPTION = """
|
41 |
-
|
|
|
|
|
42 |
"""
|
43 |
|
44 |
|
45 |
-
# TODO: Add description of the arguments of the module here
|
46 |
_KWARGS_DESCRIPTION = """
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
Args:
|
50 |
predictions: list of predictions to score. Each prediction
|
51 |
-
should be a string
|
52 |
references: list of references for each prediction. Each
|
53 |
-
reference should be a string with
|
|
|
54 |
Returns:
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
57 |
Examples:
|
58 |
-
|
59 |
-
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
""
|
|
|
63 |
|
64 |
-
|
65 |
-
# BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
66 |
|
67 |
|
68 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
@@ -87,37 +105,37 @@ class PhoneDistance(evaluate.Metric):
|
|
87 |
reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
|
88 |
)
|
89 |
|
90 |
-
def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None,
|
91 |
"""Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
|
92 |
|
93 |
Args:
|
94 |
predictions (list[str], optional): Predicted transcriptions. Defaults to None.
|
95 |
references (list[str], optional): Reference transcriptions. Defaults to None.
|
96 |
-
feature_set (str, optional): Feature set to use in the feature model, see panphon documentation for details. Defaults to "spe+".
|
97 |
feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
|
98 |
-
|
99 |
|
100 |
Returns:
|
101 |
-
|
|
|
102 |
"""
|
103 |
-
distance_computer = panphon.distance.Distance(
|
104 |
-
|
105 |
feature_error_rates = []
|
106 |
hamming_distances = []
|
107 |
for p, r in zip(predictions, references):
|
108 |
-
if
|
109 |
hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
|
110 |
else:
|
111 |
hd = distance_computer.hamming_feature_edit_distance(p, r)
|
112 |
hamming_distances.append(hd)
|
113 |
per = distance_computer.phoneme_error_rate(p, r)
|
114 |
-
|
115 |
fer = distance_computer.feature_error_rate(p, r)
|
116 |
feature_error_rates.append(fer)
|
117 |
|
118 |
return {
|
119 |
-
"
|
120 |
-
"
|
121 |
"phone_feature_error_rates": hamming_distances,
|
122 |
"mean_phone_feature_error_rates": np.mean(hamming_distances),
|
123 |
"feature_error_rates": feature_error_rates,
|
|
|
12 |
# See the License for the specific language governing permissions and
|
13 |
# limitations under the License.
|
14 |
"""Edit distances between Unicode International Phonetic Alphabet strings.
|
15 |
+
This is a Hugging Face wrapper around the panphon library's distance module.
|
16 |
"""
|
17 |
|
18 |
import evaluate
|
|
|
37 |
}
|
38 |
"""
|
39 |
|
40 |
+
_DESCRIPTION = """
|
41 |
+
Measures of distance in terms of articulatory phonological features can help understand differences
|
42 |
+
between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
|
43 |
+
This is useful when evaluating speech recognition or orthographic to IPA conversion tasks.
|
44 |
"""
|
45 |
|
46 |
|
|
|
47 |
_KWARGS_DESCRIPTION = """
|
48 |
+
Calculates the following measures of difference that rely on phonetic features:
|
49 |
+
- Phone error rate (PER) gives edit distance in terms of phones, rather than Unicode characters, since phones can consist of\
|
50 |
+
multiple characters. It is normalized by the number of phones of the reference string.
|
51 |
+
- Phone feature error rate (PFER) is Levenshtein distance between strings where distance between individual phones\
|
52 |
+
is computed using Hamming distance between phonetic features. By default it is a metric that obeys the triangle\
|
53 |
+
inequality, but can also be normalized by number of phones.
|
54 |
+
- Feature error rate (FER) is the edit distance in terms of articulatory features normalized by the number of phones in the reference.
|
55 |
+
|
56 |
+
|
57 |
+
Each measure is given for each prediction, reference pair along with the mean value across all pairs.
|
58 |
+
|
59 |
Args:
|
60 |
predictions: list of predictions to score. Each prediction
|
61 |
+
should be a string of unicode characters.
|
62 |
references: list of references for each prediction. Each
|
63 |
+
reference should be a string of unicode characters.
|
64 |
+
is_normalize_pfer: bool, set to True to normalize PFER by the largest number of phones in the prediction, reference pair
|
65 |
Returns:
|
66 |
+
phone_error_rates: list of floats giving PER for each prediction, reference pair
|
67 |
+
mean_phone_error_rate: float, average PER across all examples
|
68 |
+
phone_feature_error_rates: list of floats giving PFER for each prediction, reference pair
|
69 |
+
mean_phone_feature_error_rates: float, average PFER across all examples
|
70 |
+
feature_error_rates: list of floats giving FER for each prediction, reference pair
|
71 |
+
mean_feature_error_rates: float, average FER across all examples
|
72 |
+
|
73 |
Examples:
|
74 |
+
Compare articulatory differences in voicing in "bob" vs. "pop" and different pronunciations of "the":
|
75 |
+
>>> phone_distance = evaluate.load("ginic/phone_distance")
|
76 |
+
>>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"])
|
77 |
+
{'phone_error_rates': [0.6666666666666666, 0.5], 'mean_phone_error_rate': 0.5833333333333333, 'phone_feature_error_rates': [0.08333333333333333, 0.125], 'mean_phone_feature_error_rates': 0.10416666666666666, 'feature_error_rates': [0.027777777777777776, 0.0625], 'mean_feature_error_rates': 0.04513888888888889}
|
78 |
|
79 |
+
Normalize PFER by the length of string with largest number of phones:
|
80 |
+
>>> phone_distance = evaluate.load("ginic/phone_distance")
|
81 |
+
>>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"], is_normalize_pfer=True)
|
82 |
|
83 |
+
"""
|
|
|
84 |
|
85 |
|
86 |
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
|
|
|
105 |
reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
|
106 |
)
|
107 |
|
108 |
+
def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_model:str="segment", is_normalize_pfer:bool=False):
|
109 |
"""Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
|
110 |
|
111 |
Args:
|
112 |
predictions (list[str], optional): Predicted transcriptions. Defaults to None.
|
113 |
references (list[str], optional): Reference transcriptions. Defaults to None.
|
|
|
114 |
feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
|
115 |
+
is_normalize_pfer (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.
|
116 |
|
117 |
Returns:
|
118 |
+
dict: {"phone_error_rates": list[float], "mean_phone_error_rate": float, "phone_feature_error_rates": list[float], "mean_phone_feature_error_rates": float,
|
119 |
+
"feature_error_rates": list[float], "mean_feature_error_rates": float}
|
120 |
"""
|
121 |
+
distance_computer = panphon.distance.Distance(feature_model=feature_model)
|
122 |
+
phone_error_rates = []
|
123 |
feature_error_rates = []
|
124 |
hamming_distances = []
|
125 |
for p, r in zip(predictions, references):
|
126 |
+
if is_normalize_pfer:
|
127 |
hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
|
128 |
else:
|
129 |
hd = distance_computer.hamming_feature_edit_distance(p, r)
|
130 |
hamming_distances.append(hd)
|
131 |
per = distance_computer.phoneme_error_rate(p, r)
|
132 |
+
phone_error_rates.append(per)
|
133 |
fer = distance_computer.feature_error_rate(p, r)
|
134 |
feature_error_rates.append(fer)
|
135 |
|
136 |
return {
|
137 |
+
"phone_error_rates": phone_error_rates,
|
138 |
+
"mean_phone_error_rate": np.mean(phone_error_rates),
|
139 |
"phone_feature_error_rates": hamming_distances,
|
140 |
"mean_phone_feature_error_rates": np.mean(hamming_distances),
|
141 |
"feature_error_rates": feature_error_rates,
|