ginic committed on
Commit
f79937a
·
1 Parent(s): 0d76904

Docstrings for metric

Browse files
Files changed (2) hide show
  1. README.md +3 -1
  2. phone_distance.py +44 -26
README.md CHANGED
@@ -3,7 +3,9 @@ title: phone_distance
3
  tags:
4
  - evaluate
5
  - metric
6
- description: "TODO: add a description here"
 
 
7
  sdk: gradio
8
  sdk_version: 3.19.1
9
  app_file: app.py
 
3
  tags:
4
  - evaluate
5
  - metric
6
+ description: "Measures of distance in terms of articulatory phonological features can help understand differences
7
+ between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
8
+ This is useful when evaluating speech recognition or orthographic to IPA conversion tasks."
9
  sdk: gradio
10
  sdk_version: 3.19.1
11
  app_file: app.py
phone_distance.py CHANGED
@@ -12,7 +12,7 @@
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
  """Edit distances between Unicode International Phonetic Alphabet strings.
15
- This is basically a Hugging Face wrapper around the panphone library's distance module.
16
  """
17
 
18
  import evaluate
@@ -37,32 +37,50 @@ _CITATION = """\
37
  }
38
  """
39
 
40
- _DESCRIPTION = """\
41
- TODO
 
 
42
  """
43
 
44
 
45
- # TODO: Add description of the arguments of the module here
46
  _KWARGS_DESCRIPTION = """
47
- TODO
48
- Calculates how good are predictions given some references, using certain scores
 
 
 
 
 
 
 
 
 
49
  Args:
50
  predictions: list of predictions to score. Each prediction
51
- should be a string with tokens separated by spaces.
52
  references: list of references for each prediction. Each
53
- reference should be a string with tokens separated by spaces.
 
54
  Returns:
55
- accuracy: description of the first score,
56
- another_score: description of the second score,
 
 
 
 
 
57
  Examples:
58
- Examples should be written in doctest format, and should illustrate how
59
- to use the function.
 
 
60
 
61
- >>> my_new_module = evaluate.load("ginic/phone_distance")
62
- """
 
63
 
64
- # TODO: Define external resources urls if needed
65
- # BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
66
 
67
 
68
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
@@ -87,37 +105,37 @@ class PhoneDistance(evaluate.Metric):
87
  reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
88
  )
89
 
90
- def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_set:str="spe+", feature_model:str="segment", is_normalize_max_length:bool=False):
91
  """Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
92
 
93
  Args:
94
  predictions (list[str], optional): Predicted transcriptions. Defaults to None.
95
  references (list[str], optional): Reference transcriptions. Defaults to None.
96
- feature_set (str, optional): Feature set to use in the feature model, see panphone documentation for details. Defaults to "spe+".
97
  feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
98
- is_normalize_max_length (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.
99
 
100
  Returns:
101
- _type_: _description_
 
102
  """
103
- distance_computer = panphon.distance.Distance(feature_set=feature_set, feature_model=feature_model)
104
- phoneme_error_rates = []
105
  feature_error_rates = []
106
  hamming_distances = []
107
  for p, r in zip(predictions, references):
108
- if is_normalize_max_length:
109
  hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
110
  else:
111
  hd = distance_computer.hamming_feature_edit_distance(p, r)
112
  hamming_distances.append(hd)
113
  per = distance_computer.phoneme_error_rate(p, r)
114
- phoneme_error_rates.append(per)
115
  fer = distance_computer.feature_error_rate(p, r)
116
  feature_error_rates.append(fer)
117
 
118
  return {
119
- "phoneme_error_rates": phoneme_error_rates,
120
- "mean_phoneme_error_rate": np.mean(phoneme_error_rates),
121
  "phone_feature_error_rates": hamming_distances,
122
  "mean_phone_feature_error_rates": np.mean(hamming_distances),
123
  "feature_error_rates": feature_error_rates,
 
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
  """Edit distances between Unicode International Phonetic Alphabet strings.
15
+ This is a Hugging Face wrapper around the panphon library's distance module.
16
  """
17
 
18
  import evaluate
 
37
  }
38
  """
39
 
40
+ _DESCRIPTION = """
41
+ Measures of distance in terms of articulatory phonological features can help understand differences
42
+ between strings in the International Phonetic Alphabet (IPA) in a linguistically motivated way.
43
+ This is useful when evaluating speech recognition or orthographic to IPA conversion tasks.
44
  """
45
 
46
 
 
47
  _KWARGS_DESCRIPTION = """
48
+ Calculates the following measures of difference that rely on phonetic features:
49
+ - Phone error rate (PER) gives edit distance in terms of phones, rather than Unicode characters, since phones can consist of\
50
+ multiple characters. It is normalized by the number of phones of the reference string.
51
+ - Phone feature error rate (PFER) is Levenshtein distance between strings where distance between individual phones\
52
+ is computed using Hamming distance between phonetic features. By default it is a metric that obeys the triangle\
53
+ inequality, but can also be normalized by number of phones.
54
+ - Feature error rate (FER) is the edit distance in terms of articulatory features normalized by the number of phones in the reference.
55
+
56
+
57
+ Each measure is given for each prediction, reference pair along with the mean value across all pairs.
58
+
59
  Args:
60
  predictions: list of predictions to score. Each prediction
60
+ should be a string of unicode characters.
61
  references: list of references for each prediction. Each
62
+ reference should be a string of unicode characters.
64
+ is_normalize_pfer: bool, set to True to normalize PFER by the largest number of phones in the prediction, reference pair
65
  Returns:
66
+ phone_error_rates: list of floats giving PER for each prediction, reference pair
67
+ mean_phone_error_rate: float, average PER across all examples
68
+ phone_feature_error_rates: list of floats giving PFER for each prediction, reference pair
69
+ mean_phone_feature_error_rates: float, average PFER across all examples
70
+ feature_error_rates: list of floats giving FER for each prediction, reference pair
71
+ mean_feature_error_rates: float, average FER across all examples
72
+
73
  Examples:
74
+ Compare articulatory differences in voicing in "bob" vs. "pop" and different pronunciations of "the":
75
+ >>> phone_distance = evaluate.load("ginic/phone_distance")
76
+ >>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"])
77
+ {'phone_error_rates': [0.6666666666666666, 0.5], 'mean_phone_error_rate': 0.5833333333333333, 'phone_feature_error_rates': [0.08333333333333333, 0.125], 'mean_phone_feature_error_rates': 0.10416666666666666, 'feature_error_rates': [0.027777777777777776, 0.0625], 'mean_feature_error_rates': 0.04513888888888889}
78
 
79
+ Normalize PFER by the length of the string with the largest number of phones:
80
+ >>> phone_distance = evaluate.load("ginic/phone_distance")
81
+ >>> phone_distance.compute(predictions=["bob", "θə"], references=["pop", "θi"], is_normalize_pfer=True)
82
 
83
+ """
 
84
 
85
 
86
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 
105
  reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
106
  )
107
 
108
+ def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_model:str="segment", is_normalize_pfer:bool=False):
109
  """Computes phoneme error rates, phone feature error rate (Hamming feature edit distance) and feature error rates between prediction and reference strings
110
 
111
  Args:
112
  predictions (list[str], optional): Predicted transcriptions. Defaults to None.
113
  references (list[str], optional): Reference transcriptions. Defaults to None.
 
114
  feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
115
+ is_normalize_pfer (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.
116
 
117
  Returns:
118
+ dict: {"phone_error_rates": list[float], "mean_phone_error_rate": float, "phone_feature_error_rates": list[float], "mean_phone_feature_error_rates": float,
119
+ "feature_error_rates": list[float], "mean_feature_error_rates": float}
120
  """
121
+ distance_computer = panphon.distance.Distance(feature_model=feature_model)
122
+ phone_error_rates = []
123
  feature_error_rates = []
124
  hamming_distances = []
125
  for p, r in zip(predictions, references):
126
+ if is_normalize_pfer:
127
  hd = distance_computer.hamming_feature_edit_distance_div_maxlen(p, r)
128
  else:
129
  hd = distance_computer.hamming_feature_edit_distance(p, r)
130
  hamming_distances.append(hd)
131
  per = distance_computer.phoneme_error_rate(p, r)
132
+ phone_error_rates.append(per)
133
  fer = distance_computer.feature_error_rate(p, r)
134
  feature_error_rates.append(fer)
135
 
136
  return {
137
+ "phone_error_rates": phone_error_rates,
138
+ "mean_phone_error_rate": np.mean(phone_error_rates),
139
  "phone_feature_error_rates": hamming_distances,
140
  "mean_phone_feature_error_rates": np.mean(hamming_distances),
141
  "feature_error_rates": feature_error_rates,