arampacha committed on
Commit
3b222ca
·
1 Parent(s): ba801af

fixed minor bug in eval code

Browse files
Files changed (1) hide show
  1. README.md +1 -1
README.md CHANGED
@@ -93,12 +93,12 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
93
  # Preprocessing the datasets.
94
  # We need to read the audio files as arrays and normalize characters
95
  def speech_file_to_array_fn(batch):
 
96
  batch["sentence"] = re.sub(re.compile(chars_to_ignore_regex), '', batch["sentence"]).lower().strip()
97
  batch["sentence"] = re.sub(re.compile('i'), 'і', batch['sentence'])
98
  batch["sentence"] = re.sub(re.compile('o'), 'о', batch['sentence'])
99
  batch["sentence"] = re.sub(re.compile('a'), 'а', batch['sentence'])
100
  batch["sentence"] = re.sub(re.compile('ы'), 'и', batch['sentence'])
101
- batch["sentence"] = re.sub(re.compile("['`]"), '’', batch['sentence'])
102
  batch["sentence"] = re.sub(re.compile("–"), '', batch['sentence'])
103
  batch['sentence'] = re.sub(' ', ' ', batch['sentence'])
104
  speech_array, sampling_rate = torchaudio.load(batch["path"])
 
93
  # Preprocessing the datasets.
94
  # We need to read the audio files as arrays and normalize characters
95
  def speech_file_to_array_fn(batch):
96
+ batch["sentence"] = re.sub(re.compile("['`]"), '’', batch['sentence'])
97
  batch["sentence"] = re.sub(re.compile(chars_to_ignore_regex), '', batch["sentence"]).lower().strip()
98
  batch["sentence"] = re.sub(re.compile('i'), 'і', batch['sentence'])
99
  batch["sentence"] = re.sub(re.compile('o'), 'о', batch['sentence'])
100
  batch["sentence"] = re.sub(re.compile('a'), 'а', batch['sentence'])
101
  batch["sentence"] = re.sub(re.compile('ы'), 'и', batch['sentence'])
 
102
  batch["sentence"] = re.sub(re.compile("–"), '', batch['sentence'])
103
  batch['sentence'] = re.sub(' ', ' ', batch['sentence'])
104
  speech_array, sampling_rate = torchaudio.load(batch["path"])