arampacha
/

wav2vec2-large-xlsr-ukrainian

Automatic Speech Recognition

xlsr-fine-tuning-week

Inference Endpoints

Model card Files Files and versions Community

arampacha committed on Mar 29, 2021

Commit

3b222ca

·

1 Parent(s): ba801af

fixed minor bug in eval code

Files changed (1) hide show

README.md +1 -1

README.md CHANGED Viewed

@@ -93,12 +93,12 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays and normalize characters
 def speech_file_to_array_fn(batch):
     batch["sentence"] = re.sub(re.compile(chars_to_ignore_regex), '', batch["sentence"]).lower().strip()
     batch["sentence"] = re.sub(re.compile('i'), 'і', batch['sentence'])
     batch["sentence"] = re.sub(re.compile('o'), 'о', batch['sentence'])
     batch["sentence"] = re.sub(re.compile('a'), 'а', batch['sentence'])
     batch["sentence"] = re.sub(re.compile('ы'), 'и', batch['sentence'])
-    batch["sentence"] = re.sub(re.compile("['`]"), '’', batch['sentence'])
     batch["sentence"] = re.sub(re.compile("–"), '', batch['sentence'])
     batch['sentence'] = re.sub('  ', ' ', batch['sentence'])
     speech_array, sampling_rate = torchaudio.load(batch["path"])

 # Preprocessing the datasets.
 # We need to read the audio files as arrays and normalize characters
 def speech_file_to_array_fn(batch):
+    batch["sentence"] = re.sub(re.compile("['`]"), '’', batch['sentence'])
     batch["sentence"] = re.sub(re.compile(chars_to_ignore_regex), '', batch["sentence"]).lower().strip()
     batch["sentence"] = re.sub(re.compile('i'), 'і', batch['sentence'])
     batch["sentence"] = re.sub(re.compile('o'), 'о', batch['sentence'])
     batch["sentence"] = re.sub(re.compile('a'), 'а', batch['sentence'])
     batch["sentence"] = re.sub(re.compile('ы'), 'и', batch['sentence'])
     batch["sentence"] = re.sub(re.compile("–"), '', batch['sentence'])
     batch['sentence'] = re.sub('  ', ' ', batch['sentence'])
     speech_array, sampling_rate = torchaudio.load(batch["path"])