fixed minor bug in eval code
Browse files
README.md
CHANGED
@@ -93,12 +93,12 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
|
93 |
# Preprocessing the datasets.
|
94 |
# We need to read the audio files as arrays and normalize characters
|
95 |
def speech_file_to_array_fn(batch):
|
|
|
96 |
batch["sentence"] = re.sub(re.compile(chars_to_ignore_regex), '', batch["sentence"]).lower().strip()
|
97 |
batch["sentence"] = re.sub(re.compile('i'), 'і', batch['sentence'])
|
98 |
batch["sentence"] = re.sub(re.compile('o'), 'о', batch['sentence'])
|
99 |
batch["sentence"] = re.sub(re.compile('a'), 'а', batch['sentence'])
|
100 |
batch["sentence"] = re.sub(re.compile('ы'), 'и', batch['sentence'])
|
101 |
-
batch["sentence"] = re.sub(re.compile("['`]"), '’', batch['sentence'])
|
102 |
batch["sentence"] = re.sub(re.compile("–"), '', batch['sentence'])
|
103 |
batch['sentence'] = re.sub(' ', ' ', batch['sentence'])
|
104 |
speech_array, sampling_rate = torchaudio.load(batch["path"])
|
|
|
93 |
# Preprocessing the datasets.
|
94 |
# We need to read the audio files as arrays and normalize characters
|
95 |
def speech_file_to_array_fn(batch):
|
96 |
+
batch["sentence"] = re.sub(re.compile("['`]"), '’', batch['sentence'])
|
97 |
batch["sentence"] = re.sub(re.compile(chars_to_ignore_regex), '', batch["sentence"]).lower().strip()
|
98 |
batch["sentence"] = re.sub(re.compile('i'), 'і', batch['sentence'])
|
99 |
batch["sentence"] = re.sub(re.compile('o'), 'о', batch['sentence'])
|
100 |
batch["sentence"] = re.sub(re.compile('a'), 'а', batch['sentence'])
|
101 |
batch["sentence"] = re.sub(re.compile('ы'), 'и', batch['sentence'])
|
|
|
102 |
batch["sentence"] = re.sub(re.compile("–"), '', batch['sentence'])
|
103 |
batch['sentence'] = re.sub(' ', ' ', batch['sentence'])
|
104 |
speech_array, sampling_rate = torchaudio.load(batch["path"])
|