Commit ec5a7fc
Parent: 02f2518

Update README.md

README.md CHANGED
@@ -37,6 +37,7 @@ The model can be used directly (without a language model) as follows, assuming y
 ```python
 import torch
 import torchaudio
+import librosa
 from datasets import load_dataset
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
@@ -45,10 +46,6 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 processor = Wav2Vec2Processor.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")
 model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")
 
-
-import librosa
-import numpy as np
-
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
@@ -76,6 +73,7 @@ The model can be evaluated as follows on 10% of the Marathi data on OpenSLR.
 ```python
 import torch
 import torchaudio
+import librosa
 from datasets import load_dataset, load_metric
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import re
@@ -89,6 +87,7 @@ model.to("cuda")
 
 chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\–\…]'
 
+
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
@@ -113,7 +112,7 @@ result = test_data.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["text"])))
 ```
 
-**Test Result**: 19.05 %
+**Test Result**: 19.05 % (157+157 examples)
 
 **Test Result on OpenSLR test**: 14.15 % (157 examples)
 
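In short, both snippets gain `import librosa` alongside the other top-level imports, the stray `import librosa` / `import numpy as np` block after the model loading is dropped, and the first test result line is annotated with its example count. For context, the direct-usage snippet reads roughly as below after this commit. This is a reconstruction from the hunks above, not the full file: the dataset-loading step falls outside the diff context, so a hypothetical local file path (`sample_marathi.wav`) stands in for it here.

```python
import torch
import torchaudio
import librosa
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

processor = Wav2Vec2Processor.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")
model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")

# Preprocessing the datasets.
# We need to read the audio files as arrays.
def speech_file_to_array_fn(batch):
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    # librosa (now imported at the top, per this commit) resamples the audio
    # to the 16 kHz rate that XLSR-Wav2Vec2 expects.
    batch["speech"] = librosa.resample(
        y=speech_array[0].numpy(), orig_sr=sampling_rate, target_sr=16_000
    )
    return batch

# Hypothetical single-example batch; the README loads these paths from a
# dataset, but that step is not shown in the hunks above.
batch = speech_file_to_array_fn({"path": "sample_marathi.wav"})

inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
with torch.no_grad():
    logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
predicted_ids = torch.argmax(logits, dim=-1)
print("Prediction:", processor.batch_decode(predicted_ids))
```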
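The evaluation hunks show only the imports, the `chars_to_ignore_regex` definition, and the final scoring lines; the `evaluate` body and the construction of `test_data` fall outside the diff context. As a reading aid, here is a minimal sketch of how those pieces typically fit together, continuing from the snippet above. The `evaluate` body and the transcript normalization follow the common XLSR evaluation template and are assumptions, not text from this commit; `test_data` stands for the prepared OpenSLR Marathi test split with `speech` and `text` columns.

```python
import re
import torch
from datasets import load_metric

wer = load_metric("wer")
chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\–\…]'
model.to("cuda")  # as in the hunk context above

def normalize_text(batch):
    # Strip punctuation and lowercase before scoring (assumed usage of the
    # regex the README defines; the actual normalization site is not shown).
    batch["text"] = re.sub(chars_to_ignore_regex, "", batch["text"]).lower()
    return batch

def evaluate(batch):
    # Assumed body, following the standard XLSR evaluation template.
    inputs = processor(batch["speech"], sampling_rate=16_000,
                       return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(inputs.input_values.to("cuda"),
                       attention_mask=inputs.attention_mask.to("cuda")).logits
    pred_ids = torch.argmax(logits, dim=-1)
    batch["pred_strings"] = processor.batch_decode(pred_ids)
    return batch

# test_data: the prepared test split (10% of the Marathi data on OpenSLR,
# per the hunk header); its construction is not part of this diff.
result = test_data.map(evaluate, batched=True, batch_size=8)
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["text"])))
```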