gchhablani commited on
Commit
ec5a7fc
·
1 Parent(s): 02f2518

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -5
README.md CHANGED
@@ -37,6 +37,7 @@ The model can be used directly (without a language model) as follows, assuming y
37
  ```python
38
  import torch
39
  import torchaudio
 
40
  from datasets import load_dataset
41
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
42
 
@@ -45,10 +46,6 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
45
  processor = Wav2Vec2Processor.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")
46
  model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")
47
 
48
-
49
- import librosa
50
- import numpy as np
51
-
52
  # Preprocessing the datasets.
53
  # We need to read the audio files as arrays
54
  def speech_file_to_array_fn(batch):
@@ -76,6 +73,7 @@ The model can be evaluated as follows on 10% of the Marathi data on OpenSLR.
76
  ```python
77
  import torch
78
  import torchaudio
 
79
  from datasets import load_dataset, load_metric
80
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
81
  import re
@@ -89,6 +87,7 @@ model.to("cuda")
89
 
90
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\–\…]'
91
 
 
92
  # Preprocessing the datasets.
93
  # We need to read the audio files as arrays
94
  def speech_file_to_array_fn(batch):
@@ -113,7 +112,7 @@ result = test_data.map(evaluate, batched=True, batch_size=8)
113
  print("WER: {:.2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["text"])))
114
  ```
115
 
116
- **Test Result**: 19.05 %
117
 
118
  **Test Result on OpenSLR test**: 14.15 % (157 examples)
119
 
 
37
  ```python
38
  import torch
39
  import torchaudio
40
+ import librosa
41
  from datasets import load_dataset
42
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
43
 
 
46
  processor = Wav2Vec2Processor.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")
47
  model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-mr-3")
48
 
 
 
 
 
49
  # Preprocessing the datasets.
50
  # We need to read the audio files as arrays
51
  def speech_file_to_array_fn(batch):
 
73
  ```python
74
  import torch
75
  import torchaudio
76
+ import librosa
77
  from datasets import load_dataset, load_metric
78
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
79
  import re
 
87
 
88
  chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�\–\…]'
89
 
90
+
91
  # Preprocessing the datasets.
92
  # We need to read the audio files as arrays
93
  def speech_file_to_array_fn(batch):
 
112
  print("WER: {:.2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["text"])))
113
  ```
114
 
115
+ **Test Result**: 19.05 % (157+157 examples)
116
 
117
  **Test Result on OpenSLR test**: 14.15 % (157 examples)
118