fix: key error when passing ffmpeg_microphone_live output (a dict with a "raw" key instead of "array") to the pipeline
#1
by
daraiii
- opened
- kotoba_whisper.py +2 -2
kotoba_whisper.py
CHANGED
@@ -155,14 +155,14 @@ class KotobaWhisperPipeline(AutomaticSpeechRecognitionPipeline):
|
|
155 |
inputs = ffmpeg_read(inputs, self.feature_extractor.sampling_rate)
|
156 |
if isinstance(inputs, dict):
|
157 |
# Accepting `"array"` which is the key defined in `datasets` for better integration
|
158 |
-
if not ("sampling_rate" in inputs and "array" in inputs):
|
159 |
raise ValueError(
|
160 |
"When passing a dictionary to AutomaticSpeechRecognitionPipeline, the dict needs to contain a "
|
161 |
'"array" key containing the numpy array representing the audio and a "sampling_rate" key, '
|
162 |
"containing the sampling_rate associated with that array"
|
163 |
)
|
164 |
in_sampling_rate = inputs.pop("sampling_rate")
|
165 |
-
inputs = inputs.pop("array", None)
|
166 |
if in_sampling_rate != self.feature_extractor.sampling_rate:
|
167 |
if is_torchaudio_available():
|
168 |
from torchaudio import functional as F
|
|
|
155 |
inputs = ffmpeg_read(inputs, self.feature_extractor.sampling_rate)
|
156 |
if isinstance(inputs, dict):
|
157 |
# Accepting `"array"` which is the key defined in `datasets` for better integration
|
158 |
+
if not ("sampling_rate" in inputs and ("raw" in inputs or "array" in inputs)):
|
159 |
raise ValueError(
|
160 |
"When passing a dictionary to AutomaticSpeechRecognitionPipeline, the dict needs to contain a "
|
161 |
'"array" key containing the numpy array representing the audio and a "sampling_rate" key, '
|
162 |
"containing the sampling_rate associated with that array"
|
163 |
)
|
164 |
in_sampling_rate = inputs.pop("sampling_rate")
|
165 |
+
inputs = inputs.pop("array", inputs.pop("raw", None))
|
166 |
if in_sampling_rate != self.feature_extractor.sampling_rate:
|
167 |
if is_torchaudio_available():
|
168 |
from torchaudio import functional as F
|