Spaces:
Running
Running
Commit
·
e927cf5
1
Parent(s):
a356f8e
fix waveform type
Browse files
- requirements.txt +1 -2
- run_demo.py +12 -7
requirements.txt
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
transformers
|
| 2 |
-
|
| 3 |
-
torchaudio
|
| 4 |
pyctcdecode
|
| 5 |
pypi-kenlm
|
|
|
|
| 1 |
transformers
|
| 2 |
+
librosa
|
|
|
|
| 3 |
pyctcdecode
|
| 4 |
pypi-kenlm
|
run_demo.py
CHANGED
|
@@ -2,7 +2,8 @@ import logging
|
|
| 2 |
import warnings
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
-
import torchaudio
|
|
|
|
| 6 |
from transformers import pipeline
|
| 7 |
from transformers.utils.logging import disable_progress_bar
|
| 8 |
|
|
@@ -24,13 +25,17 @@ logger.info("ASR pipeline has been initialized")
|
|
| 24 |
|
| 25 |
|
| 26 |
def process_audio_file(audio_file):
|
| 27 |
-
waveform, sample_rate = torchaudio.load(audio_file)
|
| 28 |
-
waveform = waveform.squeeze(axis=0) # mono
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# resample
|
| 31 |
if sample_rate != SAMPLE_RATE:
|
| 32 |
-
resampler = torchaudio.transforms.Resample(sample_rate, SAMPLE_RATE)
|
| 33 |
-
waveform = resampler(waveform)
|
| 34 |
|
| 35 |
return waveform
|
| 36 |
|
|
@@ -52,7 +57,7 @@ def transcribe(microphone_audio_file, uploaded_audio_file):
|
|
| 52 |
|
| 53 |
audio_data = process_audio_file(audio_file)
|
| 54 |
|
| 55 |
-
# text = pipe(
|
| 56 |
text = pipe(audio_data)["text"]
|
| 57 |
logger.info(f"Transcription for {audio_file}: {text}")
|
| 58 |
|
|
|
|
| 2 |
import warnings
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
+
import librosa
|
| 6 |
+
# import torchaudio
|
| 7 |
from transformers import pipeline
|
| 8 |
from transformers.utils.logging import disable_progress_bar
|
| 9 |
|
|
|
|
| 25 |
|
| 26 |
|
| 27 |
def process_audio_file(audio_file):
|
| 28 |
+
# waveform, sample_rate = torchaudio.load(audio_file)
|
| 29 |
+
# waveform = waveform.squeeze(axis=0) # mono
|
| 30 |
+
# # resample
|
| 31 |
+
# if sample_rate != SAMPLE_RATE:
|
| 32 |
+
# resampler = torchaudio.transforms.Resample(sample_rate, SAMPLE_RATE)
|
| 33 |
+
# waveform = resampler(waveform)
|
| 34 |
+
|
| 35 |
+
waveform, sample_rate = librosa.load(audio_file, mono=True)
|
| 36 |
# resample
|
| 37 |
if sample_rate != SAMPLE_RATE:
|
| 38 |
+
waveform = librosa.resample(waveform, orig_sr=sample_rate, target_sr=SAMPLE_RATE)
|
|
|
|
| 39 |
|
| 40 |
return waveform
|
| 41 |
|
|
|
|
| 57 |
|
| 58 |
audio_data = process_audio_file(audio_file)
|
| 59 |
|
| 60 |
+
# text = pipe(audio_data, chunk_length_s=30, stride_length_s=5)["text"]
|
| 61 |
text = pipe(audio_data)["text"]
|
| 62 |
logger.info(f"Transcription for {audio_file}: {text}")
|
| 63 |
|