Update app.py
app.py
CHANGED
@@ -23,15 +23,12 @@ def ASR(audio):
     # waveform = resampler(waveform)
     # Convert the PyTorch tensor to a NumPy ndarray
     # Preprocess the audio file
-    input_values = processor(waveform.squeeze(
+    input_values = processor(waveform.squeeze().numpy(), sampling_rate=16_000, return_tensors="pt").input_values
     # Transcribe the audio file
     with torch.no_grad():
         logits = model(input_values).logits
-    # audio_array = waveform.numpy()
-    # inputs = processor(audio_array, sampling_rate=16_000)
-    # text = pipe(np.array(inputs))
     # Decode the transcription
-    transcription = processor.decode(torch.argmax(logits
+    transcription = processor.decode(torch.argmax(logits, dim=-1))
     return transcription
 iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
 iface.launch(share=False)